diff options
115 files changed, 2812 insertions, 2351 deletions
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index 01539f41067..4949fcaa6b6 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt @@ -49,8 +49,7 @@ Mount options NILFS2 supports the following mount options: (*) == default -barrier=on(*) This enables/disables barriers. barrier=off disables - it, barrier=on enables it. +nobarrier Disables barriers. errors=continue(*) Keep going on a filesystem error. errors=remount-ro Remount the filesystem read-only on an error. errors=panic Panic and halt the machine if an error occurs. @@ -71,6 +70,10 @@ order=strict Apply strict in-order semantics that preserves sequence blocks. That means, it is guaranteed that no overtaking of events occurs in the recovered file system after a crash. +norecovery Disable recovery of the filesystem on mount. + This disables every write access on the device for + read-only mounts or snapshots. This option will fail + for r/w mounts on an unclean volume. NILFS2 usage ============ diff --git a/arch/ia64/include/asm/xen/hypervisor.h b/arch/ia64/include/asm/xen/hypervisor.h index 88afb54501e..67455c2ed2b 100644 --- a/arch/ia64/include/asm/xen/hypervisor.h +++ b/arch/ia64/include/asm/xen/hypervisor.h @@ -37,35 +37,9 @@ #include <xen/interface/xen.h> #include <xen/interface/version.h> /* to compile feature.c */ #include <xen/features.h> /* to comiple xen-netfront.c */ +#include <xen/xen.h> #include <asm/xen/hypercall.h> -/* xen_domain_type is set before executing any C code by early_xen_setup */ -enum xen_domain_type { - XEN_NATIVE, /* running on bare hardware */ - XEN_PV_DOMAIN, /* running in a PV domain */ - XEN_HVM_DOMAIN, /* running in a Xen hvm domain*/ -}; - -#ifdef CONFIG_XEN -extern enum xen_domain_type xen_domain_type; -#else -#define xen_domain_type XEN_NATIVE -#endif - -#define xen_domain() (xen_domain_type != XEN_NATIVE) -#define xen_pv_domain() (xen_domain() && \ - xen_domain_type == XEN_PV_DOMAIN) -#define xen_hvm_domain() (xen_domain() && \ - xen_domain_type == XEN_HVM_DOMAIN) - -#ifdef CONFIG_XEN_DOM0 -#define xen_initial_domain() (xen_pv_domain() && \ - (xen_start_info->flags & SIF_INITDOMAIN)) -#else -#define xen_initial_domain() (0) -#endif - - #ifdef CONFIG_XEN extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index c0fca2c1c85..df639db779f 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -131,6 +131,7 @@ alloc_pci_controller (int seg) } struct pci_root_info { + struct acpi_device *bridge; struct pci_controller *controller; char *name; }; @@ -297,9 +298,20 @@ static __devinit acpi_status add_window(struct acpi_resource *res, void *data) window->offset = offset; if (insert_resource(root, &window->resource)) { - printk(KERN_ERR "alloc 0x%llx-0x%llx from %s for %s failed\n", - window->resource.start, window->resource.end, - root->name, info->name); + dev_err(&info->bridge->dev, + "can't allocate host bridge window %pR\n", + &window->resource); + } else { + if (offset) + dev_info(&info->bridge->dev, "host bridge window %pR " + "(PCI address [%#llx-%#llx])\n", + &window->resource, + window->resource.start - offset, + window->resource.end - offset); + else + dev_info(&info->bridge->dev, + "host bridge window %pR\n", + &window->resource); } return AE_OK; @@ -319,8 +331,9 @@ pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl) (res->end - res->start < 16)) continue; if (j >= PCI_BUS_NUM_RESOURCES) { - printk("Ignoring range [%#llx-%#llx] (%lx)\n", - res->start, res->end, res->flags); + dev_warn(&bus->dev, + "ignoring host bridge window %pR (no space)\n", + res); continue; } bus->resource[j++] = res; @@ -364,6 +377,7 @@ pci_acpi_scan_root(struct acpi_device *device, int domain, int bus) goto out3; sprintf(name, "PCI Bus %04x:%02x", domain, bus); + info.bridge = device; info.controller = controller; info.name = name; acpi_walk_resources(device->handle, METHOD_NAME__CRS, @@ -720,9 +734,6 @@ int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) return ret; } -/* It's defined in drivers/pci/pci.c */ -extern u8 pci_cache_line_size; - /** * set_pci_cacheline_size - determine cacheline size for PCI devices * @@ -731,7 +742,7 @@ extern u8 pci_cache_line_size; * * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info(). */ -static void __init set_pci_cacheline_size(void) +static void __init set_pci_dfl_cacheline_size(void) { unsigned long levels, unique_caches; long status; @@ -751,7 +762,7 @@ static void __init set_pci_cacheline_size(void) "(status=%ld)\n", __func__, status); return; } - pci_cache_line_size = (1 << cci.pcci_line_size) / 4; + pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4; } u64 ia64_dma_get_required_mask(struct device *dev) @@ -782,7 +793,7 @@ EXPORT_SYMBOL_GPL(dma_get_required_mask); static int __init pcibios_init(void) { - set_pci_cacheline_size(); + set_pci_dfl_cacheline_size(); return 0; } diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h index b63e51c3c3e..b0576df6ec8 100644 --- a/arch/sparc/include/asm/pci_64.h +++ b/arch/sparc/include/asm/pci_64.h @@ -16,8 +16,6 @@ #define PCI_IRQ_NONE 0xffffffff -#define PCI_CACHE_LINE_BYTES 64 - static inline void pcibios_set_master(struct pci_dev *dev) { /* No special bus mastering setup handling */ diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index c6864866280..b85374f7cf9 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -1081,3 +1081,10 @@ void pci_resource_to_user(const struct pci_dev *pdev, int bar, *start = rp->start - offset; *end = rp->end - offset; } + +static int __init pcibios_init(void) +{ + pci_dfl_cache_line_size = 64 >> 2; + return 0; +} +subsys_initcall(pcibios_init); diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b399988eee3..b4bf9a942ed 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -118,11 +118,27 @@ extern int __init pcibios_init(void); /* pci-mmconfig.c */ +/* "PCI MMCONFIG %04x [bus %02x-%02x]" */ +#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2) + +struct pci_mmcfg_region { + struct list_head list; + struct resource res; + u64 address; + char __iomem *virt; + u16 segment; + u8 start_bus; + u8 end_bus; + char name[PCI_MMCFG_RESOURCE_NAME_LEN]; +}; + extern int __init pci_mmcfg_arch_init(void); extern void __init pci_mmcfg_arch_free(void); +extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus); + +extern struct list_head pci_mmcfg_list; -extern struct acpi_mcfg_allocation *pci_mmcfg_config; -extern int pci_mmcfg_config_num; +#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20) /* * AMD Fam10h CPUs are buggy, and cannot access MMIO config space diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index d5b7e90c0ed..396ff4cc8ed 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -37,31 +37,4 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; -enum xen_domain_type { - XEN_NATIVE, /* running on bare hardware */ - XEN_PV_DOMAIN, /* running in a PV domain */ - XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ -}; - -#ifdef CONFIG_XEN -extern enum xen_domain_type xen_domain_type; -#else -#define xen_domain_type XEN_NATIVE -#endif - -#define xen_domain() (xen_domain_type != XEN_NATIVE) -#define xen_pv_domain() (xen_domain() && \ - xen_domain_type == XEN_PV_DOMAIN) -#define xen_hvm_domain() (xen_domain() && \ - xen_domain_type == XEN_HVM_DOMAIN) - -#ifdef CONFIG_XEN_DOM0 -#include <xen/interface/xen.h> - -#define xen_initial_domain() (xen_pv_domain() && \ - xen_start_info->flags & SIF_INITDOMAIN) -#else /* !CONFIG_XEN_DOM0 */ -#define xen_initial_domain() (0) -#endif /* CONFIG_XEN_DOM0 */ - #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 7ffc3996523..9c4a6f74755 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1336,6 +1336,9 @@ void __init amd_iommu_detect(void) iommu_detected = 1; amd_iommu_detected = 1; x86_init.iommu.iommu_init = amd_iommu_init; + + /* Make sure ACS will be enabled */ + pci_request_acs(); } } diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index d49202e740e..564b008a51c 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -15,3 +15,8 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-y += common.o early.o obj-y += amd_bus.o +obj-$(CONFIG_X86_64) += bus_numa.o intel_bus.o + +ifeq ($(CONFIG_PCI_DEBUG),y) +EXTRA_CFLAGS += -DDEBUG +endif diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 1014eb4bfc3..959e548a703 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -7,6 +7,7 @@ #include <asm/pci_x86.h> struct pci_root_info { + struct acpi_device *bridge; char *name; unsigned int res_num; struct resource *res; @@ -58,6 +59,30 @@ bus_has_transparent_bridge(struct pci_bus *bus) return false; } +static void +align_resource(struct acpi_device *bridge, struct resource *res) +{ + int align = (res->flags & IORESOURCE_MEM) ? 16 : 4; + + /* + * Host bridge windows are not BARs, but the decoders on the PCI side + * that claim this address space have starting alignment and length + * constraints, so fix any obvious BIOS goofs. + */ + if (!IS_ALIGNED(res->start, align)) { + dev_printk(KERN_DEBUG, &bridge->dev, + "host bridge window %pR invalid; " + "aligning start to %d-byte boundary\n", res, align); + res->start &= ~(align - 1); + } + if (!IS_ALIGNED(res->end + 1, align)) { + dev_printk(KERN_DEBUG, &bridge->dev, + "host bridge window %pR invalid; " + "aligning end to %d-byte boundary\n", res, align); + res->end = ALIGN(res->end, align) - 1; + } +} + static acpi_status setup_resource(struct acpi_resource *acpi_res, void *data) { @@ -91,11 +116,12 @@ setup_resource(struct acpi_resource *acpi_res, void *data) start = addr.minimum + addr.translation_offset; end = start + addr.address_length - 1; if (info->res_num >= max_root_bus_resources) { - printk(KERN_WARNING "PCI: Failed to allocate 0x%lx-0x%lx " - "from %s for %s due to _CRS returning more than " - "%d resource descriptors\n", (unsigned long) start, - (unsigned long) end, root->name, info->name, - max_root_bus_resources); + if (pci_probe & PCI_USE__CRS) + printk(KERN_WARNING "PCI: Failed to allocate " + "0x%lx-0x%lx from %s for %s due to _CRS " + "returning more than %d resource descriptors\n", + (unsigned long) start, (unsigned long) end, + root->name, info->name, max_root_bus_resources); return AE_OK; } @@ -105,14 +131,28 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->start = start; res->end = end; res->child = NULL; + align_resource(info->bridge, res); + + if (!(pci_probe & PCI_USE__CRS)) { + dev_printk(KERN_DEBUG, &info->bridge->dev, + "host bridge window %pR (ignored)\n", res); + return AE_OK; + } if (insert_resource(root, res)) { - printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " - "from %s for %s\n", (unsigned long) res->start, - (unsigned long) res->end, root->name, info->name); + dev_err(&info->bridge->dev, + "can't allocate host bridge window %pR\n", res); } else { info->bus->resource[info->res_num] = res; info->res_num++; + if (addr.translation_offset) + dev_info(&info->bridge->dev, "host bridge window %pR " + "(PCI address [%#llx-%#llx])\n", + res, res->start - addr.translation_offset, + res->end - addr.translation_offset); + else + dev_info(&info->bridge->dev, + "host bridge window %pR\n", res); } return AE_OK; } @@ -124,6 +164,12 @@ get_current_resources(struct acpi_device *device, int busnum, struct pci_root_info info; size_t size; + if (!(pci_probe & PCI_USE__CRS)) + dev_info(&device->dev, + "ignoring host bridge windows from ACPI; " + "boot with \"pci=use_crs\" to use them\n"); + + info.bridge = device; info.bus = bus; info.res_num = 0; acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, @@ -163,8 +209,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do #endif if (domain && !pci_domains_supported) { - printk(KERN_WARNING "PCI: Multiple domains not supported " - "(dom %d, bus %d)\n", domain, busnum); + printk(KERN_WARNING "pci_bus %04x:%02x: " + "ignored (multiple domains not supported)\n", + domain, busnum); return NULL; } @@ -188,7 +235,8 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do */ sd = kzalloc(sizeof(*sd), GFP_KERNEL); if (!sd) { - printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); + printk(KERN_WARNING "pci_bus %04x:%02x: " + "ignored (out of memory)\n", domain, busnum); return NULL; } @@ -209,9 +257,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do } else { bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd); if (bus) { - if (pci_probe & PCI_USE__CRS) - get_current_resources(device, busnum, domain, - bus); + get_current_resources(device, busnum, domain, bus); bus->subordinate = pci_scan_child_bus(bus); } } diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 572ee9782f2..95ecbd49595 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -6,10 +6,10 @@ #ifdef CONFIG_X86_64 #include <asm/pci-direct.h> -#include <asm/mpspec.h> -#include <linux/cpumask.h> #endif +#include "bus_numa.h" + /* * This discovers the pcibus <-> node mapping on AMD K8. * also get peer root bus resource for io,mmio @@ -17,67 +17,6 @@ #ifdef CONFIG_X86_64 -/* - * sub bus (transparent) will use entres from 3 to store extra from root, - * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES? - */ -#define RES_NUM 16 -struct pci_root_info { - char name[12]; - unsigned int res_num; - struct resource res[RES_NUM]; - int bus_min; - int bus_max; - int node; - int link; -}; - -/* 4 at this time, it may become to 32 */ -#define PCI_ROOT_NR 4 -static int pci_root_num; -static struct pci_root_info pci_root_info[PCI_ROOT_NR]; - -void x86_pci_root_bus_res_quirks(struct pci_bus *b) -{ - int i; - int j; - struct pci_root_info *info; - - /* don't go for it if _CRS is used already */ - if (b->resource[0] != &ioport_resource || - b->resource[1] != &iomem_resource) - return; - - /* if only one root bus, don't need to anything */ - if (pci_root_num < 2) - return; - - for (i = 0; i < pci_root_num; i++) { - if (pci_root_info[i].bus_min == b->number) - break; - } - - if (i == pci_root_num) - return; - - printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", - b->number); - - info = &pci_root_info[i]; - for (j = 0; j < info->res_num; j++) { - struct resource *res; - struct resource *root; - - res = &info->res[j]; - b->resource[j] = res; - if (res->flags & IORESOURCE_IO) - root = &ioport_resource; - else - root = &iomem_resource; - insert_resource(root, res); - } -} - #define RANGE_NUM 16 struct res_range { @@ -130,52 +69,6 @@ static void __init update_range(struct res_range *range, size_t start, } } -static void __init update_res(struct pci_root_info *info, size_t start, - size_t end, unsigned long flags, int merge) -{ - int i; - struct resource *res; - - if (!merge) - goto addit; - - /* try to merge it with old one */ - for (i = 0; i < info->res_num; i++) { - size_t final_start, final_end; - size_t common_start, common_end; - - res = &info->res[i]; - if (res->flags != flags) - continue; - - common_start = max((size_t)res->start, start); - common_end = min((size_t)res->end, end); - if (common_start > common_end + 1) - continue; - - final_start = min((size_t)res->start, start); - final_end = max((size_t)res->end, end); - - res->start = final_start; - res->end = final_end; - return; - } - -addit: - - /* need to add that */ - if (info->res_num >= RES_NUM) - return; - - res = &info->res[info->res_num]; - res->name = info->name; - res->flags = flags; - res->start = start; - res->end = end; - res->child = NULL; - info->res_num++; -} - struct pci_hostbridge_probe { u32 bus; u32 slot; @@ -230,7 +123,6 @@ static int __init early_fill_mp_bus_info(void) int j; unsigned bus; unsigned slot; - int found; int node; int link; int def_node; @@ -247,7 +139,7 @@ static int __init early_fill_mp_bus_info(void) if (!early_pci_allowed()) return -1; - found = 0; + found_all_numa_early = 0; for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { u32 id; u16 device; @@ -261,12 +153,12 @@ static int __init early_fill_mp_bus_info(void) device = (id>>16) & 0xffff; if (pci_probes[i].vendor == vendor && pci_probes[i].device == device) { - found = 1; + found_all_numa_early = 1; break; } } - if (!found) + if (!found_all_numa_early) return 0; pci_root_num = 0; @@ -488,7 +380,7 @@ static int __init early_fill_mp_bus_info(void) info = &pci_root_info[i]; res_num = info->res_num; busnum = info->bus_min; - printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n", + printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n", info->bus_min, info->bus_max, info->node, info->link); for (j = 0; j < res_num; j++) { res = &info->res[j]; diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c new file mode 100644 index 00000000000..145df00e038 --- /dev/null +++ b/arch/x86/pci/bus_numa.c @@ -0,0 +1,101 @@ +#include <linux/init.h> +#include <linux/pci.h> + +#include "bus_numa.h" + +int pci_root_num; +struct pci_root_info pci_root_info[PCI_ROOT_NR]; +int found_all_numa_early; + +void x86_pci_root_bus_res_quirks(struct pci_bus *b) +{ + int i; + int j; + struct pci_root_info *info; + + /* don't go for it if _CRS is used already */ + if (b->resource[0] != &ioport_resource || + b->resource[1] != &iomem_resource) + return; + + if (!pci_root_num) + return; + + /* for amd, if only one root bus, don't need to do anything */ + if (pci_root_num < 2 && found_all_numa_early) + return; + + for (i = 0; i < pci_root_num; i++) { + if (pci_root_info[i].bus_min == b->number) + break; + } + + if (i == pci_root_num) + return; + + printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", + b->number); + + info = &pci_root_info[i]; + for (j = 0; j < info->res_num; j++) { + struct resource *res; + struct resource *root; + + res = &info->res[j]; + b->resource[j] = res; + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + insert_resource(root, res); + } +} + +void __init update_res(struct pci_root_info *info, size_t start, + size_t end, unsigned long flags, int merge) +{ + int i; + struct resource *res; + + if (start > end) + return; + + if (!merge) + goto addit; + + /* try to merge it with old one */ + for (i = 0; i < info->res_num; i++) { + size_t final_start, final_end; + size_t common_start, common_end; + + res = &info->res[i]; + if (res->flags != flags) + continue; + + common_start = max((size_t)res->start, start); + common_end = min((size_t)res->end, end); + if (common_start > common_end + 1) + continue; + + final_start = min((size_t)res->start, start); + final_end = max((size_t)res->end, end); + + res->start = final_start; + res->end = final_end; + return; + } + +addit: + + /* need to add that */ + if (info->res_num >= RES_NUM) + return; + + res = &info->res[info->res_num]; + res->name = info->name; + res->flags = flags; + res->start = start; + res->end = end; + res->child = NULL; + info->res_num++; +} diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h new file mode 100644 index 00000000000..adbc23fe82a --- /dev/null +++ b/arch/x86/pci/bus_numa.h @@ -0,0 +1,27 @@ +#ifdef CONFIG_X86_64 + +/* + * sub bus (transparent) will use entres from 3 to store extra from + * root, so need to make sure we have enough slot there, Should we + * increase PCI_BUS_NUM_RESOURCES? + */ +#define RES_NUM 16 +struct pci_root_info { + char name[12]; + unsigned int res_num; + struct resource res[RES_NUM]; + int bus_min; + int bus_max; + int node; + int link; +}; + +/* 4 at this time, it may become to 32 */ +#define PCI_ROOT_NR 4 +extern int pci_root_num; +extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; +extern int found_all_numa_early; + +extern void update_res(struct pci_root_info *info, size_t start, + size_t end, unsigned long flags, int merge); +#endif diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 1331fcf2614..d2552c68e94 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -410,8 +410,6 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) return bus; } -extern u8 pci_cache_line_size; - int __init pcibios_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -422,15 +420,19 @@ int __init pcibios_init(void) } /* - * Assume PCI cacheline size of 32 bytes for all x86s except K7/K8 - * and P4. It's also good for 386/486s (which actually have 16) + * Set PCI cacheline size to that of the CPU if the CPU has reported it. + * (For older CPUs that don't support cpuid, we se it to 32 bytes + * It's also good for 386/486s (which actually have 16) * as quite a few PCI devices do not support smaller values. */ - pci_cache_line_size = 32 >> 2; - if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) - pci_cache_line_size = 64 >> 2; /* K7 & K8 */ - else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) - pci_cache_line_size = 128 >> 2; /* P4 */ + if (c->x86_clflush_size > 0) { + pci_dfl_cache_line_size = c->x86_clflush_size >> 2; + printk(KERN_DEBUG "PCI: pci_cache_line_size set to %d bytes\n", + pci_dfl_cache_line_size << 2); + } else { + pci_dfl_cache_line_size = 32 >> 2; + printk(KERN_DEBUG "PCI: Unknown cacheline size. Setting to 32 bytes\n"); + } pcibios_resource_survey(); diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index aaf26ae58cd..d1067d539be 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -12,8 +12,6 @@ u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) u32 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inl(0xcfc); - if (v != 0xffffffff) - pr_debug("%x reading 4 from %x: %x\n", slot, offset, v); return v; } @@ -22,7 +20,6 @@ u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset) u8 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inb(0xcfc + (offset&3)); - pr_debug("%x reading 1 from %x: %x\n", slot, offset, v); return v; } @@ -31,28 +28,24 @@ u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) u16 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inw(0xcfc + (offset&2)); - pr_debug("%x reading 2 from %x: %x\n", slot, offset, v); return v; } void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val) { - pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outl(val, 0xcfc); } void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) { - pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outb(val, 0xcfc + (offset&3)); } void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) { - pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outw(val, 0xcfc + (offset&2)); } diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index b22d13b0c71..5dc9e8c63fc 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -129,7 +129,9 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) continue; if (!r->start || pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); + dev_info(&dev->dev, + "can't reserve window %pR\n", + r); /* * Something is wrong with the region. * Invalidate the resource to prevent @@ -144,16 +146,29 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) } } +struct pci_check_idx_range { + int start; + int end; +}; + static void __init pcibios_allocate_resources(int pass) { struct pci_dev *dev = NULL; - int idx, disabled; + int idx, disabled, i; u16 command; struct resource *r; + struct pci_check_idx_range idx_range[] = { + { PCI_STD_RESOURCES, PCI_STD_RESOURCE_END }, +#ifdef CONFIG_PCI_IOV + { PCI_IOV_RESOURCES, PCI_IOV_RESOURCE_END }, +#endif + }; + for_each_pci_dev(dev) { pci_read_config_word(dev, PCI_COMMAND, &command); - for (idx = 0; idx < PCI_ROM_RESOURCE; idx++) { + for (i = 0; i < ARRAY_SIZE(idx_range); i++) + for (idx = idx_range[i].start; idx <= idx_range[i].end; idx++) { r = &dev->resource[idx]; if (r->parent) /* Already allocated */ continue; @@ -164,12 +179,12 @@ static void __init pcibios_allocate_resources(int pass) else disabled = !(command & PCI_COMMAND_MEMORY); if (pass == disabled) { - dev_dbg(&dev->dev, "resource %#08llx-%#08llx (f=%lx, d=%d, p=%d)\n", - (unsigned long long) r->start, - (unsigned long long) r->end, - r->flags, disabled, pass); + dev_dbg(&dev->dev, + "BAR %d: reserving %pr (d=%d, p=%d)\n", + idx, r, disabled, pass); if (pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); + dev_info(&dev->dev, + "can't reserve %pR\n", r); /* We'll assign a new address later */ r->end -= r->start; r->start = 0; @@ -182,7 +197,7 @@ static void __init pcibios_allocate_resources(int pass) /* Turn the ROM off, leave the resource region, * but keep it unregistered. */ u32 reg; - dev_dbg(&dev->dev, "disabling ROM\n"); + dev_dbg(&dev->dev, "disabling ROM %pR\n", r); r->flags &= ~IORESOURCE_ROM_ENABLE; pci_read_config_dword(dev, dev->rom_base_reg, ®); @@ -282,6 +297,15 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return -EINVAL; prot = pgprot_val(vma->vm_page_prot); + + /* + * Return error if pat is not enabled and write_combine is requested. + * Caller can followup with UC MINUS request and add a WC mtrr if there + * is a free mtrr slot. + */ + if (!pat_enabled && write_combine) + return -EINVAL; + if (pat_enabled && write_combine) prot |= _PAGE_CACHE_WC; else if (pat_enabled || boot_cpu_data.x86 > 3) diff --git a/arch/x86/pci/intel_bus.c b/arch/x86/pci/intel_bus.c new file mode 100644 index 00000000000..b7a55dc55d1 --- /dev/null +++ b/arch/x86/pci/intel_bus.c @@ -0,0 +1,90 @@ +/* + * to read io range from IOH pci conf, need to do it after mmconfig is there + */ + +#include <linux/delay.h> +#include <linux/dmi.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <asm/pci_x86.h> + +#include "bus_numa.h" + +static inline void print_ioh_resources(struct pci_root_info *info) +{ + int res_num; + int busnum; + int i; + + printk(KERN_DEBUG "IOH bus: [%02x, %02x]\n", + info->bus_min, info->bus_max); + res_num = info->res_num; + busnum = info->bus_min; + for (i = 0; i < res_num; i++) { + struct resource *res; + + res = &info->res[i]; + printk(KERN_DEBUG "IOH bus: %02x index %x %s: [%llx, %llx]\n", + busnum, i, + (res->flags & IORESOURCE_IO) ? "io port" : + "mmio", + res->start, res->end); + } +} + +#define IOH_LIO 0x108 +#define IOH_LMMIOL 0x10c +#define IOH_LMMIOH 0x110 +#define IOH_LMMIOH_BASEU 0x114 +#define IOH_LMMIOH_LIMITU 0x118 +#define IOH_LCFGBUS 0x11c + +static void __devinit pci_root_bus_res(struct pci_dev *dev) +{ + u16 word; + u32 dword; + struct pci_root_info *info; + u16 io_base, io_end; + u32 mmiol_base, mmiol_end; + u64 mmioh_base, mmioh_end; + int bus_base, bus_end; + + if (pci_root_num >= PCI_ROOT_NR) { + printk(KERN_DEBUG "intel_bus.c: PCI_ROOT_NR is too small\n"); + return; + } + + info = &pci_root_info[pci_root_num]; + pci_root_num++; + + pci_read_config_word(dev, IOH_LCFGBUS, &word); + bus_base = (word & 0xff); + bus_end = (word & 0xff00) >> 8; + sprintf(info->name, "PCI Bus #%02x", bus_base); + info->bus_min = bus_base; + info->bus_max = bus_end; + + pci_read_config_word(dev, IOH_LIO, &word); + io_base = (word & 0xf0) << (12 - 4); + io_end = (word & 0xf000) | 0xfff; + update_res(info, io_base, io_end, IORESOURCE_IO, 0); + + pci_read_config_dword(dev, IOH_LMMIOL, &dword); + mmiol_base = (dword & 0xff00) << (24 - 8); + mmiol_end = (dword & 0xff000000) | 0xffffff; + update_res(info, mmiol_base, mmiol_end, IORESOURCE_MEM, 0); + + pci_read_config_dword(dev, IOH_LMMIOH, &dword); + mmioh_base = ((u64)(dword & 0xfc00)) << (26 - 10); + mmioh_end = ((u64)(dword & 0xfc000000) | 0x3ffffff); + pci_read_config_dword(dev, IOH_LMMIOH_BASEU, &dword); + mmioh_base |= ((u64)(dword & 0x7ffff)) << 32; + pci_read_config_dword(dev, IOH_LMMIOH_LIMITU, &dword); + mmioh_end |= ((u64)(dword & 0x7ffff)) << 32; + update_res(info, mmioh_base, mmioh_end, IORESOURCE_MEM, 0); + + print_ioh_resources(info); +} + +/* intel IOH */ +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, pci_root_bus_res); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 602c172d3bd..b19d1e54201 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -15,48 +15,98 @@ #include <linux/acpi.h> #include <linux/sfi_acpi.h> #include <linux/bitmap.h> -#include <linux/sort.h> +#include <linux/dmi.h> #include <asm/e820.h> #include <asm/pci_x86.h> #include <asm/acpi.h> #define PREFIX "PCI: " -/* aperture is up to 256MB but BIOS may reserve less */ -#define MMCONFIG_APER_MIN (2 * 1024*1024) -#define MMCONFIG_APER_MAX (256 * 1024*1024) - /* Indicate if the mmcfg resources have been placed into the resource table. */ static int __initdata pci_mmcfg_resources_inserted; -static __init int extend_mmcfg(int num) +LIST_HEAD(pci_mmcfg_list); + +static __init void pci_mmconfig_remove(struct pci_mmcfg_region *cfg) { - struct acpi_mcfg_allocation *new; - int new_num = pci_mmcfg_config_num + num; + if (cfg->res.parent) + release_resource(&cfg->res); + list_del(&cfg->list); + kfree(cfg); +} - new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL); - if (!new) - return -1; +static __init void free_all_mmcfg(void) +{ + struct pci_mmcfg_region *cfg, *tmp; - if (pci_mmcfg_config) { - memcpy(new, pci_mmcfg_config, - sizeof(pci_mmcfg_config[0]) * new_num); - kfree(pci_mmcfg_config); + pci_mmcfg_arch_free(); + list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list) + pci_mmconfig_remove(cfg); +} + +static __init void list_add_sorted(struct pci_mmcfg_region *new) +{ + struct pci_mmcfg_region *cfg; + + /* keep list sorted by segment and starting bus number */ + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + if (cfg->segment > new->segment || + (cfg->segment == new->segment && + cfg->start_bus >= new->start_bus)) { + list_add_tail(&new->list, &cfg->list); + return; + } } - pci_mmcfg_config = new; + list_add_tail(&new->list, &pci_mmcfg_list); +} - return 0; +static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, + int end, u64 addr) +{ + struct pci_mmcfg_region *new; + int num_buses; + struct resource *res; + + if (addr == 0) + return NULL; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + + new->address = addr; + new->segment = segment; + new->start_bus = start; + new->end_bus = end; + + list_add_sorted(new); + + num_buses = end - start + 1; + res = &new->res; + res->start = addr + PCI_MMCFG_BUS_OFFSET(start); + res->end = addr + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN, + "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); + res->name = new->name; + + printk(KERN_INFO PREFIX "MMCONFIG for domain %04x [bus %02x-%02x] at " + "%pR (base %#lx)\n", segment, start, end, &new->res, + (unsigned long) addr); + + return new; } -static __init void fill_one_mmcfg(u64 addr, int segment, int start, int end) +struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus) { - int i = pci_mmcfg_config_num; + struct pci_mmcfg_region *cfg; - pci_mmcfg_config_num++; - pci_mmcfg_config[i].address = addr; - pci_mmcfg_config[i].pci_segment = segment; - pci_mmcfg_config[i].start_bus_number = start; - pci_mmcfg_config[i].end_bus_number = end; + list_for_each_entry(cfg, &pci_mmcfg_list, list) + if (cfg->segment == segment && + cfg->start_bus <= bus && bus <= cfg->end_bus) + return cfg; + + return NULL; } static const char __init *pci_mmcfg_e7520(void) @@ -68,11 +118,9 @@ static const char __init *pci_mmcfg_e7520(void) if (win == 0x0000 || win == 0xf000) return NULL; - if (extend_mmcfg(1) == -1) + if (pci_mmconfig_add(0, 0, 255, win << 16) == NULL) return NULL; - fill_one_mmcfg(win << 16, 0, 0, 255); - return "Intel Corporation E7520 Memory Controller Hub"; } @@ -114,11 +162,9 @@ static const char __init *pci_mmcfg_intel_945(void) if ((pciexbar & mask) >= 0xf0000000U) return NULL; - if (extend_mmcfg(1) == -1) + if (pci_mmconfig_add(0, 0, (len >> 20) - 1, pciexbar & mask) == NULL) return NULL; - fill_one_mmcfg(pciexbar & mask, 0, 0, (len >> 20) - 1); - return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; } @@ -127,7 +173,7 @@ static const char __init *pci_mmcfg_amd_fam10h(void) u32 low, high, address; u64 base, msr; int i; - unsigned segnbits = 0, busnbits; + unsigned segnbits = 0, busnbits, end_bus; if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) return NULL; @@ -161,11 +207,13 @@ static const char __init *pci_mmcfg_amd_fam10h(void) busnbits = 8; } - if (extend_mmcfg(1 << segnbits) == -1) - return NULL; - + end_bus = (1 << busnbits) - 1; for (i = 0; i < (1 << segnbits); i++) - fill_one_mmcfg(base + (1<<28) * i, i, 0, (1 << busnbits) - 1); + if (pci_mmconfig_add(i, 0, end_bus, + base + (1<<28) * i) == NULL) { + free_all_mmcfg(); + return NULL; + } return "AMD Family 10h NB"; } @@ -190,7 +238,7 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void) /* * do check if amd fam10h already took over */ - if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked) + if (!acpi_disabled || !list_empty(&pci_mmcfg_list) || mcp55_checked) return NULL; mcp55_checked = true; @@ -213,16 +261,14 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void) if (!(extcfg & extcfg_enable_mask)) continue; - if (extend_mmcfg(1) == -1) - continue; - size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift; base = extcfg & extcfg_base_mask[size_index]; /* base could > 4G */ base <<= extcfg_base_lshift; start = (extcfg & extcfg_start_mask) >> extcfg_start_shift; end = start + extcfg_sizebus[size_index] - 1; - fill_one_mmcfg(base, 0, start, end); + if (pci_mmconfig_add(0, start, end, base) == NULL) + continue; mcp55_mmconf_found++; } @@ -253,45 +299,27 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { 0x0369, pci_mmcfg_nvidia_mcp55 }, }; -static int __init cmp_mmcfg(const void *x1, const void *x2) -{ - const typeof(pci_mmcfg_config[0]) *m1 = x1; - const typeof(pci_mmcfg_config[0]) *m2 = x2; - int start1, start2; - - start1 = m1->start_bus_number; - start2 = m2->start_bus_number; - - return start1 - start2; -} - static void __init pci_mmcfg_check_end_bus_number(void) { - int i; - typeof(pci_mmcfg_config[0]) *cfg, *cfgx; - - /* sort them at first */ - sort(pci_mmcfg_config, pci_mmcfg_config_num, - sizeof(pci_mmcfg_config[0]), cmp_mmcfg, NULL); + struct pci_mmcfg_region *cfg, *cfgx; /* last one*/ - if (pci_mmcfg_config_num > 0) { - i = pci_mmcfg_config_num - 1; - cfg = &pci_mmcfg_config[i]; - if (cfg->end_bus_number < cfg->start_bus_number) - cfg->end_bus_number = 255; - } + cfg = list_entry(pci_mmcfg_list.prev, typeof(*cfg), list); + if (cfg) + if (cfg->end_bus < cfg->start_bus) + cfg->end_bus = 255; - /* don't overlap please */ - for (i = 0; i < pci_mmcfg_config_num - 1; i++) { - cfg = &pci_mmcfg_config[i]; - cfgx = &pci_mmcfg_config[i+1]; + if (list_is_singular(&pci_mmcfg_list)) + return; - if (cfg->end_bus_number < cfg->start_bus_number) - cfg->end_bus_number = 255; + /* don't overlap please */ + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + if (cfg->end_bus < cfg->start_bus) + cfg->end_bus = 255; - if (cfg->end_bus_number >= cfgx->start_bus_number) - cfg->end_bus_number = cfgx->start_bus_number - 1; + cfgx = list_entry(cfg->list.next, typeof(*cfg), list); + if (cfg != cfgx && cfg->end_bus >= cfgx->start_bus) + cfg->end_bus = cfgx->start_bus - 1; } } @@ -306,8 +334,7 @@ static int __init pci_mmcfg_check_hostbridge(void) if (!raw_pci_ops) return 0; - pci_mmcfg_config_num = 0; - pci_mmcfg_config = NULL; + free_all_mmcfg(); for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) { bus = pci_mmcfg_probes[i].bus; @@ -322,45 +349,22 @@ static int __init pci_mmcfg_check_hostbridge(void) name = pci_mmcfg_probes[i].probe(); if (name) - printk(KERN_INFO "PCI: Found %s with MMCONFIG support.\n", + printk(KERN_INFO PREFIX "%s with MMCONFIG support\n", name); } /* some end_bus_number is crazy, fix it */ pci_mmcfg_check_end_bus_number(); - return pci_mmcfg_config_num != 0; + return !list_empty(&pci_mmcfg_list); } static void __init pci_mmcfg_insert_resources(void) { -#define PCI_MMCFG_RESOURCE_NAME_LEN 24 - int i; - struct resource *res; - char *names; - unsigned num_buses; - - res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res), - pci_mmcfg_config_num, GFP_KERNEL); - if (!res) { - printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n"); - return; - } + struct pci_mmcfg_region *cfg; - names = (void *)&res[pci_mmcfg_config_num]; - for (i = 0; i < pci_mmcfg_config_num; i++, res++) { - struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i]; - num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; - res->name = names; - snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, - "PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment, - cfg->start_bus_number, cfg->end_bus_number); - res->start = cfg->address + (cfg->start_bus_number << 20); - res->end = res->start + (num_buses << 20) - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - insert_resource(&iomem_resource, res); - names += PCI_MMCFG_RESOURCE_NAME_LEN; - } + list_for_each_entry(cfg, &pci_mmcfg_list, list) + insert_resource(&iomem_resource, &cfg->res); /* Mark that the resources have been inserted. */ pci_mmcfg_resources_inserted = 1; @@ -437,11 +441,12 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); static int __init is_mmconf_reserved(check_reserved_t is_reserved, - u64 addr, u64 size, int i, - typeof(pci_mmcfg_config[0]) *cfg, int with_e820) + struct pci_mmcfg_region *cfg, int with_e820) { + u64 addr = cfg->res.start; + u64 size = resource_size(&cfg->res); u64 old_size = size; - int valid = 0; + int valid = 0, num_buses; while (!is_reserved(addr, addr + size, E820_RESERVED)) { size >>= 1; @@ -450,19 +455,25 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, } if (size >= (16UL<<20) || size == old_size) { - printk(KERN_NOTICE - "PCI: MCFG area at %Lx reserved in %s\n", - addr, with_e820?"E820":"ACPI motherboard resources"); + printk(KERN_INFO PREFIX "MMCONFIG at %pR reserved in %s\n", + &cfg->res, + with_e820 ? "E820" : "ACPI motherboard resources"); valid = 1; if (old_size != size) { - /* update end_bus_number */ - cfg->end_bus_number = cfg->start_bus_number + ((size>>20) - 1); - printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx " - "segment %hu buses %u - %u\n", - i, (unsigned long)cfg->address, cfg->pci_segment, - (unsigned int)cfg->start_bus_number, - (unsigned int)cfg->end_bus_number); + /* update end_bus */ + cfg->end_bus = cfg->start_bus + ((size>>20) - 1); + num_buses = cfg->end_bus - cfg->start_bus + 1; + cfg->res.end = cfg->res.start + + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; + snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN, + "PCI MMCONFIG %04x [bus %02x-%02x]", + cfg->segment, cfg->start_bus, cfg->end_bus); + printk(KERN_INFO PREFIX + "MMCONFIG for %04x [bus%02x-%02x] " + "at %pR (base %#lx) (size reduced!)\n", + cfg->segment, cfg->start_bus, cfg->end_bus, + &cfg->res, (unsigned long) cfg->address); } } @@ -471,45 +482,26 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, static void __init pci_mmcfg_reject_broken(int early) { - typeof(pci_mmcfg_config[0]) *cfg; - int i; + struct pci_mmcfg_region *cfg; - if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || - (pci_mmcfg_config[0].address == 0)) - return; - - for (i = 0; i < pci_mmcfg_config_num; i++) { + list_for_each_entry(cfg, &pci_mmcfg_list, list) { int valid = 0; - u64 addr, size; - - cfg = &pci_mmcfg_config[i]; - addr = cfg->start_bus_number; - addr <<= 20; - addr += cfg->address; - size = cfg->end_bus_number + 1 - cfg->start_bus_number; - size <<= 20; - printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " - "segment %hu buses %u - %u\n", - i, (unsigned long)cfg->address, cfg->pci_segment, - (unsigned int)cfg->start_bus_number, - (unsigned int)cfg->end_bus_number); if (!early && !acpi_disabled) - valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0); + valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0); if (valid) continue; if (!early) - printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" - " reserved in ACPI motherboard resources\n", - cfg->address); + printk(KERN_ERR FW_BUG PREFIX + "MMCONFIG at %pR not reserved in " + "ACPI motherboard resources\n", &cfg->res); /* Don't try to do this check unless configuration type 1 is available. how about type 2 ?*/ if (raw_pci_ops) - valid = is_mmconf_reserved(e820_all_mapped, addr, size, i, cfg, 1); + valid = is_mmconf_reserved(e820_all_mapped, cfg, 1); if (!valid) goto reject; @@ -518,34 +510,41 @@ static void __init pci_mmcfg_reject_broken(int early) return; reject: - printk(KERN_INFO "PCI: Not using MMCONFIG.\n"); - pci_mmcfg_arch_free(); - kfree(pci_mmcfg_config); - pci_mmcfg_config = NULL; - pci_mmcfg_config_num = 0; + printk(KERN_INFO PREFIX "not using MMCONFIG\n"); + free_all_mmcfg(); } static int __initdata known_bridge; -static int acpi_mcfg_64bit_base_addr __initdata = FALSE; +static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, + struct acpi_mcfg_allocation *cfg) +{ + int year; -/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ -struct acpi_mcfg_allocation *pci_mmcfg_config; -int pci_mmcfg_config_num; + if (cfg->address < 0xFFFFFFFF) + return 0; -static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) -{ if (!strcmp(mcfg->header.oem_id, "SGI")) - acpi_mcfg_64bit_base_addr = TRUE; + return 0; - return 0; + if (mcfg->header.revision >= 1) { + if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && + year >= 2010) + return 0; + } + + printk(KERN_ERR PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx " + "is above 4GB, ignored\n", cfg->pci_segment, + cfg->start_bus_number, cfg->end_bus_number, cfg->address); + return -EINVAL; } static int __init pci_parse_mcfg(struct acpi_table_header *header) { struct acpi_table_mcfg *mcfg; + struct acpi_mcfg_allocation *cfg_table, *cfg; unsigned long i; - int config_size; + int entries; if (!header) return -EINVAL; @@ -553,38 +552,33 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) mcfg = (struct acpi_table_mcfg *)header; /* how many config structures do we have */ - pci_mmcfg_config_num = 0; + free_all_mmcfg(); + entries = 0; i = header->length - sizeof(struct acpi_table_mcfg); while (i >= sizeof(struct acpi_mcfg_allocation)) { - ++pci_mmcfg_config_num; + entries++; i -= sizeof(struct acpi_mcfg_allocation); }; - if (pci_mmcfg_config_num == 0) { + if (entries == 0) { printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); return -ENODEV; } - config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); - pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); - if (!pci_mmcfg_config) { - printk(KERN_WARNING PREFIX - "No memory for MCFG config tables\n"); - return -ENOMEM; - } - - memcpy(pci_mmcfg_config, &mcfg[1], config_size); - - acpi_mcfg_oem_check(mcfg); - - for (i = 0; i < pci_mmcfg_config_num; ++i) { - if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && - !acpi_mcfg_64bit_base_addr) { - printk(KERN_ERR PREFIX - "MMCONFIG not in low 4GB of memory\n"); - kfree(pci_mmcfg_config); - pci_mmcfg_config_num = 0; + cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1]; + for (i = 0; i < entries; i++) { + cfg = &cfg_table[i]; + if (acpi_mcfg_check_entry(mcfg, cfg)) { + free_all_mmcfg(); return -ENODEV; } + + if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number, + cfg->end_bus_number, cfg->address) == NULL) { + printk(KERN_WARNING PREFIX + "no memory for MCFG entries\n"); + free_all_mmcfg(); + return -ENOMEM; + } } return 0; @@ -614,9 +608,7 @@ static void __init __pci_mmcfg_init(int early) pci_mmcfg_reject_broken(early); - if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || - (pci_mmcfg_config[0].address == 0)) + if (list_empty(&pci_mmcfg_list)) return; if (pci_mmcfg_arch_init()) @@ -648,9 +640,7 @@ static int __init pci_mmcfg_late_insert_resources(void) */ if ((pci_mmcfg_resources_inserted == 1) || (pci_probe & PCI_PROBE_MMCONF) == 0 || - (pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || - (pci_mmcfg_config[0].address == 0)) + list_empty(&pci_mmcfg_list)) return 1; /* diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index f10a7e94a84..90d5fd476ed 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -27,18 +27,10 @@ static int mmcfg_last_accessed_cpu; */ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) { - struct acpi_mcfg_allocation *cfg; - int cfg_num; - - for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { - cfg = &pci_mmcfg_config[cfg_num]; - if (cfg->pci_segment == seg && - (cfg->start_bus_number <= bus) && - (cfg->end_bus_number >= bus)) - return cfg->address; - } + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); - /* Fall back to type 0 */ + if (cfg) + return cfg->address; return 0; } @@ -47,7 +39,7 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) */ static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn) { - u32 dev_base = base | (bus << 20) | (devfn << 12); + u32 dev_base = base | PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12); int cpu = smp_processor_id(); if (dev_base != mmcfg_last_accessed_device || cpu != mmcfg_last_accessed_cpu) { diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 94349f8b2f9..e783841bd1d 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -12,38 +12,15 @@ #include <asm/e820.h> #include <asm/pci_x86.h> -/* Static virtual mapping of the MMCONFIG aperture */ -struct mmcfg_virt { - struct acpi_mcfg_allocation *cfg; - char __iomem *virt; -}; -static struct mmcfg_virt *pci_mmcfg_virt; - -static char __iomem *get_virt(unsigned int seg, unsigned bus) -{ - struct acpi_mcfg_allocation *cfg; - int cfg_num; - - for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { - cfg = pci_mmcfg_virt[cfg_num].cfg; - if (cfg->pci_segment == seg && - (cfg->start_bus_number <= bus) && - (cfg->end_bus_number >= bus)) - return pci_mmcfg_virt[cfg_num].virt; - } - - /* Fall back to type 0 */ - return NULL; -} +#define PREFIX "PCI: " static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) { - char __iomem *addr; + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); - addr = get_virt(seg, bus); - if (!addr) - return NULL; - return addr + ((bus << 20) | (devfn << 12)); + if (cfg && cfg->virt) + return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); + return NULL; } static int pci_mmcfg_read(unsigned int seg, unsigned int bus, @@ -109,42 +86,30 @@ static struct pci_raw_ops pci_mmcfg = { .write = pci_mmcfg_write, }; -static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg) +static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) { void __iomem *addr; u64 start, size; + int num_buses; - start = cfg->start_bus_number; - start <<= 20; - start += cfg->address; - size = cfg->end_bus_number + 1 - cfg->start_bus_number; - size <<= 20; + start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus); + num_buses = cfg->end_bus - cfg->start_bus + 1; + size = PCI_MMCFG_BUS_OFFSET(num_buses); addr = ioremap_nocache(start, size); - if (addr) { - printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n", - start, start + size - 1); - addr -= cfg->start_bus_number << 20; - } + if (addr) + addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus); return addr; } int __init pci_mmcfg_arch_init(void) { - int i; - pci_mmcfg_virt = kzalloc(sizeof(*pci_mmcfg_virt) * - pci_mmcfg_config_num, GFP_KERNEL); - if (pci_mmcfg_virt == NULL) { - printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n"); - return 0; - } + struct pci_mmcfg_region *cfg; - for (i = 0; i < pci_mmcfg_config_num; ++i) { - pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i]; - pci_mmcfg_virt[i].virt = mcfg_ioremap(&pci_mmcfg_config[i]); - if (!pci_mmcfg_virt[i].virt) { - printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " - "segment %d\n", - pci_mmcfg_config[i].pci_segment); + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + cfg->virt = mcfg_ioremap(cfg); + if (!cfg->virt) { + printk(KERN_ERR PREFIX "can't map MMCONFIG at %pR\n", + &cfg->res); pci_mmcfg_arch_free(); return 0; } @@ -155,19 +120,12 @@ int __init pci_mmcfg_arch_init(void) void __init pci_mmcfg_arch_free(void) { - int i; - - if (pci_mmcfg_virt == NULL) - return; + struct pci_mmcfg_region *cfg; - for (i = 0; i < pci_mmcfg_config_num; ++i) { - if (pci_mmcfg_virt[i].virt) { - iounmap(pci_mmcfg_virt[i].virt + (pci_mmcfg_virt[i].cfg->start_bus_number << 20)); - pci_mmcfg_virt[i].virt = NULL; - pci_mmcfg_virt[i].cfg = NULL; + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + if (cfg->virt) { + iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus)); + cfg->virt = NULL; } } - - kfree(pci_mmcfg_virt); - pci_mmcfg_virt = NULL; } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b8e45f164e2..2b26dd5930c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -27,7 +27,9 @@ #include <linux/page-flags.h> #include <linux/highmem.h> #include <linux/console.h> +#include <linux/pci.h> +#include <xen/xen.h> #include <xen/interface/xen.h> #include <xen/interface/version.h> #include <xen/interface/physdev.h> @@ -1175,7 +1177,11 @@ asmlinkage void __init xen_start_kernel(void) add_preferred_console("xenboot", 0, NULL); add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); + } else { + /* Make sure ACS will be enabled */ + pci_request_acs(); } + xen_raw_console_write("about to get started...\n"); diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 7702118509a..c7b10b4298e 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -19,6 +19,7 @@ obj-y += acpi.o \ # All the builtin files are in the "acpi." module_param namespace. acpi-y += osl.o utils.o reboot.o +acpi-y += hest.o # sleep related files acpi-y += wakeup.o diff --git a/drivers/acpi/hest.c b/drivers/acpi/hest.c new file mode 100644 index 00000000000..4bb18c980ac --- /dev/null +++ b/drivers/acpi/hest.c @@ -0,0 +1,135 @@ +#include <linux/acpi.h> +#include <linux/pci.h> + +#define PREFIX "ACPI: " + +static inline unsigned long parse_acpi_hest_ia_machine_check(struct acpi_hest_ia_machine_check *p) +{ + return sizeof(*p) + + (sizeof(struct acpi_hest_ia_error_bank) * p->num_hardware_banks); +} + +static inline unsigned long parse_acpi_hest_ia_corrected(struct acpi_hest_ia_corrected *p) +{ + return sizeof(*p) + + (sizeof(struct acpi_hest_ia_error_bank) * p->num_hardware_banks); +} + +static inline unsigned long parse_acpi_hest_ia_nmi(struct acpi_hest_ia_nmi *p) +{ + return sizeof(*p); +} + +static inline unsigned long parse_acpi_hest_generic(struct acpi_hest_generic *p) +{ + return sizeof(*p); +} + +static inline unsigned int hest_match_pci(struct acpi_hest_aer_common *p, struct pci_dev *pci) +{ + return (0 == pci_domain_nr(pci->bus) && + p->bus == pci->bus->number && + p->device == PCI_SLOT(pci->devfn) && + p->function == PCI_FUNC(pci->devfn)); +} + +static unsigned long parse_acpi_hest_aer(void *hdr, int type, struct pci_dev *pci, int *firmware_first) +{ + struct acpi_hest_aer_common *p = hdr + sizeof(struct acpi_hest_header); + unsigned long rc=0; + u8 pcie_type = 0; + u8 bridge = 0; + switch (type) { + case ACPI_HEST_TYPE_AER_ROOT_PORT: + rc = sizeof(struct acpi_hest_aer_root); + pcie_type = PCI_EXP_TYPE_ROOT_PORT; + break; + case ACPI_HEST_TYPE_AER_ENDPOINT: + rc = sizeof(struct acpi_hest_aer); + pcie_type = PCI_EXP_TYPE_ENDPOINT; + break; + case ACPI_HEST_TYPE_AER_BRIDGE: + rc = sizeof(struct acpi_hest_aer_bridge); + if ((pci->class >> 16) == PCI_BASE_CLASS_BRIDGE) + bridge = 1; + break; + } + + if (p->flags & ACPI_HEST_GLOBAL) { + if ((pci->is_pcie && (pci->pcie_type == pcie_type)) || bridge) + *firmware_first = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST); + } + else + if (hest_match_pci(p, pci)) + *firmware_first = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST); + return rc; +} + +static int acpi_hest_firmware_first(struct acpi_table_header *stdheader, struct pci_dev *pci) +{ + struct acpi_table_hest *hest = (struct acpi_table_hest *)stdheader; + void *p = (void *)hest + sizeof(*hest); /* defined by the ACPI 4.0 spec */ + struct acpi_hest_header *hdr = p; + + int i; + int firmware_first = 0; + static unsigned char printed_unused = 0; + static unsigned char printed_reserved = 0; + + for (i=0, hdr=p; p < (((void *)hest) + hest->header.length) && i < hest->error_source_count; i++) { + switch (hdr->type) { + case ACPI_HEST_TYPE_IA32_CHECK: + p += parse_acpi_hest_ia_machine_check(p); + break; + case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK: + p += parse_acpi_hest_ia_corrected(p); + break; + case ACPI_HEST_TYPE_IA32_NMI: + p += parse_acpi_hest_ia_nmi(p); + break; + /* These three should never appear */ + case ACPI_HEST_TYPE_NOT_USED3: + case ACPI_HEST_TYPE_NOT_USED4: + case ACPI_HEST_TYPE_NOT_USED5: + if (!printed_unused) { + printk(KERN_DEBUG PREFIX + "HEST Error Source list contains an obsolete type (%d).\n", hdr->type); + printed_unused = 1; + } + break; + case ACPI_HEST_TYPE_AER_ROOT_PORT: + case ACPI_HEST_TYPE_AER_ENDPOINT: + case ACPI_HEST_TYPE_AER_BRIDGE: + p += parse_acpi_hest_aer(p, hdr->type, pci, &firmware_first); + break; + case ACPI_HEST_TYPE_GENERIC_ERROR: + p += parse_acpi_hest_generic(p); + break; + /* These should never appear either */ + case ACPI_HEST_TYPE_RESERVED: + default: + if (!printed_reserved) { + printk(KERN_DEBUG PREFIX + "HEST Error Source list contains a reserved type (%d).\n", hdr->type); + printed_reserved = 1; + } + break; + } + } + return firmware_first; +} + +int acpi_hest_firmware_first_pci(struct pci_dev *pci) +{ + acpi_status status = AE_NOT_FOUND; + struct acpi_table_header *hest = NULL; + status = acpi_get_table(ACPI_SIG_HEST, 1, &hest); + + if (ACPI_SUCCESS(status)) { + if (acpi_hest_firmware_first(hest, pci)) { + return 1; + } + } + return 0; +} +EXPORT_SYMBOL_GPL(acpi_hest_firmware_first_pci); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index b8578bb3f4c..05a31e55d27 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -42,6 +42,7 @@ #include <linux/module.h> #include <linux/scatterlist.h> +#include <xen/xen.h> #include <xen/xenbus.h> #include <xen/grant_table.h> #include <xen/events.h> diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index a6ee32b599a..b1a71638c77 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -25,6 +25,8 @@ #include <linux/types.h> #include <asm/xen/hypervisor.h> + +#include <xen/xen.h> #include <xen/page.h> #include <xen/events.h> #include <xen/interface/io/console.h> diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c index b115726dc08..c721c0a23eb 100644 --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c @@ -21,7 +21,10 @@ #include <linux/errno.h> #include <linux/module.h> #include <linux/input.h> + #include <asm/xen/hypervisor.h> + +#include <xen/xen.h> #include <xen/events.h> #include <xen/page.h> #include <xen/interface/io/fbif.h> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index baa051d5bfb..a869b45d3d3 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -42,6 +42,7 @@ #include <linux/mm.h> #include <net/ip.h> +#include <xen/xen.h> #include <xen/xenbus.h> #include <xen/events.h> #include <xen/page.h> diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index fdc864f9cf2..b1ecefa2a23 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -27,10 +27,10 @@ config PCI_LEGACY default y help Say Y here if you want to include support for the deprecated - pci_find_slot() and pci_find_device() APIs. Most drivers have - been converted over to using the proper hotplug APIs, so this - option serves to include/exclude only a few drivers that are - still using this API. + pci_find_device() API. Most drivers have been converted over + to using the proper hotplug APIs, so this option serves to + include/exclude only a few drivers that are still using this + API. config PCI_DEBUG bool "PCI Debugging" @@ -69,3 +69,10 @@ config PCI_IOV physical resources. If unsure, say N. + +config PCI_IOAPIC + bool + depends on PCI + depends on ACPI + depends on HOTPLUG + default y diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 4a7f11d8f43..4df48d58eaa 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -14,6 +14,8 @@ CFLAGS_legacy.o += -Wno-deprecated-declarations # Build PCI Express stuff if needed obj-$(CONFIG_PCIEPORTBUS) += pcie/ +obj-$(CONFIG_PCI_IOAPIC) += ioapic.o + obj-$(CONFIG_HOTPLUG) += hotplug.o # Build the PCI Hotplug drivers if we were asked to diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 416f6ac65b7..6cdc931f7c1 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -320,7 +320,7 @@ found: for (bus = dev->bus; bus; bus = bus->parent) { struct pci_dev *bridge = bus->self; - if (!bridge || !bridge->is_pcie || + if (!bridge || !pci_is_pcie(bridge) || bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) return 0; @@ -645,8 +645,11 @@ void __init detect_intel_iommu(void) "x2apic and Intr-remapping.\n"); #endif #ifdef CONFIG_DMAR - if (ret && !no_iommu && !iommu_detected && !dmar_disabled) + if (ret && !no_iommu && !iommu_detected && !dmar_disabled) { iommu_detected = 1; + /* Make sure ACS will be enabled */ + pci_request_acs(); + } #endif #ifdef CONFIG_X86 if (ret) diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index 3625b094bf7..6cd9f3c9887 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -6,18 +6,22 @@ obj-$(CONFIG_HOTPLUG_PCI) += pci_hotplug.o obj-$(CONFIG_HOTPLUG_PCI_COMPAQ) += cpqphp.o obj-$(CONFIG_HOTPLUG_PCI_IBM) += ibmphp.o -# pciehp should be linked before acpiphp in order to allow the native driver -# to attempt to bind first. We can then fall back to generic support. +# native drivers should be linked before acpiphp in order to allow the +# native driver to attempt to bind first. We can then fall back to +# generic support. obj-$(CONFIG_HOTPLUG_PCI_PCIE) += pciehp.o -obj-$(CONFIG_HOTPLUG_PCI_ACPI) += acpiphp.o -obj-$(CONFIG_HOTPLUG_PCI_ACPI_IBM) += acpiphp_ibm.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550) += cpcihp_zt5550.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC) += cpcihp_generic.o obj-$(CONFIG_HOTPLUG_PCI_SHPC) += shpchp.o obj-$(CONFIG_HOTPLUG_PCI_RPA) += rpaphp.o obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR) += rpadlpar_io.o obj-$(CONFIG_HOTPLUG_PCI_SGI) += sgi_hotplug.o +obj-$(CONFIG_HOTPLUG_PCI_ACPI) += acpiphp.o + +# acpiphp_ibm extends acpiphp, so should be linked afterwards. + +obj-$(CONFIG_HOTPLUG_PCI_ACPI_IBM) += acpiphp_ibm.o # Link this last so it doesn't claim devices that have a real hotplug driver obj-$(CONFIG_HOTPLUG_PCI_FAKE) += fakephp.o diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index 0f32571b94d..3c76fc67cf0 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -362,6 +362,8 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags) status = acpi_pci_osc_control_set(handle, flags); if (ACPI_SUCCESS(status)) goto got_one; + if (status == AE_SUPPORT) + goto no_control; kfree(string.pointer); string = (struct acpi_buffer){ ACPI_ALLOCATE_BUFFER, NULL }; } @@ -394,10 +396,9 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags) if (ACPI_FAILURE(status)) break; } - +no_control: dbg("Cannot get control of hotplug hardware for pci %s\n", pci_name(pdev)); - kfree(string.pointer); return -ENODEV; got_one: diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h index 7d938df7920..bab52047baa 100644 --- a/drivers/pci/hotplug/acpiphp.h +++ b/drivers/pci/hotplug/acpiphp.h @@ -146,12 +146,6 @@ struct acpiphp_attention_info struct module *owner; }; -struct acpiphp_ioapic { - struct pci_dev *dev; - u32 gsi_base; - struct list_head list; -}; - /* PCI bus bridge HID */ #define ACPI_PCI_HOST_HID "PNP0A03" diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index df1b0ea089d..8e952fdab76 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -52,8 +52,6 @@ #include "acpiphp.h" static LIST_HEAD(bridge_list); -static LIST_HEAD(ioapic_list); -static DEFINE_SPINLOCK(ioapic_list_lock); #define MY_NAME "acpiphp_glue" @@ -311,17 +309,13 @@ static void init_bridge_misc(struct acpiphp_bridge *bridge) /* find acpiphp_func from acpiphp_bridge */ static struct acpiphp_func *acpiphp_bridge_handle_to_function(acpi_handle handle) { - struct list_head *node, *l; struct acpiphp_bridge *bridge; struct acpiphp_slot *slot; struct acpiphp_func *func; - list_for_each(node, &bridge_list) { - bridge = list_entry(node, struct acpiphp_bridge, list); + list_for_each_entry(bridge, &bridge_list, list) { for (slot = bridge->slots; slot; slot = slot->next) { - list_for_each(l, &slot->funcs) { - func = list_entry(l, struct acpiphp_func, - sibling); + list_for_each_entry(func, &slot->funcs, sibling) { if (func->handle == handle) return func; } @@ -495,21 +489,19 @@ static int add_bridge(acpi_handle handle) static struct acpiphp_bridge *acpiphp_handle_to_bridge(acpi_handle handle) { - struct list_head *head; - list_for_each(head, &bridge_list) { - struct acpiphp_bridge *bridge = list_entry(head, - struct acpiphp_bridge, list); + struct acpiphp_bridge *bridge; + + list_for_each_entry(bridge, &bridge_list, list) if (bridge->handle == handle) return bridge; - } return NULL; } static void cleanup_bridge(struct acpiphp_bridge *bridge) { - struct list_head *list, *tmp; - struct acpiphp_slot *slot; + struct acpiphp_slot *slot, *next; + struct acpiphp_func *func, *tmp; acpi_status status; acpi_handle handle = bridge->handle; @@ -530,10 +522,8 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge) slot = bridge->slots; while (slot) { - struct acpiphp_slot *next = slot->next; - list_for_each_safe (list, tmp, &slot->funcs) { - struct acpiphp_func *func; - func = list_entry(list, struct acpiphp_func, sibling); + next = slot->next; + list_for_each_entry_safe(func, tmp, &slot->funcs, sibling) { if (is_dock_device(func->handle)) { unregister_hotplug_dock_device(func->handle); unregister_dock_notifier(&func->nb); @@ -545,7 +535,7 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge) if (ACPI_FAILURE(status)) err("failed to remove notify handler\n"); } - list_del(list); + list_del(&func->sibling); kfree(func); } acpiphp_unregister_hotplug_slot(slot); @@ -606,204 +596,17 @@ static void remove_bridge(acpi_handle handle) handle_hotplug_event_bridge); } -static struct pci_dev * get_apic_pci_info(acpi_handle handle) -{ - struct pci_dev *dev; - - dev = acpi_get_pci_dev(handle); - if (!dev) - return NULL; - - if ((dev->class != PCI_CLASS_SYSTEM_PIC_IOAPIC) && - (dev->class != PCI_CLASS_SYSTEM_PIC_IOXAPIC)) - { - pci_dev_put(dev); - return NULL; - } - - return dev; -} - -static int get_gsi_base(acpi_handle handle, u32 *gsi_base) -{ - acpi_status status; - int result = -1; - unsigned long long gsb; - struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; - union acpi_object *obj; - void *table; - - status = acpi_evaluate_integer(handle, "_GSB", NULL, &gsb); - if (ACPI_SUCCESS(status)) { - *gsi_base = (u32)gsb; - return 0; - } - - status = acpi_evaluate_object(handle, "_MAT", NULL, &buffer); - if (ACPI_FAILURE(status) || !buffer.length || !buffer.pointer) - return -1; - - obj = buffer.pointer; - if (obj->type != ACPI_TYPE_BUFFER) - goto out; - - table = obj->buffer.pointer; - switch (((struct acpi_subtable_header *)table)->type) { - case ACPI_MADT_TYPE_IO_SAPIC: - *gsi_base = ((struct acpi_madt_io_sapic *)table)->global_irq_base; - result = 0; - break; - case ACPI_MADT_TYPE_IO_APIC: - *gsi_base = ((struct acpi_madt_io_apic *)table)->global_irq_base; - result = 0; - break; - default: - break; - } - out: - kfree(buffer.pointer); - return result; -} - -static acpi_status -ioapic_add(acpi_handle handle, u32 lvl, void *context, void **rv) -{ - acpi_status status; - unsigned long long sta; - acpi_handle tmp; - struct pci_dev *pdev; - u32 gsi_base; - u64 phys_addr; - struct acpiphp_ioapic *ioapic; - - /* Evaluate _STA if present */ - status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); - if (ACPI_SUCCESS(status) && sta != ACPI_STA_ALL) - return AE_CTRL_DEPTH; - - /* Scan only PCI bus scope */ - status = acpi_get_handle(handle, "_HID", &tmp); - if (ACPI_SUCCESS(status)) - return AE_CTRL_DEPTH; - - if (get_gsi_base(handle, &gsi_base)) - return AE_OK; - - ioapic = kmalloc(sizeof(*ioapic), GFP_KERNEL); - if (!ioapic) - return AE_NO_MEMORY; - - pdev = get_apic_pci_info(handle); - if (!pdev) - goto exit_kfree; - - if (pci_enable_device(pdev)) - goto exit_pci_dev_put; - - pci_set_master(pdev); - - if (pci_request_region(pdev, 0, "I/O APIC(acpiphp)")) - goto exit_pci_disable_device; - - phys_addr = pci_resource_start(pdev, 0); - if (acpi_register_ioapic(handle, phys_addr, gsi_base)) - goto exit_pci_release_region; - - ioapic->gsi_base = gsi_base; - ioapic->dev = pdev; - spin_lock(&ioapic_list_lock); - list_add_tail(&ioapic->list, &ioapic_list); - spin_unlock(&ioapic_list_lock); - - return AE_OK; - - exit_pci_release_region: - pci_release_region(pdev, 0); - exit_pci_disable_device: - pci_disable_device(pdev); - exit_pci_dev_put: - pci_dev_put(pdev); - exit_kfree: - kfree(ioapic); - - return AE_OK; -} - -static acpi_status -ioapic_remove(acpi_handle handle, u32 lvl, void *context, void **rv) -{ - acpi_status status; - unsigned long long sta; - acpi_handle tmp; - u32 gsi_base; - struct acpiphp_ioapic *pos, *n, *ioapic = NULL; - - /* Evaluate _STA if present */ - status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); - if (ACPI_SUCCESS(status) && sta != ACPI_STA_ALL) - return AE_CTRL_DEPTH; - - /* Scan only PCI bus scope */ - status = acpi_get_handle(handle, "_HID", &tmp); - if (ACPI_SUCCESS(status)) - return AE_CTRL_DEPTH; - - if (get_gsi_base(handle, &gsi_base)) - return AE_OK; - - acpi_unregister_ioapic(handle, gsi_base); - - spin_lock(&ioapic_list_lock); - list_for_each_entry_safe(pos, n, &ioapic_list, list) { - if (pos->gsi_base != gsi_base) - continue; - ioapic = pos; - list_del(&ioapic->list); - break; - } - spin_unlock(&ioapic_list_lock); - - if (!ioapic) - return AE_OK; - - pci_release_region(ioapic->dev, 0); - pci_disable_device(ioapic->dev); - pci_dev_put(ioapic->dev); - kfree(ioapic); - - return AE_OK; -} - -static int acpiphp_configure_ioapics(acpi_handle handle) -{ - ioapic_add(handle, 0, NULL, NULL); - acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, - ACPI_UINT32_MAX, ioapic_add, NULL, NULL, NULL); - return 0; -} - -static int acpiphp_unconfigure_ioapics(acpi_handle handle) -{ - ioapic_remove(handle, 0, NULL, NULL); - acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, - ACPI_UINT32_MAX, ioapic_remove, NULL, NULL, NULL); - return 0; -} - static int power_on_slot(struct acpiphp_slot *slot) { acpi_status status; struct acpiphp_func *func; - struct list_head *l; int retval = 0; /* if already enabled, just skip */ if (slot->flags & SLOT_POWEREDON) goto err_exit; - list_for_each (l, &slot->funcs) { - func = list_entry(l, struct acpiphp_func, sibling); - + list_for_each_entry(func, &slot->funcs, sibling) { if (func->flags & FUNC_HAS_PS0) { dbg("%s: executing _PS0\n", __func__); status = acpi_evaluate_object(func->handle, "_PS0", NULL, NULL); @@ -829,7 +632,6 @@ static int power_off_slot(struct acpiphp_slot *slot) { acpi_status status; struct acpiphp_func *func; - struct list_head *l; int retval = 0; @@ -837,9 +639,7 @@ static int power_off_slot(struct acpiphp_slot *slot) if ((slot->flags & SLOT_POWEREDON) == 0) goto err_exit; - list_for_each (l, &slot->funcs) { - func = list_entry(l, struct acpiphp_func, sibling); - + list_for_each_entry(func, &slot->funcs, sibling) { if (func->flags & FUNC_HAS_PS3) { status = acpi_evaluate_object(func->handle, "_PS3", NULL, NULL); if (ACPI_FAILURE(status)) { @@ -966,7 +766,6 @@ static int __ref enable_device(struct acpiphp_slot *slot) { struct pci_dev *dev; struct pci_bus *bus = slot->bridge->pci_bus; - struct list_head *l; struct acpiphp_func *func; int retval = 0; int num, max, pass; @@ -1006,21 +805,16 @@ static int __ref enable_device(struct acpiphp_slot *slot) } } - list_for_each (l, &slot->funcs) { - func = list_entry(l, struct acpiphp_func, sibling); + list_for_each_entry(func, &slot->funcs, sibling) acpiphp_bus_add(func); - } pci_bus_assign_resources(bus); acpiphp_sanitize_bus(bus); acpiphp_set_hpp_values(bus); - list_for_each_entry(func, &slot->funcs, sibling) - acpiphp_configure_ioapics(func->handle); pci_enable_bridges(bus); pci_bus_add_devices(bus); - list_for_each (l, &slot->funcs) { - func = list_entry(l, struct acpiphp_func, sibling); + list_for_each_entry(func, &slot->funcs, sibling) { dev = pci_get_slot(bus, PCI_DEVFN(slot->device, func->function)); if (!dev) @@ -1091,7 +885,6 @@ static int disable_device(struct acpiphp_slot *slot) } list_for_each_entry(func, &slot->funcs, sibling) { - acpiphp_unconfigure_ioapics(func->handle); acpiphp_bus_trim(func->handle); } @@ -1119,12 +912,9 @@ static unsigned int get_slot_status(struct acpiphp_slot *slot) acpi_status status; unsigned long long sta = 0; u32 dvid; - struct list_head *l; struct acpiphp_func *func; - list_for_each (l, &slot->funcs) { - func = list_entry(l, struct acpiphp_func, sibling); - + list_for_each_entry(func, &slot->funcs, sibling) { if (func->flags & FUNC_HAS_STA) { status = acpi_evaluate_integer(func->handle, "_STA", NULL, &sta); if (ACPI_SUCCESS(status) && sta) @@ -1152,13 +942,10 @@ int acpiphp_eject_slot(struct acpiphp_slot *slot) { acpi_status status; struct acpiphp_func *func; - struct list_head *l; struct acpi_object_list arg_list; union acpi_object arg; - list_for_each (l, &slot->funcs) { - func = list_entry(l, struct acpiphp_func, sibling); - + list_for_each_entry(func, &slot->funcs, sibling) { /* We don't want to call _EJ0 on non-existing functions. */ if ((func->flags & FUNC_HAS_EJ0)) { /* _EJ0 method take one argument */ @@ -1275,7 +1062,6 @@ static int acpiphp_configure_bridge (acpi_handle handle) acpiphp_sanitize_bus(bus); acpiphp_set_hpp_values(bus); pci_enable_bridges(bus); - acpiphp_configure_ioapics(handle); return 0; } @@ -1542,7 +1328,7 @@ int __init acpiphp_get_num_slots(void) struct acpiphp_bridge *bridge; int num_slots = 0; - list_for_each_entry (bridge, &bridge_list, list) { + list_for_each_entry(bridge, &bridge_list, list) { dbg("Bus %04x:%02x has %d slot%s\n", pci_domain_nr(bridge->pci_bus), bridge->pci_bus->number, bridge->nr_slots, diff --git a/drivers/pci/hotplug/ibmphp_hpc.c b/drivers/pci/hotplug/ibmphp_hpc.c index 83f337c891a..c7084f0eca5 100644 --- a/drivers/pci/hotplug/ibmphp_hpc.c +++ b/drivers/pci/hotplug/ibmphp_hpc.c @@ -890,7 +890,7 @@ static int poll_hpc(void *data) msleep(POLL_INTERVAL_SEC * 1000); if (kthread_should_stop()) - break; + goto out_sleep; down (&semOperations); @@ -904,6 +904,7 @@ static int poll_hpc(void *data) /* give up the hardware semaphore */ up (&semOperations); /* sleep for a short time just for good measure */ +out_sleep: msleep(100); } up (&sem_exit); diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index 0325d989bb4..38183a534b6 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -68,26 +68,26 @@ static DEFINE_MUTEX(pci_hp_mutex); static char *pci_bus_speed_strings[] = { "33 MHz PCI", /* 0x00 */ "66 MHz PCI", /* 0x01 */ - "66 MHz PCIX", /* 0x02 */ - "100 MHz PCIX", /* 0x03 */ - "133 MHz PCIX", /* 0x04 */ + "66 MHz PCI-X", /* 0x02 */ + "100 MHz PCI-X", /* 0x03 */ + "133 MHz PCI-X", /* 0x04 */ NULL, /* 0x05 */ NULL, /* 0x06 */ NULL, /* 0x07 */ NULL, /* 0x08 */ - "66 MHz PCIX 266", /* 0x09 */ - "100 MHz PCIX 266", /* 0x0a */ - "133 MHz PCIX 266", /* 0x0b */ + "66 MHz PCI-X 266", /* 0x09 */ + "100 MHz PCI-X 266", /* 0x0a */ + "133 MHz PCI-X 266", /* 0x0b */ NULL, /* 0x0c */ NULL, /* 0x0d */ NULL, /* 0x0e */ NULL, /* 0x0f */ NULL, /* 0x10 */ - "66 MHz PCIX 533", /* 0x11 */ - "100 MHz PCIX 533", /* 0x12 */ - "133 MHz PCIX 533", /* 0x13 */ - "2.5 GT/s PCI-E", /* 0x14 */ - "5.0 GT/s PCI-E", /* 0x15 */ + "66 MHz PCI-X 533", /* 0x11 */ + "100 MHz PCI-X 533", /* 0x12 */ + "133 MHz PCI-X 533", /* 0x13 */ + "2.5 GT/s PCIe", /* 0x14 */ + "5.0 GT/s PCIe", /* 0x15 */ }; #ifdef CONFIG_HOTPLUG_PCI_CPCI diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 3070f77eb56..4ed76b47b6d 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -91,7 +91,6 @@ struct controller { struct slot *slot; wait_queue_head_t queue; /* sleep & wake process */ u32 slot_cap; - u8 cap_base; struct timer_list poll_timer; unsigned int cmd_busy:1; unsigned int no_cmd_complete:1; diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index 37c8d3d0323..b09b083011d 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -87,7 +87,8 @@ static int __init dummy_probe(struct pcie_device *dev) /* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */ if (pciehp_get_hp_hw_control_from_firmware(pdev)) return -ENODEV; - if (!(pos = pci_find_capability(pdev, PCI_CAP_ID_EXP))) + pos = pci_pcie_cap(pdev); + if (!pos) return -ENODEV; pci_read_config_dword(pdev, pos + PCI_EXP_SLTCAP, &slot_cap); slot = kzalloc(sizeof(*slot), GFP_KERNEL); diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index bc234719b1d..5674b2075bd 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -72,18 +72,6 @@ static int get_adapter_status (struct hotplug_slot *slot, u8 *value); static int get_max_bus_speed (struct hotplug_slot *slot, enum pci_bus_speed *value); static int get_cur_bus_speed (struct hotplug_slot *slot, enum pci_bus_speed *value); -static struct hotplug_slot_ops pciehp_hotplug_slot_ops = { - .set_attention_status = set_attention_status, - .enable_slot = enable_slot, - .disable_slot = disable_slot, - .get_power_status = get_power_status, - .get_attention_status = get_attention_status, - .get_latch_status = get_latch_status, - .get_adapter_status = get_adapter_status, - .get_max_bus_speed = get_max_bus_speed, - .get_cur_bus_speed = get_cur_bus_speed, -}; - /** * release_slot - free up the memory used by a slot * @hotplug_slot: slot to free @@ -95,6 +83,7 @@ static void release_slot(struct hotplug_slot *hotplug_slot) ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, hotplug_slot_name(hotplug_slot)); + kfree(hotplug_slot->ops); kfree(hotplug_slot->info); kfree(hotplug_slot); } @@ -104,6 +93,7 @@ static int init_slot(struct controller *ctrl) struct slot *slot = ctrl->slot; struct hotplug_slot *hotplug = NULL; struct hotplug_slot_info *info = NULL; + struct hotplug_slot_ops *ops = NULL; char name[SLOT_NAME_SIZE]; int retval = -ENOMEM; @@ -115,11 +105,28 @@ static int init_slot(struct controller *ctrl) if (!info) goto out; + /* Setup hotplug slot ops */ + ops = kzalloc(sizeof(*ops), GFP_KERNEL); + if (!ops) + goto out; + ops->enable_slot = enable_slot; + ops->disable_slot = disable_slot; + ops->get_power_status = get_power_status; + ops->get_adapter_status = get_adapter_status; + ops->get_max_bus_speed = get_max_bus_speed; + ops->get_cur_bus_speed = get_cur_bus_speed; + if (MRL_SENS(ctrl)) + ops->get_latch_status = get_latch_status; + if (ATTN_LED(ctrl)) { + ops->get_attention_status = get_attention_status; + ops->set_attention_status = set_attention_status; + } + /* register this slot with the hotplug pci core */ hotplug->info = info; hotplug->private = slot; hotplug->release = &release_slot; - hotplug->ops = &pciehp_hotplug_slot_ops; + hotplug->ops = ops; slot->hotplug_slot = hotplug; snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl)); @@ -128,17 +135,12 @@ static int init_slot(struct controller *ctrl) ctrl->pcie->port->subordinate->number, PSN(ctrl)); retval = pci_hp_register(hotplug, ctrl->pcie->port->subordinate, 0, name); - if (retval) { + if (retval) ctrl_err(ctrl, "pci_hp_register failed with error %d\n", retval); - goto out; - } - get_power_status(hotplug, &info->power_status); - get_attention_status(hotplug, &info->attention_status); - get_latch_status(hotplug, &info->latch_status); - get_adapter_status(hotplug, &info->adapter_status); out: if (retval) { + kfree(ops); kfree(info); kfree(hotplug); } @@ -160,12 +162,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status) ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - hotplug_slot->info->attention_status = status; - - if (ATTN_LED(slot->ctrl)) - pciehp_set_attention_status(slot, status); - - return 0; + return pciehp_set_attention_status(slot, status); } @@ -193,92 +190,62 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value) { struct slot *slot = hotplug_slot->private; - int retval; ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = pciehp_get_power_status(slot, value); - if (retval < 0) - *value = hotplug_slot->info->power_status; - - return 0; + return pciehp_get_power_status(slot, value); } static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value) { struct slot *slot = hotplug_slot->private; - int retval; ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = pciehp_get_attention_status(slot, value); - if (retval < 0) - *value = hotplug_slot->info->attention_status; - - return 0; + return pciehp_get_attention_status(slot, value); } static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value) { struct slot *slot = hotplug_slot->private; - int retval; ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = pciehp_get_latch_status(slot, value); - if (retval < 0) - *value = hotplug_slot->info->latch_status; - - return 0; + return pciehp_get_latch_status(slot, value); } static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value) { struct slot *slot = hotplug_slot->private; - int retval; ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = pciehp_get_adapter_status(slot, value); - if (retval < 0) - *value = hotplug_slot->info->adapter_status; - - return 0; + return pciehp_get_adapter_status(slot, value); } static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value) { struct slot *slot = hotplug_slot->private; - int retval; ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = pciehp_get_max_link_speed(slot, value); - if (retval < 0) - *value = PCI_SPEED_UNKNOWN; - - return 0; + return pciehp_get_max_link_speed(slot, value); } static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value) { struct slot *slot = hotplug_slot->private; - int retval; ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = pciehp_get_cur_link_speed(slot, value); - if (retval < 0) - *value = PCI_SPEED_UNKNOWN; - - return 0; + return pciehp_get_cur_link_speed(slot, value); } static int pciehp_probe(struct pcie_device *dev) @@ -286,14 +253,13 @@ static int pciehp_probe(struct pcie_device *dev) int rc; struct controller *ctrl; struct slot *slot; - u8 value; - struct pci_dev *pdev = dev->port; + u8 occupied, poweron; if (pciehp_force) dev_info(&dev->device, "Bypassing BIOS check for pciehp use on %s\n", - pci_name(pdev)); - else if (pciehp_get_hp_hw_control_from_firmware(pdev)) + pci_name(dev->port)); + else if (pciehp_get_hp_hw_control_from_firmware(dev->port)) goto err_out_none; ctrl = pcie_init(dev); @@ -318,23 +284,18 @@ static int pciehp_probe(struct pcie_device *dev) rc = pcie_init_notification(ctrl); if (rc) { ctrl_err(ctrl, "Notification initialization failed\n"); - goto err_out_release_ctlr; + goto err_out_free_ctrl_slot; } /* Check if slot is occupied */ slot = ctrl->slot; - pciehp_get_adapter_status(slot, &value); - if (value) { - if (pciehp_force) - pciehp_enable_slot(slot); - } else { - /* Power off slot if not occupied */ - if (POWER_CTRL(ctrl)) { - rc = pciehp_power_off_slot(slot); - if (rc) - goto err_out_free_ctrl_slot; - } - } + pciehp_get_adapter_status(slot, &occupied); + pciehp_get_power_status(slot, &poweron); + if (occupied && pciehp_force) + pciehp_enable_slot(slot); + /* If empty slot's power status is on, turn power off */ + if (!occupied && poweron && POWER_CTRL(ctrl)) + pciehp_power_off_slot(slot); return 0; diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index 84487d126e4..d6ac1b261dd 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -142,23 +142,9 @@ u8 pciehp_handle_power_fault(struct slot *p_slot) /* power fault */ ctrl_dbg(ctrl, "Power fault interrupt received\n"); - - if (!pciehp_query_power_fault(p_slot)) { - /* - * power fault Cleared - */ - ctrl_info(ctrl, "Power fault cleared on Slot(%s)\n", - slot_name(p_slot)); - event_type = INT_POWER_FAULT_CLEAR; - } else { - /* - * power fault - */ - ctrl_info(ctrl, "Power fault on Slot(%s)\n", slot_name(p_slot)); - event_type = INT_POWER_FAULT; - ctrl_info(ctrl, "Power fault bit %x set\n", 0); - } - + ctrl_err(ctrl, "Power fault on slot %s\n", slot_name(p_slot)); + event_type = INT_POWER_FAULT; + ctrl_info(ctrl, "Power fault bit %x set\n", 0); queue_interrupt_event(p_slot, event_type); return 1; @@ -224,13 +210,12 @@ static int board_added(struct slot *p_slot) retval = pciehp_check_link_status(ctrl); if (retval) { ctrl_err(ctrl, "Failed to check link status\n"); - set_slot_off(ctrl, p_slot); - return retval; + goto err_exit; } /* Check for a power fault */ - if (pciehp_query_power_fault(p_slot)) { - ctrl_dbg(ctrl, "Power fault detected\n"); + if (ctrl->power_fault_detected || pciehp_query_power_fault(p_slot)) { + ctrl_err(ctrl, "Power fault on slot %s\n", slot_name(p_slot)); retval = -EIO; goto err_exit; } @@ -363,25 +348,6 @@ void pciehp_queue_pushbutton_work(struct work_struct *work) mutex_unlock(&p_slot->lock); } -static int update_slot_info(struct slot *slot) -{ - struct hotplug_slot_info *info; - int result; - - info = kmalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - - pciehp_get_power_status(slot, &info->power_status); - pciehp_get_attention_status(slot, &info->attention_status); - pciehp_get_latch_status(slot, &info->latch_status); - pciehp_get_adapter_status(slot, &info->adapter_status); - - result = pci_hp_change_slot_info(slot->hotplug_slot, info); - kfree (info); - return result; -} - /* * Note: This function must be called with slot->lock held */ @@ -442,7 +408,6 @@ static void handle_button_press_event(struct slot *p_slot) * to hot-add or hot-remove is undergoing */ ctrl_info(ctrl, "Button ignore on Slot(%s)\n", slot_name(p_slot)); - update_slot_info(p_slot); break; default: ctrl_warn(ctrl, "Not a valid state\n"); @@ -500,11 +465,9 @@ static void interrupt_event_handler(struct work_struct *work) if (!HP_SUPR_RM(ctrl)) break; ctrl_dbg(ctrl, "Surprise Removal\n"); - update_slot_info(p_slot); handle_surprise_event(p_slot); break; default: - update_slot_info(p_slot); break; } mutex_unlock(&p_slot->lock); @@ -547,9 +510,6 @@ int pciehp_enable_slot(struct slot *p_slot) if (rc) { pciehp_get_latch_status(p_slot, &getstatus); } - - update_slot_info(p_slot); - return rc; } @@ -590,10 +550,7 @@ int pciehp_disable_slot(struct slot *p_slot) } } - ret = remove_board(p_slot); - update_slot_info(p_slot); - - return ret; + return remove_board(p_slot); } int pciehp_sysfs_enable_slot(struct slot *p_slot) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 9ef4605c1ef..10040d58c8e 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -45,25 +45,25 @@ static atomic_t pciehp_num_controllers = ATOMIC_INIT(0); static inline int pciehp_readw(struct controller *ctrl, int reg, u16 *value) { struct pci_dev *dev = ctrl->pcie->port; - return pci_read_config_word(dev, ctrl->cap_base + reg, value); + return pci_read_config_word(dev, pci_pcie_cap(dev) + reg, value); } static inline int pciehp_readl(struct controller *ctrl, int reg, u32 *value) { struct pci_dev *dev = ctrl->pcie->port; - return pci_read_config_dword(dev, ctrl->cap_base + reg, value); + return pci_read_config_dword(dev, pci_pcie_cap(dev) + reg, value); } static inline int pciehp_writew(struct controller *ctrl, int reg, u16 value) { struct pci_dev *dev = ctrl->pcie->port; - return pci_write_config_word(dev, ctrl->cap_base + reg, value); + return pci_write_config_word(dev, pci_pcie_cap(dev) + reg, value); } static inline int pciehp_writel(struct controller *ctrl, int reg, u32 value) { struct pci_dev *dev = ctrl->pcie->port; - return pci_write_config_dword(dev, ctrl->cap_base + reg, value); + return pci_write_config_dword(dev, pci_pcie_cap(dev) + reg, value); } /* Power Control Command */ @@ -318,8 +318,8 @@ int pciehp_get_attention_status(struct slot *slot, u8 *status) return retval; } - ctrl_dbg(ctrl, "%s: SLOTCTRL %x, value read %x\n", - __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_ctrl); + ctrl_dbg(ctrl, "%s: SLOTCTRL %x, value read %x\n", __func__, + pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_ctrl); atten_led_state = (slot_ctrl & PCI_EXP_SLTCTL_AIC) >> 6; @@ -356,8 +356,8 @@ int pciehp_get_power_status(struct slot *slot, u8 *status) ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__); return retval; } - ctrl_dbg(ctrl, "%s: SLOTCTRL %x value read %x\n", - __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_ctrl); + ctrl_dbg(ctrl, "%s: SLOTCTRL %x value read %x\n", __func__, + pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_ctrl); pwr_state = (slot_ctrl & PCI_EXP_SLTCTL_PCC) >> 10; @@ -427,27 +427,24 @@ int pciehp_set_attention_status(struct slot *slot, u8 value) struct controller *ctrl = slot->ctrl; u16 slot_cmd; u16 cmd_mask; - int rc; cmd_mask = PCI_EXP_SLTCTL_AIC; switch (value) { - case 0 : /* turn off */ - slot_cmd = 0x00C0; - break; - case 1: /* turn on */ - slot_cmd = 0x0040; - break; - case 2: /* turn blink */ - slot_cmd = 0x0080; - break; - default: - return -1; + case 0 : /* turn off */ + slot_cmd = 0x00C0; + break; + case 1: /* turn on */ + slot_cmd = 0x0040; + break; + case 2: /* turn blink */ + slot_cmd = 0x0080; + break; + default: + return -EINVAL; } - rc = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); - ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", - __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); - - return rc; + ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, + pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd); + return pcie_write_cmd(ctrl, slot_cmd, cmd_mask); } void pciehp_green_led_on(struct slot *slot) @@ -459,8 +456,8 @@ void pciehp_green_led_on(struct slot *slot) slot_cmd = 0x0100; cmd_mask = PCI_EXP_SLTCTL_PIC; pcie_write_cmd(ctrl, slot_cmd, cmd_mask); - ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", - __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); + ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, + pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd); } void pciehp_green_led_off(struct slot *slot) @@ -472,8 +469,8 @@ void pciehp_green_led_off(struct slot *slot) slot_cmd = 0x0300; cmd_mask = PCI_EXP_SLTCTL_PIC; pcie_write_cmd(ctrl, slot_cmd, cmd_mask); - ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", - __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); + ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, + pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd); } void pciehp_green_led_blink(struct slot *slot) @@ -485,8 +482,8 @@ void pciehp_green_led_blink(struct slot *slot) slot_cmd = 0x0200; cmd_mask = PCI_EXP_SLTCTL_PIC; pcie_write_cmd(ctrl, slot_cmd, cmd_mask); - ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", - __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); + ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, + pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd); } int pciehp_power_on_slot(struct slot * slot) @@ -514,97 +511,38 @@ int pciehp_power_on_slot(struct slot * slot) return retval; } } + ctrl->power_fault_detected = 0; slot_cmd = POWER_ON; cmd_mask = PCI_EXP_SLTCTL_PCC; - if (!pciehp_poll_mode) { - /* Enable power fault detection turned off at power off time */ - slot_cmd |= PCI_EXP_SLTCTL_PFDE; - cmd_mask |= PCI_EXP_SLTCTL_PFDE; - } - retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); if (retval) { ctrl_err(ctrl, "Write %x command failed!\n", slot_cmd); return retval; } - ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", - __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); + ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, + pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd); - ctrl->power_fault_detected = 0; return retval; } -static inline int pcie_mask_bad_dllp(struct controller *ctrl) -{ - struct pci_dev *dev = ctrl->pcie->port; - int pos; - u32 reg; - - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); - if (!pos) - return 0; - pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, ®); - if (reg & PCI_ERR_COR_BAD_DLLP) - return 0; - reg |= PCI_ERR_COR_BAD_DLLP; - pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg); - return 1; -} - -static inline void pcie_unmask_bad_dllp(struct controller *ctrl) -{ - struct pci_dev *dev = ctrl->pcie->port; - u32 reg; - int pos; - - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); - if (!pos) - return; - pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, ®); - if (!(reg & PCI_ERR_COR_BAD_DLLP)) - return; - reg &= ~PCI_ERR_COR_BAD_DLLP; - pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg); -} - int pciehp_power_off_slot(struct slot * slot) { struct controller *ctrl = slot->ctrl; u16 slot_cmd; u16 cmd_mask; - int retval = 0; - int changed; - - /* - * Set Bad DLLP Mask bit in Correctable Error Mask - * Register. This is the workaround against Bad DLLP error - * that sometimes happens during turning power off the slot - * which conforms to PCI Express 1.0a spec. - */ - changed = pcie_mask_bad_dllp(ctrl); + int retval; slot_cmd = POWER_OFF; cmd_mask = PCI_EXP_SLTCTL_PCC; - if (!pciehp_poll_mode) { - /* Disable power fault detection */ - slot_cmd &= ~PCI_EXP_SLTCTL_PFDE; - cmd_mask |= PCI_EXP_SLTCTL_PFDE; - } - retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); if (retval) { ctrl_err(ctrl, "Write command failed!\n"); - retval = -1; - goto out; + return retval; } - ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", - __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); - out: - if (changed) - pcie_unmask_bad_dllp(ctrl); - - return retval; + ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, + pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd); + return 0; } static irqreturn_t pcie_isr(int irq, void *dev_id) @@ -840,11 +778,19 @@ int pcie_enable_notification(struct controller *ctrl) { u16 cmd, mask; + /* + * TBD: Power fault detected software notification support. + * + * Power fault detected software notification is not enabled + * now, because it caused power fault detected interrupt storm + * on some machines. On those machines, power fault detected + * bit in the slot status register was set again immediately + * when it is cleared in the interrupt service routine, and + * next power fault detected interrupt was notified again. + */ cmd = PCI_EXP_SLTCTL_PDCE; if (ATTN_BUTTN(ctrl)) cmd |= PCI_EXP_SLTCTL_ABPE; - if (POWER_CTRL(ctrl)) - cmd |= PCI_EXP_SLTCTL_PFDE; if (MRL_SENS(ctrl)) cmd |= PCI_EXP_SLTCTL_MRLSCE; if (!pciehp_poll_mode) @@ -866,7 +812,8 @@ static void pcie_disable_notification(struct controller *ctrl) u16 mask; mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE | PCI_EXP_SLTCTL_MRLSCE | PCI_EXP_SLTCTL_PFDE | - PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE); + PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE | + PCI_EXP_SLTCTL_DLLSCE); if (pcie_write_cmd(ctrl, 0, mask)) ctrl_warn(ctrl, "Cannot disable software notification\n"); } @@ -934,7 +881,8 @@ static inline void dbg_ctrl(struct controller *ctrl) pdev->subsystem_device); ctrl_info(ctrl, " Subsystem Vendor ID : 0x%04x\n", pdev->subsystem_vendor); - ctrl_info(ctrl, " PCIe Cap offset : 0x%02x\n", ctrl->cap_base); + ctrl_info(ctrl, " PCIe Cap offset : 0x%02x\n", + pci_pcie_cap(pdev)); for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { if (!pci_resource_len(pdev, i)) continue; @@ -978,8 +926,7 @@ struct controller *pcie_init(struct pcie_device *dev) goto abort; } ctrl->pcie = dev; - ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP); - if (!ctrl->cap_base) { + if (!pci_pcie_cap(pdev)) { ctrl_err(ctrl, "Cannot find PCI Express capability\n"); goto abort_ctrl; } diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c index cc8ec3aa41a..80b461c9855 100644 --- a/drivers/pci/hotplug/pcihp_slot.c +++ b/drivers/pci/hotplug/pcihp_slot.c @@ -43,7 +43,7 @@ static void program_hpp_type0(struct pci_dev *dev, struct hpp_type0 *hpp) * Perhaps we *should* use default settings for PCIe, but * pciehp didn't, so we won't either. */ - if (dev->is_pcie) + if (pci_is_pcie(dev)) return; dev_info(&dev->dev, "using default PCI settings\n"); hpp = &pci_default_type0; @@ -102,7 +102,7 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) return; /* Find PCI Express capability */ - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + pos = pci_pcie_cap(dev); if (!pos) return; diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 9261327b49f..8d615942631 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1611,7 +1611,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, return ret; parent = parent->bus->self; } - if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ + if (pci_is_pcie(tmp)) /* this is a PCIE-to-PCI bridge */ return domain_context_mapping_one(domain, pci_domain_nr(tmp->subordinate), tmp->subordinate->number, 0, @@ -1651,7 +1651,7 @@ static int domain_context_mapped(struct pci_dev *pdev) return ret; parent = parent->bus->self; } - if (tmp->is_pcie) + if (pci_is_pcie(tmp)) return device_context_mapped(iommu, tmp->subordinate->number, 0); else @@ -1821,7 +1821,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) dev_tmp = pci_find_upstream_pcie_bridge(pdev); if (dev_tmp) { - if (dev_tmp->is_pcie) { + if (pci_is_pcie(dev_tmp)) { bus = dev_tmp->subordinate->number; devfn = 0; } else { @@ -2182,7 +2182,7 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup) * the 1:1 domain, just in _case_ one of their siblings turns out * not to be able to map all of memory. */ - if (!pdev->is_pcie) { + if (!pci_is_pcie(pdev)) { if (!pci_is_root_bus(pdev->bus)) return 0; if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) @@ -3319,7 +3319,7 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu, parent->devfn); parent = parent->bus->self; } - if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ + if (pci_is_pcie(tmp)) /* this is a PCIE-to-PCI bridge */ iommu_detach_dev(iommu, tmp->subordinate->number, 0); else /* this is a legacy PCI bridge */ diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 3b3658669be..1487bf2be86 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -520,7 +520,7 @@ int set_msi_sid(struct irte *irte, struct pci_dev *dev) return -1; /* PCIe device or Root Complex integrated PCI device */ - if (dev->is_pcie || !dev->bus->parent) { + if (pci_is_pcie(dev) || !dev->bus->parent) { set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, (dev->bus->number << 8) | dev->devfn); return 0; @@ -528,7 +528,7 @@ int set_msi_sid(struct irte *irte, struct pci_dev *dev) bridge = pci_find_upstream_pcie_bridge(dev); if (bridge) { - if (bridge->is_pcie) /* this is a PCIE-to-PCI/PCIX bridge */ + if (pci_is_pcie(bridge))/* this is a PCIE-to-PCI/PCIX bridge */ set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, (bridge->bus->number << 8) | dev->bus->number); else /* this is a legacy PCI bridge */ diff --git a/drivers/pci/ioapic.c b/drivers/pci/ioapic.c new file mode 100644 index 00000000000..3e0d7b5dd1b --- /dev/null +++ b/drivers/pci/ioapic.c @@ -0,0 +1,127 @@ +/* + * IOAPIC/IOxAPIC/IOSAPIC driver + * + * Copyright (C) 2009 Fujitsu Limited. + * (c) Copyright 2009 Hewlett-Packard Development Company, L.P. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * This driver manages PCI I/O APICs added by hotplug after boot. We try to + * claim all I/O APIC PCI devices, but those present at boot were registered + * when we parsed the ACPI MADT, so we'll fail when we try to re-register + * them. + */ + +#include <linux/pci.h> +#include <linux/acpi.h> +#include <acpi/acpi_bus.h> + +struct ioapic { + acpi_handle handle; + u32 gsi_base; +}; + +static int ioapic_probe(struct pci_dev *dev, const struct pci_device_id *ent) +{ + acpi_handle handle; + acpi_status status; + unsigned long long gsb; + struct ioapic *ioapic; + u64 addr; + int ret; + char *type; + + handle = DEVICE_ACPI_HANDLE(&dev->dev); + if (!handle) + return -EINVAL; + + status = acpi_evaluate_integer(handle, "_GSB", NULL, &gsb); + if (ACPI_FAILURE(status)) + return -EINVAL; + + /* + * The previous code in acpiphp evaluated _MAT if _GSB failed, but + * ACPI spec 4.0 sec 6.2.2 requires _GSB for hot-pluggable I/O APICs. + */ + + ioapic = kzalloc(sizeof(*ioapic), GFP_KERNEL); + if (!ioapic) + return -ENOMEM; + + ioapic->handle = handle; + ioapic->gsi_base = (u32) gsb; + + if (dev->class == PCI_CLASS_SYSTEM_PIC_IOAPIC) + type = "IOAPIC"; + else + type = "IOxAPIC"; + + ret = pci_enable_device(dev); + if (ret < 0) + goto exit_free; + + pci_set_master(dev); + + if (pci_request_region(dev, 0, type)) + goto exit_disable; + + addr = pci_resource_start(dev, 0); + if (acpi_register_ioapic(ioapic->handle, addr, ioapic->gsi_base)) + goto exit_release; + + pci_set_drvdata(dev, ioapic); + dev_info(&dev->dev, "%s at %#llx, GSI %u\n", type, addr, + ioapic->gsi_base); + return 0; + +exit_release: + pci_release_region(dev, 0); +exit_disable: + pci_disable_device(dev); +exit_free: + kfree(ioapic); + return -ENODEV; +} + +static void ioapic_remove(struct pci_dev *dev) +{ + struct ioapic *ioapic = pci_get_drvdata(dev); + + acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base); + pci_release_region(dev, 0); + pci_disable_device(dev); + kfree(ioapic); +} + + +static struct pci_device_id ioapic_devices[] = { + { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_SYSTEM_PIC_IOAPIC << 8, 0xffff00, }, + { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_SYSTEM_PIC_IOXAPIC << 8, 0xffff00, }, + { } +}; + +static struct pci_driver ioapic_driver = { + .name = "ioapic", + .id_table = ioapic_devices, + .probe = ioapic_probe, + .remove = __devexit_p(ioapic_remove), +}; + +static int __init ioapic_init(void) +{ + return pci_register_driver(&ioapic_driver); +} + +static void __exit ioapic_exit(void) +{ + pci_unregister_driver(&ioapic_driver); +} + +module_init(ioapic_init); +module_exit(ioapic_exit); diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index e03fe98f061..b2a448e19fe 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -555,7 +555,7 @@ int pci_iov_init(struct pci_dev *dev) { int pos; - if (!dev->is_pcie) + if (!pci_is_pcie(dev)) return -ENODEV; pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 33317df4769..cc617ddd33d 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -116,7 +116,7 @@ static void acpi_pci_propagate_wakeup_enable(struct pci_bus *bus, bool enable) int ret; ret = acpi_pm_device_sleep_wake(&bridge->dev, enable); - if (!ret || bridge->is_pcie) + if (!ret || pci_is_pcie(bridge)) return; bus = bus->parent; } @@ -131,7 +131,7 @@ static int acpi_pci_sleep_wake(struct pci_dev *dev, bool enable) if (acpi_pci_can_wakeup(dev)) return acpi_pm_device_sleep_wake(&dev->dev, enable); - if (!dev->is_pcie) + if (!pci_is_pcie(dev)) acpi_pci_propagate_wakeup_enable(dev->bus, enable); return 0; diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 0f6382f090e..c5df94e8667 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -74,7 +74,11 @@ static ssize_t local_cpus_show(struct device *dev, const struct cpumask *mask; int len; +#ifdef CONFIG_NUMA + mask = cpumask_of_node(dev_to_node(dev)); +#else mask = cpumask_of_pcibus(to_pci_dev(dev)->bus); +#endif len = cpumask_scnprintf(buf, PAGE_SIZE-2, mask); buf[len++] = '\n'; buf[len] = '\0'; @@ -88,7 +92,11 @@ static ssize_t local_cpulist_show(struct device *dev, const struct cpumask *mask; int len; +#ifdef CONFIG_NUMA + mask = cpumask_of_node(dev_to_node(dev)); +#else mask = cpumask_of_pcibus(to_pci_dev(dev)->bus); +#endif len = cpulist_scnprintf(buf, PAGE_SIZE-2, mask); buf[len++] = '\n'; buf[len] = '\0'; @@ -176,6 +184,21 @@ numa_node_show(struct device *dev, struct device_attribute *attr, char *buf) #endif static ssize_t +dma_mask_bits_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return sprintf (buf, "%d\n", fls64(pdev->dma_mask)); +} + +static ssize_t +consistent_dma_mask_bits_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf (buf, "%d\n", fls64(dev->coherent_dma_mask)); +} + +static ssize_t msi_bus_show(struct device *dev, struct device_attribute *attr, char *buf) { struct pci_dev *pdev = to_pci_dev(dev); @@ -306,6 +329,8 @@ struct device_attribute pci_dev_attrs[] = { #ifdef CONFIG_NUMA __ATTR_RO(numa_node), #endif + __ATTR_RO(dma_mask_bits), + __ATTR_RO(consistent_dma_mask_bits), __ATTR(enable, 0600, is_enabled_show, is_enabled_store), __ATTR(broken_parity_status,(S_IRUGO|S_IWUSR), broken_parity_status_show,broken_parity_status_store), diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 4e4c295a049..0bc27e05901 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -47,6 +47,15 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE; unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE; +/* + * The default CLS is used if arch didn't set CLS explicitly and not + * all pci devices agree on the same value. Arch can override either + * the dfl or actual value as it sees fit. Don't forget this is + * measured in 32-bit words, not bytes. + */ +u8 pci_dfl_cache_line_size __devinitdata = L1_CACHE_BYTES >> 2; +u8 pci_cache_line_size; + /** * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children * @bus: pointer to PCI bus structure to search @@ -373,8 +382,12 @@ pci_find_parent_resource(const struct pci_dev *dev, struct resource *res) continue; /* Wrong type */ if (!((res->flags ^ r->flags) & IORESOURCE_PREFETCH)) return r; /* Exact match */ - if ((res->flags & IORESOURCE_PREFETCH) && !(r->flags & IORESOURCE_PREFETCH)) - best = r; /* Approximating prefetchable by non-prefetchable */ + /* We can't insert a non-prefetch resource inside a prefetchable parent .. */ + if (r->flags & IORESOURCE_PREFETCH) + continue; + /* .. but we can put a prefetchable resource inside a non-prefetchable one */ + if (!best) + best = r; } return best; } @@ -728,8 +741,8 @@ static int pci_save_pcie_state(struct pci_dev *dev) u16 *cap; u16 flags; - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (pos <= 0) + pos = pci_pcie_cap(dev); + if (!pos) return 0; save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP); @@ -837,7 +850,7 @@ pci_save_state(struct pci_dev *dev) int i; /* XXX: 100% dword access ok here? */ for (i = 0; i < 16; i++) - pci_read_config_dword(dev, i * 4,&dev->saved_config_space[i]); + pci_read_config_dword(dev, i * 4, &dev->saved_config_space[i]); dev->state_saved = true; if ((i = pci_save_pcie_state(dev)) != 0) return i; @@ -1202,7 +1215,7 @@ void pci_pme_active(struct pci_dev *dev, bool enable) pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr); - dev_printk(KERN_INFO, &dev->dev, "PME# %s\n", + dev_printk(KERN_DEBUG, &dev->dev, "PME# %s\n", enable ? "enabled" : "disabled"); } @@ -1413,7 +1426,8 @@ void pci_pm_init(struct pci_dev *dev) pmc &= PCI_PM_CAP_PME_MASK; if (pmc) { - dev_info(&dev->dev, "PME# supported from%s%s%s%s%s\n", + dev_printk(KERN_DEBUG, &dev->dev, + "PME# supported from%s%s%s%s%s\n", (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "", (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "", (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "", @@ -1510,7 +1524,7 @@ void pci_enable_ari(struct pci_dev *dev) u16 ctrl; struct pci_dev *bridge; - if (!dev->is_pcie || dev->devfn) + if (!pci_is_pcie(dev) || dev->devfn) return; pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ARI); @@ -1518,10 +1532,10 @@ void pci_enable_ari(struct pci_dev *dev) return; bridge = dev->bus->self; - if (!bridge || !bridge->is_pcie) + if (!bridge || !pci_is_pcie(bridge)) return; - pos = pci_find_capability(bridge, PCI_CAP_ID_EXP); + pos = pci_pcie_cap(bridge); if (!pos) return; @@ -1536,6 +1550,54 @@ void pci_enable_ari(struct pci_dev *dev) bridge->ari_enabled = 1; } +static int pci_acs_enable; + +/** + * pci_request_acs - ask for ACS to be enabled if supported + */ +void pci_request_acs(void) +{ + pci_acs_enable = 1; +} + +/** + * pci_enable_acs - enable ACS if hardware support it + * @dev: the PCI device + */ +void pci_enable_acs(struct pci_dev *dev) +{ + int pos; + u16 cap; + u16 ctrl; + + if (!pci_acs_enable) + return; + + if (!pci_is_pcie(dev)) + return; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_ACS_CAP, &cap); + pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl); + + /* Source Validation */ + ctrl |= (cap & PCI_ACS_SV); + + /* P2P Request Redirect */ + ctrl |= (cap & PCI_ACS_RR); + + /* P2P Completion Redirect */ + ctrl |= (cap & PCI_ACS_CR); + + /* Upstream Forwarding */ + ctrl |= (cap & PCI_ACS_UF); + + pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl); +} + /** * pci_swizzle_interrupt_pin - swizzle INTx for device behind bridge * @dev: the PCI device @@ -1669,9 +1731,7 @@ static int __pci_request_region(struct pci_dev *pdev, int bar, const char *res_n return 0; err_out: - dev_warn(&pdev->dev, "BAR %d: can't reserve %s region %pR\n", - bar, - pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem", + dev_warn(&pdev->dev, "BAR %d: can't reserve %pR\n", bar, &pdev->resource[bar]); return -EBUSY; } @@ -1866,31 +1926,6 @@ void pci_clear_master(struct pci_dev *dev) __pci_set_master(dev, false); } -#ifdef PCI_DISABLE_MWI -int pci_set_mwi(struct pci_dev *dev) -{ - return 0; -} - -int pci_try_set_mwi(struct pci_dev *dev) -{ - return 0; -} - -void pci_clear_mwi(struct pci_dev *dev) -{ -} - -#else - -#ifndef PCI_CACHE_LINE_BYTES -#define PCI_CACHE_LINE_BYTES L1_CACHE_BYTES -#endif - -/* This can be overridden by arch code. */ -/* Don't forget this is measured in 32-bit words, not bytes */ -u8 pci_cache_line_size = PCI_CACHE_LINE_BYTES / 4; - /** * pci_set_cacheline_size - ensure the CACHE_LINE_SIZE register is programmed * @dev: the PCI device for which MWI is to be enabled @@ -1901,13 +1936,12 @@ u8 pci_cache_line_size = PCI_CACHE_LINE_BYTES / 4; * * RETURNS: An appropriate -ERRNO error value on error, or zero for success. */ -static int -pci_set_cacheline_size(struct pci_dev *dev) +int pci_set_cacheline_size(struct pci_dev *dev) { u8 cacheline_size; if (!pci_cache_line_size) - return -EINVAL; /* The system doesn't support MWI. */ + return -EINVAL; /* Validate current setting: the PCI_CACHE_LINE_SIZE must be equal to or multiple of the right value. */ @@ -1928,6 +1962,24 @@ pci_set_cacheline_size(struct pci_dev *dev) return -EINVAL; } +EXPORT_SYMBOL_GPL(pci_set_cacheline_size); + +#ifdef PCI_DISABLE_MWI +int pci_set_mwi(struct pci_dev *dev) +{ + return 0; +} + +int pci_try_set_mwi(struct pci_dev *dev) +{ + return 0; +} + +void pci_clear_mwi(struct pci_dev *dev) +{ +} + +#else /** * pci_set_mwi - enables memory-write-invalidate PCI transaction @@ -2062,6 +2114,7 @@ pci_set_dma_mask(struct pci_dev *dev, u64 mask) return -EIO; dev->dma_mask = mask; + dev_dbg(&dev->dev, "using %dbit DMA mask\n", fls64(mask)); return 0; } @@ -2073,6 +2126,7 @@ pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) return -EIO; dev->dev.coherent_dma_mask = mask; + dev_dbg(&dev->dev, "using %dbit consistent DMA mask\n", fls64(mask)); return 0; } @@ -2099,9 +2153,9 @@ static int pcie_flr(struct pci_dev *dev, int probe) int i; int pos; u32 cap; - u16 status; + u16 status, control; - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + pos = pci_pcie_cap(dev); if (!pos) return -ENOTTY; @@ -2126,8 +2180,10 @@ static int pcie_flr(struct pci_dev *dev, int probe) "proceeding with reset anyway\n"); clear: - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, - PCI_EXP_DEVCTL_BCR_FLR); + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &control); + control |= PCI_EXP_DEVCTL_BCR_FLR; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, control); + msleep(100); return 0; @@ -2450,7 +2506,7 @@ int pcie_get_readrq(struct pci_dev *dev) int ret, cap; u16 ctl; - cap = pci_find_capability(dev, PCI_CAP_ID_EXP); + cap = pci_pcie_cap(dev); if (!cap) return -EINVAL; @@ -2480,7 +2536,7 @@ int pcie_set_readrq(struct pci_dev *dev, int rq) v = (ffs(rq) - 8) << 12; - cap = pci_find_capability(dev, PCI_CAP_ID_EXP); + cap = pci_pcie_cap(dev); if (!cap) goto out; @@ -2540,7 +2596,7 @@ int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type) return reg; } - dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno); + dev_err(&dev->dev, "BAR %d: invalid resource\n", resno); return 0; } @@ -2590,7 +2646,7 @@ int pci_set_vga_state(struct pci_dev *dev, bool decode, #define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0}; -spinlock_t resource_alignment_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(resource_alignment_lock); /** * pci_specified_resource_alignment - get resource alignment specified by user. diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index d92d1954a2f..33ed8e0aba1 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -311,4 +311,6 @@ static inline int pci_resource_alignment(struct pci_dev *dev, return resource_alignment(res); } +extern void pci_enable_acs(struct pci_dev *dev); + #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/pcie/aer/aer_inject.c b/drivers/pci/pcie/aer/aer_inject.c index 62d15f652bb..7fcd5331b14 100644 --- a/drivers/pci/pcie/aer/aer_inject.c +++ b/drivers/pci/pcie/aer/aer_inject.c @@ -23,6 +23,7 @@ #include <linux/pci.h> #include <linux/fs.h> #include <linux/uaccess.h> +#include <linux/stddef.h> #include "aerdrv.h" struct aer_error_inj { @@ -35,10 +36,12 @@ struct aer_error_inj { u32 header_log1; u32 header_log2; u32 header_log3; + u16 domain; }; struct aer_error { struct list_head list; + u16 domain; unsigned int bus; unsigned int devfn; int pos_cap_err; @@ -66,22 +69,27 @@ static LIST_HEAD(pci_bus_ops_list); /* Protect einjected and pci_bus_ops_list */ static DEFINE_SPINLOCK(inject_lock); -static void aer_error_init(struct aer_error *err, unsigned int bus, - unsigned int devfn, int pos_cap_err) +static void aer_error_init(struct aer_error *err, u16 domain, + unsigned int bus, unsigned int devfn, + int pos_cap_err) { INIT_LIST_HEAD(&err->list); + err->domain = domain; err->bus = bus; err->devfn = devfn; err->pos_cap_err = pos_cap_err; } /* inject_lock must be held before calling */ -static struct aer_error *__find_aer_error(unsigned int bus, unsigned int devfn) +static struct aer_error *__find_aer_error(u16 domain, unsigned int bus, + unsigned int devfn) { struct aer_error *err; list_for_each_entry(err, &einjected, list) { - if (bus == err->bus && devfn == err->devfn) + if (domain == err->domain && + bus == err->bus && + devfn == err->devfn) return err; } return NULL; @@ -90,7 +98,10 @@ static struct aer_error *__find_aer_error(unsigned int bus, unsigned int devfn) /* inject_lock must be held before calling */ static struct aer_error *__find_aer_error_by_dev(struct pci_dev *dev) { - return __find_aer_error(dev->bus->number, dev->devfn); + int domain = pci_domain_nr(dev->bus); + if (domain < 0) + return NULL; + return __find_aer_error((u16)domain, dev->bus->number, dev->devfn); } /* inject_lock must be held before calling */ @@ -172,11 +183,15 @@ static int pci_read_aer(struct pci_bus *bus, unsigned int devfn, int where, struct aer_error *err; unsigned long flags; struct pci_ops *ops; + int domain; spin_lock_irqsave(&inject_lock, flags); if (size != sizeof(u32)) goto out; - err = __find_aer_error(bus->number, devfn); + domain = pci_domain_nr(bus); + if (domain < 0) + goto out; + err = __find_aer_error((u16)domain, bus->number, devfn); if (!err) goto out; @@ -200,11 +215,15 @@ int pci_write_aer(struct pci_bus *bus, unsigned int devfn, int where, int size, unsigned long flags; int rw1cs; struct pci_ops *ops; + int domain; spin_lock_irqsave(&inject_lock, flags); if (size != sizeof(u32)) goto out; - err = __find_aer_error(bus->number, devfn); + domain = pci_domain_nr(bus); + if (domain < 0) + goto out; + err = __find_aer_error((u16)domain, bus->number, devfn); if (!err) goto out; @@ -262,7 +281,7 @@ out: static struct pci_dev *pcie_find_root_port(struct pci_dev *dev) { while (1) { - if (!dev->is_pcie) + if (!pci_is_pcie(dev)) break; if (dev->pcie_type == PCI_EXP_TYPE_ROOT_PORT) return dev; @@ -305,25 +324,25 @@ static int aer_inject(struct aer_error_inj *einj) u32 sever; int ret = 0; - dev = pci_get_bus_and_slot(einj->bus, devfn); + dev = pci_get_domain_bus_and_slot((int)einj->domain, einj->bus, devfn); if (!dev) - return -EINVAL; + return -ENODEV; rpdev = pcie_find_root_port(dev); if (!rpdev) { - ret = -EINVAL; + ret = -ENOTTY; goto out_put; } pos_cap_err = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); if (!pos_cap_err) { - ret = -EIO; + ret = -ENOTTY; goto out_put; } pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_SEVER, &sever); rp_pos_cap_err = pci_find_ext_capability(rpdev, PCI_EXT_CAP_ID_ERR); if (!rp_pos_cap_err) { - ret = -EIO; + ret = -ENOTTY; goto out_put; } @@ -344,7 +363,8 @@ static int aer_inject(struct aer_error_inj *einj) if (!err) { err = err_alloc; err_alloc = NULL; - aer_error_init(err, einj->bus, devfn, pos_cap_err); + aer_error_init(err, einj->domain, einj->bus, devfn, + pos_cap_err); list_add(&err->list, &einjected); } err->uncor_status |= einj->uncor_status; @@ -358,7 +378,8 @@ static int aer_inject(struct aer_error_inj *einj) if (!rperr) { rperr = rperr_alloc; rperr_alloc = NULL; - aer_error_init(rperr, rpdev->bus->number, rpdev->devfn, + aer_error_init(rperr, pci_domain_nr(rpdev->bus), + rpdev->bus->number, rpdev->devfn, rp_pos_cap_err); list_add(&rperr->list, &einjected); } @@ -411,10 +432,11 @@ static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - - if (usize != sizeof(struct aer_error_inj)) + if (usize < offsetof(struct aer_error_inj, domain) || + usize > sizeof(einj)) return -EINVAL; + memset(&einj, 0, sizeof(einj)); if (copy_from_user(&einj, ubuf, usize)) return -EFAULT; @@ -452,7 +474,7 @@ static void __exit aer_inject_exit(void) } spin_lock_irqsave(&inject_lock, flags); - list_for_each_entry_safe(err, err_next, &pci_bus_ops_list, list) { + list_for_each_entry_safe(err, err_next, &einjected, list) { list_del(&err->list); kfree(err); } diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 40c3cc5d1ca..97a345927b5 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -53,7 +53,7 @@ static struct pci_error_handlers aer_error_handlers = { static struct pcie_port_service_driver aerdriver = { .name = "aer", - .port_type = PCIE_RC_PORT, + .port_type = PCI_EXP_TYPE_ROOT_PORT, .service = PCIE_PORT_SERVICE_AER, .probe = aer_probe, @@ -295,7 +295,7 @@ static void aer_error_resume(struct pci_dev *dev) u16 reg16; /* Clean up Root device status */ - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + pos = pci_pcie_cap(dev); pci_read_config_word(dev, pos + PCI_EXP_DEVSTA, ®16); pci_write_config_word(dev, pos + PCI_EXP_DEVSTA, reg16); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 9f5ccbeb4fa..ae672ca8033 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -35,11 +35,14 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev) u16 reg16 = 0; int pos; + if (dev->aer_firmware_first) + return -EIO; + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); if (!pos) return -EIO; - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + pos = pci_pcie_cap(dev); if (!pos) return -EIO; @@ -60,7 +63,10 @@ int pci_disable_pcie_error_reporting(struct pci_dev *dev) u16 reg16 = 0; int pos; - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (dev->aer_firmware_first) + return -EIO; + + pos = pci_pcie_cap(dev); if (!pos) return -EIO; @@ -78,48 +84,27 @@ EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting); int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) { int pos; - u32 status, mask; + u32 status; pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); if (!pos) return -EIO; pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); - if (dev->error_state == pci_channel_io_normal) - status &= ~mask; /* Clear corresponding nonfatal bits */ - else - status &= mask; /* Clear corresponding fatal bits */ - pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); + if (status) + pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); return 0; } EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); -#if 0 -int pci_cleanup_aer_correct_error_status(struct pci_dev *dev) -{ - int pos; - u32 status; - - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); - if (!pos) - return -EIO; - - pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); - pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status); - - return 0; -} -#endif /* 0 */ - static int set_device_error_reporting(struct pci_dev *dev, void *data) { bool enable = *((bool *)data); - if (dev->pcie_type == PCIE_RC_PORT || - dev->pcie_type == PCIE_SW_UPSTREAM_PORT || - dev->pcie_type == PCIE_SW_DOWNSTREAM_PORT) { + if ((dev->pcie_type == PCI_EXP_TYPE_ROOT_PORT) || + (dev->pcie_type == PCI_EXP_TYPE_UPSTREAM) || + (dev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)) { if (enable) pci_enable_pcie_error_reporting(dev); else @@ -218,7 +203,7 @@ static int find_device_iter(struct pci_dev *dev, void *data) */ if (atomic_read(&dev->enable_cnt) == 0) return 0; - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + pos = pci_pcie_cap(dev); if (!pos) return 0; /* Check if AER is enabled */ @@ -431,10 +416,9 @@ static int find_aer_service_iter(struct device *device, void *data) result = (struct find_aer_service_data *) data; if (device->bus == &pcie_port_bus_type) { - struct pcie_port_data *port_data; + struct pcie_device *pcie = to_pcie_device(device); - port_data = pci_get_drvdata(to_pcie_device(device)->port); - if (port_data->port_type == PCIE_SW_DOWNSTREAM_PORT) + if (pcie->port->pcie_type == PCI_EXP_TYPE_DOWNSTREAM) result->is_downstream = 1; driver = device->driver; @@ -612,7 +596,7 @@ void aer_enable_rootport(struct aer_rpc *rpc) u16 reg16; u32 reg32; - pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); + pos = pci_pcie_cap(pdev); /* Clear PCIE Capability's Device Status */ pci_read_config_word(pdev, pos+PCI_EXP_DEVSTA, ®16); pci_write_config_word(pdev, pos+PCI_EXP_DEVSTA, reg16); @@ -874,8 +858,22 @@ void aer_delete_rootport(struct aer_rpc *rpc) */ int aer_init(struct pcie_device *dev) { - if (aer_osc_setup(dev) && !forceload) - return -ENXIO; + if (dev->port->aer_firmware_first) { + dev_printk(KERN_DEBUG, &dev->device, + "PCIe errors handled by platform firmware.\n"); + goto out; + } + + if (aer_osc_setup(dev)) + goto out; return 0; +out: + if (forceload) { + dev_printk(KERN_DEBUG, &dev->device, + "aerdrv forceload requested.\n"); + dev->port->aer_firmware_first = 0; + return 0; + } + return -ENXIO; } diff --git a/drivers/pci/pcie/aer/ecrc.c b/drivers/pci/pcie/aer/ecrc.c index a928d8ab6bd..a2747a663bc 100644 --- a/drivers/pci/pcie/aer/ecrc.c +++ b/drivers/pci/pcie/aer/ecrc.c @@ -51,7 +51,7 @@ static int enable_ecrc_checking(struct pci_dev *dev) int pos; u32 reg32; - if (!dev->is_pcie) + if (!pci_is_pcie(dev)) return -ENODEV; pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); @@ -79,7 +79,7 @@ static int disable_ecrc_checking(struct pci_dev *dev) int pos; u32 reg32; - if (!dev->is_pcie) + if (!pci_is_pcie(dev)) return -ENODEV; pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 5b7056cec00..5a01fc7fbf0 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -122,7 +122,7 @@ static void pcie_set_clkpm_nocheck(struct pcie_link_state *link, int enable) struct pci_bus *linkbus = link->pdev->subordinate; list_for_each_entry(child, &linkbus->devices, bus_list) { - pos = pci_find_capability(child, PCI_CAP_ID_EXP); + pos = pci_pcie_cap(child); if (!pos) return; pci_read_config_word(child, pos + PCI_EXP_LNKCTL, ®16); @@ -156,7 +156,7 @@ static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) /* All functions should have the same cap and state, take the worst */ list_for_each_entry(child, &linkbus->devices, bus_list) { - pos = pci_find_capability(child, PCI_CAP_ID_EXP); + pos = pci_pcie_cap(child); if (!pos) return; pci_read_config_dword(child, pos + PCI_EXP_LNKCAP, ®32); @@ -191,23 +191,23 @@ static void pcie_aspm_configure_common_clock(struct pcie_link_state *link) * Configuration, so just check one function */ child = list_entry(linkbus->devices.next, struct pci_dev, bus_list); - BUG_ON(!child->is_pcie); + BUG_ON(!pci_is_pcie(child)); /* Check downstream component if bit Slot Clock Configuration is 1 */ - cpos = pci_find_capability(child, PCI_CAP_ID_EXP); + cpos = pci_pcie_cap(child); pci_read_config_word(child, cpos + PCI_EXP_LNKSTA, ®16); if (!(reg16 & PCI_EXP_LNKSTA_SLC)) same_clock = 0; /* Check upstream component if bit Slot Clock Configuration is 1 */ - ppos = pci_find_capability(parent, PCI_CAP_ID_EXP); + ppos = pci_pcie_cap(parent); pci_read_config_word(parent, ppos + PCI_EXP_LNKSTA, ®16); if (!(reg16 & PCI_EXP_LNKSTA_SLC)) same_clock = 0; /* Configure downstream component, all functions */ list_for_each_entry(child, &linkbus->devices, bus_list) { - cpos = pci_find_capability(child, PCI_CAP_ID_EXP); + cpos = pci_pcie_cap(child); pci_read_config_word(child, cpos + PCI_EXP_LNKCTL, ®16); child_reg[PCI_FUNC(child->devfn)] = reg16; if (same_clock) @@ -247,7 +247,7 @@ static void pcie_aspm_configure_common_clock(struct pcie_link_state *link) dev_printk(KERN_ERR, &parent->dev, "ASPM: Could not configure common clock\n"); list_for_each_entry(child, &linkbus->devices, bus_list) { - cpos = pci_find_capability(child, PCI_CAP_ID_EXP); + cpos = pci_pcie_cap(child); pci_write_config_word(child, cpos + PCI_EXP_LNKCTL, child_reg[PCI_FUNC(child->devfn)]); } @@ -300,7 +300,7 @@ static void pcie_get_aspm_reg(struct pci_dev *pdev, u16 reg16; u32 reg32; - pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); + pos = pci_pcie_cap(pdev); pci_read_config_dword(pdev, pos + PCI_EXP_LNKCAP, ®32); info->support = (reg32 & PCI_EXP_LNKCAP_ASPMS) >> 10; info->latency_encoding_l0s = (reg32 & PCI_EXP_LNKCAP_L0SEL) >> 12; @@ -420,7 +420,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) child->pcie_type != PCI_EXP_TYPE_LEG_END) continue; - pos = pci_find_capability(child, PCI_CAP_ID_EXP); + pos = pci_pcie_cap(child); pci_read_config_dword(child, pos + PCI_EXP_DEVCAP, ®32); /* Calculate endpoint L0s acceptable latency */ encoding = (reg32 & PCI_EXP_DEVCAP_L0S) >> 6; @@ -436,7 +436,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val) { u16 reg16; - int pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); + int pos = pci_pcie_cap(pdev); pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, ®16); reg16 &= ~0x3; @@ -503,7 +503,7 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev) * very strange. Disable ASPM for the whole slot */ list_for_each_entry(child, &pdev->subordinate->devices, bus_list) { - pos = pci_find_capability(child, PCI_CAP_ID_EXP); + pos = pci_pcie_cap(child); if (!pos) return -EINVAL; /* @@ -563,7 +563,7 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev) struct pcie_link_state *link; int blacklist = !!pcie_aspm_sanity_check(pdev); - if (aspm_disabled || !pdev->is_pcie || pdev->link_state) + if (aspm_disabled || !pci_is_pcie(pdev) || pdev->link_state) return; if (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT && pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM) @@ -629,7 +629,8 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) struct pci_dev *parent = pdev->bus->self; struct pcie_link_state *link, *root, *parent_link; - if (aspm_disabled || !pdev->is_pcie || !parent || !parent->link_state) + if (aspm_disabled || !pci_is_pcie(pdev) || + !parent || !parent->link_state) return; if ((parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT) && (parent->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)) @@ -670,7 +671,7 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev) { struct pcie_link_state *link = pdev->link_state; - if (aspm_disabled || !pdev->is_pcie || !link) + if (aspm_disabled || !pci_is_pcie(pdev) || !link) return; if ((pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT) && (pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)) @@ -696,7 +697,7 @@ void pci_disable_link_state(struct pci_dev *pdev, int state) struct pci_dev *parent = pdev->bus->self; struct pcie_link_state *link; - if (aspm_disabled || !pdev->is_pcie) + if (aspm_disabled || !pci_is_pcie(pdev)) return; if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT || pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM) @@ -841,8 +842,9 @@ void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev) { struct pcie_link_state *link_state = pdev->link_state; - if (!pdev->is_pcie || (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT && - pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM) || !link_state) + if (!pci_is_pcie(pdev) || + (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT && + pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM) || !link_state) return; if (link_state->aspm_support) @@ -857,8 +859,9 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev) { struct pcie_link_state *link_state = pdev->link_state; - if (!pdev->is_pcie || (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT && - pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM) || !link_state) + if (!pci_is_pcie(pdev) || + (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT && + pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM) || !link_state) return; if (link_state->aspm_support) diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 17ad53868f9..aaeb9d21cba 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -11,31 +11,16 @@ #include <linux/compiler.h> -#if !defined(PCI_CAP_ID_PME) -#define PCI_CAP_ID_PME 1 -#endif - -#if !defined(PCI_CAP_ID_EXP) -#define PCI_CAP_ID_EXP 0x10 -#endif - -#define PORT_TYPE_MASK 0xf -#define PORT_TO_SLOT_MASK 0x100 -#define SLOT_HP_CAPABLE_MASK 0x40 -#define PCIE_CAPABILITIES_REG 0x2 -#define PCIE_SLOT_CAPABILITIES_REG 0x14 -#define PCIE_PORT_DEVICE_MAXSERVICES 4 -#define PCIE_PORT_MSI_VECTOR_MASK 0x1f +#define PCIE_PORT_DEVICE_MAXSERVICES 4 /* - * According to the PCI Express Base Specification 2.0, the indices of the MSI-X - * table entires used by port services must not exceed 31 + * According to the PCI Express Base Specification 2.0, the indices of + * the MSI-X table entires used by port services must not exceed 31 */ #define PCIE_PORT_MAX_MSIX_ENTRIES 32 #define get_descriptor_id(type, service) (((type - 4) << 4) | service) extern struct bus_type pcie_port_bus_type; -extern int pcie_port_device_probe(struct pci_dev *dev); extern int pcie_port_device_register(struct pci_dev *dev); #ifdef CONFIG_PM extern int pcie_port_device_suspend(struct device *dev); diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c index ef3a4eeaebb..18bf90f748f 100644 --- a/drivers/pci/pcie/portdrv_bus.c +++ b/drivers/pci/pcie/portdrv_bus.c @@ -26,7 +26,6 @@ EXPORT_SYMBOL_GPL(pcie_port_bus_type); static int pcie_port_bus_match(struct device *dev, struct device_driver *drv) { struct pcie_device *pciedev; - struct pcie_port_data *port_data; struct pcie_port_service_driver *driver; if (drv->bus != &pcie_port_bus_type || dev->bus != &pcie_port_bus_type) @@ -38,10 +37,8 @@ static int pcie_port_bus_match(struct device *dev, struct device_driver *drv) if (driver->service != pciedev->service) return 0; - port_data = pci_get_drvdata(pciedev->port); - - if (driver->port_type != PCIE_ANY_PORT - && driver->port_type != port_data->port_type) + if ((driver->port_type != PCIE_ANY_PORT) && + (driver->port_type != pciedev->port->pcie_type)) return 0; return 1; diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 52f84fca9f7..413262eb95b 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -108,9 +108,9 @@ static int pcie_port_enable_msix(struct pci_dev *dev, int *vectors, int mask) * the value in this field indicates which MSI-X Table entry is * used to generate the interrupt message." */ - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); - pci_read_config_word(dev, pos + PCIE_CAPABILITIES_REG, ®16); - entry = (reg16 >> 9) & PCIE_PORT_MSI_VECTOR_MASK; + pos = pci_pcie_cap(dev); + pci_read_config_word(dev, pos + PCI_EXP_FLAGS, ®16); + entry = (reg16 & PCI_EXP_FLAGS_IRQ) >> 9; if (entry >= nr_entries) goto Error; @@ -177,37 +177,40 @@ static int pcie_port_enable_msix(struct pci_dev *dev, int *vectors, int mask) } /** - * assign_interrupt_mode - choose interrupt mode for PCI Express port services - * (INTx, MSI-X, MSI) and set up vectors + * init_service_irqs - initialize irqs for PCI Express port services * @dev: PCI Express port to handle - * @vectors: Array of interrupt vectors to populate + * @irqs: Array of irqs to populate * @mask: Bitmask of port capabilities returned by get_port_device_capability() * * Return value: Interrupt mode associated with the port */ -static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) +static int init_service_irqs(struct pci_dev *dev, int *irqs, int mask) { - int irq, interrupt_mode = PCIE_PORT_NO_IRQ; - int i; + int i, irq; /* Try to use MSI-X if supported */ - if (!pcie_port_enable_msix(dev, vectors, mask)) - return PCIE_PORT_MSIX_MODE; - + if (!pcie_port_enable_msix(dev, irqs, mask)) + return 0; /* We're not going to use MSI-X, so try MSI and fall back to INTx */ - if (!pci_enable_msi(dev)) - interrupt_mode = PCIE_PORT_MSI_MODE; - - if (interrupt_mode == PCIE_PORT_NO_IRQ && dev->pin) - interrupt_mode = PCIE_PORT_INTx_MODE; + irq = -1; + if (!pci_enable_msi(dev) || dev->pin) + irq = dev->irq; - irq = interrupt_mode != PCIE_PORT_NO_IRQ ? dev->irq : -1; for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) - vectors[i] = irq; + irqs[i] = irq; + irqs[PCIE_PORT_SERVICE_VC_SHIFT] = -1; - vectors[PCIE_PORT_SERVICE_VC_SHIFT] = -1; + if (irq < 0) + return -ENODEV; + return 0; +} - return interrupt_mode; +static void cleanup_service_irqs(struct pci_dev *dev) +{ + if (dev->msix_enabled) + pci_disable_msix(dev); + else if (dev->msi_enabled) + pci_disable_msi(dev); } /** @@ -226,13 +229,12 @@ static int get_port_device_capability(struct pci_dev *dev) u16 reg16; u32 reg32; - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); - pci_read_config_word(dev, pos + PCIE_CAPABILITIES_REG, ®16); + pos = pci_pcie_cap(dev); + pci_read_config_word(dev, pos + PCI_EXP_FLAGS, ®16); /* Hot-Plug Capable */ - if (reg16 & PORT_TO_SLOT_MASK) { - pci_read_config_dword(dev, - pos + PCIE_SLOT_CAPABILITIES_REG, ®32); - if (reg32 & SLOT_HP_CAPABLE_MASK) + if (reg16 & PCI_EXP_FLAGS_SLOT) { + pci_read_config_dword(dev, pos + PCI_EXP_SLTCAP, ®32); + if (reg32 & PCI_EXP_SLTCAP_HPC) services |= PCIE_PORT_SERVICE_HP; } /* AER capable */ @@ -241,80 +243,47 @@ static int get_port_device_capability(struct pci_dev *dev) /* VC support */ if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC)) services |= PCIE_PORT_SERVICE_VC; + /* Root ports are capable of generating PME too */ + if (dev->pcie_type == PCI_EXP_TYPE_ROOT_PORT) + services |= PCIE_PORT_SERVICE_PME; return services; } /** - * pcie_device_init - initialize PCI Express port service device - * @dev: Port service device to initialize - * @parent: PCI Express port to associate the service device with - * @port_type: Type of the port - * @service_type: Type of service to associate with the service device + * pcie_device_init - allocate and initialize PCI Express port service device + * @pdev: PCI Express port to associate the service device with + * @service: Type of service to associate with the service device * @irq: Interrupt vector to associate with the service device */ -static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, - int service_type, int irq) +static int pcie_device_init(struct pci_dev *pdev, int service, int irq) { - struct pcie_port_data *port_data = pci_get_drvdata(parent); + int retval; + struct pcie_device *pcie; struct device *device; - int port_type = port_data->port_type; - dev->port = parent; - dev->irq = irq; - dev->service = service_type; + pcie = kzalloc(sizeof(*pcie), GFP_KERNEL); + if (!pcie) + return -ENOMEM; + pcie->port = pdev; + pcie->irq = irq; + pcie->service = service; /* Initialize generic device interface */ - device = &dev->device; - memset(device, 0, sizeof(struct device)); + device = &pcie->device; device->bus = &pcie_port_bus_type; - device->driver = NULL; - dev_set_drvdata(device, NULL); device->release = release_pcie_device; /* callback to free pcie dev */ dev_set_name(device, "%s:pcie%02x", - pci_name(parent), get_descriptor_id(port_type, service_type)); - device->parent = &parent->dev; -} - -/** - * alloc_pcie_device - allocate PCI Express port service device structure - * @parent: PCI Express port to associate the service device with - * @port_type: Type of the port - * @service_type: Type of service to associate with the service device - * @irq: Interrupt vector to associate with the service device - */ -static struct pcie_device* alloc_pcie_device(struct pci_dev *parent, - int service_type, int irq) -{ - struct pcie_device *device; - - device = kzalloc(sizeof(struct pcie_device), GFP_KERNEL); - if (!device) - return NULL; - - pcie_device_init(parent, device, service_type, irq); - return device; -} - -/** - * pcie_port_device_probe - check if device is a PCI Express port - * @dev: Device to check - */ -int pcie_port_device_probe(struct pci_dev *dev) -{ - int pos, type; - u16 reg; - - if (!(pos = pci_find_capability(dev, PCI_CAP_ID_EXP))) - return -ENODEV; - - pci_read_config_word(dev, pos + PCIE_CAPABILITIES_REG, ®); - type = (reg >> 4) & PORT_TYPE_MASK; - if ( type == PCIE_RC_PORT || type == PCIE_SW_UPSTREAM_PORT || - type == PCIE_SW_DOWNSTREAM_PORT ) - return 0; - - return -ENODEV; + pci_name(pdev), + get_descriptor_id(pdev->pcie_type, service)); + device->parent = &pdev->dev; + + retval = device_register(device); + if (retval) + kfree(pcie); + else + get_device(device); + return retval; } /** @@ -326,77 +295,49 @@ int pcie_port_device_probe(struct pci_dev *dev) */ int pcie_port_device_register(struct pci_dev *dev) { - struct pcie_port_data *port_data; - int status, capabilities, irq_mode, i, nr_serv; - int vectors[PCIE_PORT_DEVICE_MAXSERVICES]; - u16 reg16; - - port_data = kzalloc(sizeof(*port_data), GFP_KERNEL); - if (!port_data) - return -ENOMEM; - pci_set_drvdata(dev, port_data); - - /* Get port type */ - pci_read_config_word(dev, - pci_find_capability(dev, PCI_CAP_ID_EXP) + - PCIE_CAPABILITIES_REG, ®16); - port_data->port_type = (reg16 >> 4) & PORT_TYPE_MASK; + int status, capabilities, i, nr_service; + int irqs[PCIE_PORT_DEVICE_MAXSERVICES]; + /* Get and check PCI Express port services */ capabilities = get_port_device_capability(dev); - /* Root ports are capable of generating PME too */ - if (port_data->port_type == PCIE_RC_PORT) - capabilities |= PCIE_PORT_SERVICE_PME; - - irq_mode = assign_interrupt_mode(dev, vectors, capabilities); - if (irq_mode == PCIE_PORT_NO_IRQ) { - /* - * Don't use service devices that require interrupts if there is - * no way to generate them. - */ - if (!(capabilities & PCIE_PORT_SERVICE_VC)) { - status = -ENODEV; - goto Error; - } - capabilities = PCIE_PORT_SERVICE_VC; - } - port_data->port_irq_mode = irq_mode; + if (!capabilities) + return -ENODEV; + /* Enable PCI Express port device */ status = pci_enable_device(dev); if (status) - goto Error; + return status; pci_set_master(dev); + /* + * Initialize service irqs. Don't use service devices that + * require interrupts if there is no way to generate them. + */ + status = init_service_irqs(dev, irqs, capabilities); + if (status) { + capabilities &= PCIE_PORT_SERVICE_VC; + if (!capabilities) + goto error_disable; + } /* Allocate child services if any */ - for (i = 0, nr_serv = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { - struct pcie_device *child; + status = -ENODEV; + nr_service = 0; + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { int service = 1 << i; - if (!(capabilities & service)) continue; - - child = alloc_pcie_device(dev, service, vectors[i]); - if (!child) - continue; - - status = device_register(&child->device); - if (status) { - kfree(child); - continue; - } - - get_device(&child->device); - nr_serv++; - } - if (!nr_serv) { - pci_disable_device(dev); - status = -ENODEV; - goto Error; + if (!pcie_device_init(dev, service, irqs[i])) + nr_service++; } + if (!nr_service) + goto error_cleanup_irqs; return 0; - Error: - kfree(port_data); +error_cleanup_irqs: + cleanup_service_irqs(dev); +error_disable: + pci_disable_device(dev); return status; } @@ -464,21 +405,9 @@ static int remove_iter(struct device *dev, void *data) */ void pcie_port_device_remove(struct pci_dev *dev) { - struct pcie_port_data *port_data = pci_get_drvdata(dev); - device_for_each_child(&dev->dev, NULL, remove_iter); + cleanup_service_irqs(dev); pci_disable_device(dev); - - switch (port_data->port_irq_mode) { - case PCIE_PORT_MSIX_MODE: - pci_disable_msix(dev); - break; - case PCIE_PORT_MSI_MODE: - pci_disable_msi(dev); - break; - } - - kfree(port_data); } /** diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index f635e476d63..ce52ea34fee 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -67,14 +67,16 @@ static struct dev_pm_ops pcie_portdrv_pm_ops = { * this port device. * */ -static int __devinit pcie_portdrv_probe (struct pci_dev *dev, - const struct pci_device_id *id ) +static int __devinit pcie_portdrv_probe(struct pci_dev *dev, + const struct pci_device_id *id) { - int status; + int status; - status = pcie_port_device_probe(dev); - if (status) - return status; + if (!pci_is_pcie(dev) || + ((dev->pcie_type != PCI_EXP_TYPE_ROOT_PORT) && + (dev->pcie_type != PCI_EXP_TYPE_UPSTREAM) && + (dev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM))) + return -ENODEV; if (!dev->irq && dev->pin) { dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; " diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 8105e32117f..98ffb2de22e 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/cpumask.h> #include <linux/pci-aspm.h> +#include <acpi/acpi_hest.h> #include "pci.h" #define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ @@ -163,12 +164,12 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, { u32 l, sz, mask; - mask = type ? ~PCI_ROM_ADDRESS_ENABLE : ~0; + mask = type ? PCI_ROM_ADDRESS_MASK : ~0; res->name = pci_name(dev); pci_read_config_dword(dev, pos, &l); - pci_write_config_dword(dev, pos, mask); + pci_write_config_dword(dev, pos, l | mask); pci_read_config_dword(dev, pos, &sz); pci_write_config_dword(dev, pos, l); @@ -223,9 +224,13 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, goto fail; if ((sizeof(resource_size_t) < 8) && (sz64 > 0x100000000ULL)) { - dev_err(&dev->dev, "can't handle 64-bit BAR\n"); + dev_err(&dev->dev, "reg %x: can't handle 64-bit BAR\n", + pos); goto fail; - } else if ((sizeof(resource_size_t) < 8) && l) { + } + + res->flags |= IORESOURCE_MEM_64; + if ((sizeof(resource_size_t) < 8) && l) { /* Address above 32-bit boundary; disable the BAR */ pci_write_config_dword(dev, pos, 0); pci_write_config_dword(dev, pos + 4, 0); @@ -234,14 +239,9 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, } else { res->start = l64; res->end = l64 + sz64; - dev_printk(KERN_DEBUG, &dev->dev, - "reg %x %s: %pR\n", pos, - (res->flags & IORESOURCE_PREFETCH) ? - "64bit mmio pref" : "64bit mmio", - res); + dev_printk(KERN_DEBUG, &dev->dev, "reg %x: %pR\n", + pos, res); } - - res->flags |= IORESOURCE_MEM_64; } else { sz = pci_size(l, sz, mask); @@ -251,11 +251,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, res->start = l; res->end = l + sz; - dev_printk(KERN_DEBUG, &dev->dev, "reg %x %s: %pR\n", pos, - (res->flags & IORESOURCE_IO) ? "io port" : - ((res->flags & IORESOURCE_PREFETCH) ? - "32bit mmio pref" : "32bit mmio"), - res); + dev_printk(KERN_DEBUG, &dev->dev, "reg %x: %pR\n", pos, res); } out: @@ -297,8 +293,11 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) if (pci_is_root_bus(child)) /* It's a host bus, nothing to read */ return; + dev_info(&dev->dev, "PCI bridge to [bus %02x-%02x]%s\n", + child->secondary, child->subordinate, + dev->transparent ? " (subtractive decode)": ""); + if (dev->transparent) { - dev_info(&dev->dev, "transparent bridge\n"); for(i = 3; i < PCI_BUS_NUM_RESOURCES; i++) child->resource[i] = child->parent->resource[i - 3]; } @@ -323,7 +322,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->start = base; if (!res->end) res->end = limit + 0xfff; - dev_printk(KERN_DEBUG, &dev->dev, "bridge io port: %pR\n", res); + dev_printk(KERN_DEBUG, &dev->dev, " bridge window %pR\n", res); } res = child->resource[1]; @@ -335,8 +334,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM; res->start = base; res->end = limit + 0xfffff; - dev_printk(KERN_DEBUG, &dev->dev, "bridge 32bit mmio: %pR\n", - res); + dev_printk(KERN_DEBUG, &dev->dev, " bridge window %pR\n", res); } res = child->resource[2]; @@ -375,9 +373,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->flags |= IORESOURCE_MEM_64; res->start = base; res->end = limit + 0xfffff; - dev_printk(KERN_DEBUG, &dev->dev, "bridge %sbit mmio pref: %pR\n", - (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64" : "32", - res); + dev_printk(KERN_DEBUG, &dev->dev, " bridge window %pR\n", res); } } @@ -651,13 +647,14 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, (child->number > bus->subordinate) || (child->number < bus->number) || (child->subordinate < bus->number)) { - pr_debug("PCI: Bus #%02x (-#%02x) is %s " - "hidden behind%s bridge #%02x (-#%02x)\n", + dev_info(&child->dev, "[bus %02x-%02x] %s " + "hidden behind%s bridge %s [bus %02x-%02x]\n", child->number, child->subordinate, (bus->number > child->subordinate && bus->subordinate < child->number) ? "wholly" : "partially", bus->self->transparent ? " transparent" : "", + dev_name(&bus->dev), bus->number, bus->subordinate); } bus = bus->parent; @@ -693,6 +690,7 @@ static void set_pcie_port_type(struct pci_dev *pdev) if (!pos) return; pdev->is_pcie = 1; + pdev->pcie_cap = pos; pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; } @@ -703,7 +701,7 @@ static void set_pcie_hotplug_bridge(struct pci_dev *pdev) u16 reg16; u32 reg32; - pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); + pos = pci_pcie_cap(pdev); if (!pos) return; pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); @@ -714,6 +712,12 @@ static void set_pcie_hotplug_bridge(struct pci_dev *pdev) pdev->is_hotplug_bridge = 1; } +static void set_pci_aer_firmware_first(struct pci_dev *pdev) +{ + if (acpi_hest_firmware_first_pci(pdev)) + pdev->aer_firmware_first = 1; +} + #define LEGACY_IO_RESOURCE (IORESOURCE_IO | IORESOURCE_PCI_FIXED) /** @@ -731,6 +735,7 @@ int pci_setup_device(struct pci_dev *dev) u32 class; u8 hdr_type; struct pci_slot *slot; + int pos = 0; if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type)) return -EIO; @@ -742,6 +747,7 @@ int pci_setup_device(struct pci_dev *dev) dev->multifunction = !!(hdr_type & 0x80); dev->error_state = pci_channel_io_normal; set_pcie_port_type(dev); + set_pci_aer_firmware_first(dev); list_for_each_entry(slot, &dev->bus->slots, list) if (PCI_SLOT(dev->devfn) == slot->number) @@ -822,6 +828,11 @@ int pci_setup_device(struct pci_dev *dev) dev->transparent = ((dev->class & 0xff) == 1); pci_read_bases(dev, 2, PCI_ROM_ADDRESS1); set_pcie_hotplug_bridge(dev); + pos = pci_find_capability(dev, PCI_CAP_ID_SSVID); + if (pos) { + pci_read_config_word(dev, pos + PCI_SSVID_VENDOR_ID, &dev->subsystem_vendor); + pci_read_config_word(dev, pos + PCI_SSVID_DEVICE_ID, &dev->subsystem_device); + } break; case PCI_HEADER_TYPE_CARDBUS: /* CardBus bridge header */ @@ -907,7 +918,7 @@ int pci_cfg_space_size(struct pci_dev *dev) if (class == PCI_CLASS_BRIDGE_HOST) return pci_cfg_space_size_ext(dev); - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + pos = pci_pcie_cap(dev); if (!pos) { pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); if (!pos) @@ -1014,6 +1025,9 @@ static void pci_init_capabilities(struct pci_dev *dev) /* Single Root I/O Virtualization */ pci_iov_init(dev); + + /* Enable ACS P2P upstream forwarding */ + pci_enable_acs(dev); } void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) @@ -1110,7 +1124,7 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) unsigned int devfn, pass, max = bus->secondary; struct pci_dev *dev; - pr_debug("PCI: Scanning bus %04x:%02x\n", pci_domain_nr(bus), bus->number); + dev_dbg(&bus->dev, "scanning bus\n"); /* Go find them, Rover! */ for (devfn = 0; devfn < 0x100; devfn += 8) @@ -1124,8 +1138,7 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) * all PCI-to-PCI bridges on this bus. */ if (!bus->is_added) { - pr_debug("PCI: Fixups for bus %04x:%02x\n", - pci_domain_nr(bus), bus->number); + dev_dbg(&bus->dev, "fixups for bus\n"); pcibios_fixup_bus(bus); if (pci_is_root_bus(bus)) bus->is_added = 1; @@ -1145,8 +1158,7 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) * * Return how far we've got finding sub-buses. */ - pr_debug("PCI: Bus scan for %04x:%02x returning with max=%02x\n", - pci_domain_nr(bus), bus->number, max); + dev_dbg(&bus->dev, "bus scan returning with max=%02x\n", max); return max; } @@ -1154,7 +1166,7 @@ struct pci_bus * pci_create_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata) { int error; - struct pci_bus *b; + struct pci_bus *b, *b2; struct device *dev; b = pci_alloc_bus(); @@ -1170,9 +1182,10 @@ struct pci_bus * pci_create_bus(struct device *parent, b->sysdata = sysdata; b->ops = ops; - if (pci_find_bus(pci_domain_nr(b), bus)) { + b2 = pci_find_bus(pci_domain_nr(b), bus); + if (b2) { /* If we already got to this bus through a different bridge, ignore it */ - pr_debug("PCI: Bus %04x:%02x already known\n", pci_domain_nr(b), bus); + dev_dbg(&b2->dev, "bus already known\n"); goto err_out; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 245d2cdb476..7cfa7c38d31 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -357,7 +357,7 @@ static void __devinit quirk_io_region(struct pci_dev *dev, unsigned region, pcibios_bus_to_resource(dev, res, &bus_region); pci_claim_resource(dev, nr); - dev_info(&dev->dev, "quirk: region %04x-%04x claimed by %s\n", region, region + size - 1, name); + dev_info(&dev->dev, "quirk: %pR claimed by %s\n", res, name); } } @@ -1680,6 +1680,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_ */ #define AMD_813X_MISC 0x40 #define AMD_813X_NOIOAMODE (1<<0) +#define AMD_813X_REV_B1 0x12 #define AMD_813X_REV_B2 0x13 static void quirk_disable_amd_813x_boot_interrupt(struct pci_dev *dev) @@ -1688,7 +1689,8 @@ static void quirk_disable_amd_813x_boot_interrupt(struct pci_dev *dev) if (noioapicquirk) return; - if (dev->revision == AMD_813X_REV_B2) + if ((dev->revision == AMD_813X_REV_B1) || + (dev->revision == AMD_813X_REV_B2)) return; pci_read_config_dword(dev, AMD_813X_MISC, &pci_config_dword); @@ -1698,8 +1700,10 @@ static void quirk_disable_amd_813x_boot_interrupt(struct pci_dev *dev) dev_info(&dev->dev, "disabled boot interrupts on device [%04x:%04x]\n", dev->vendor, dev->device); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_disable_amd_813x_boot_interrupt); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE, quirk_disable_amd_813x_boot_interrupt); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_disable_amd_813x_boot_interrupt); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_disable_amd_813x_boot_interrupt); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE, quirk_disable_amd_813x_boot_interrupt); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE, quirk_disable_amd_813x_boot_interrupt); #define AMD_8111_PCI_IRQ_ROUTING 0x56 @@ -2595,9 +2599,37 @@ void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) static int __init pci_apply_final_quirks(void) { struct pci_dev *dev = NULL; + u8 cls = 0; + u8 tmp; + + if (pci_cache_line_size) + printk(KERN_DEBUG "PCI: CLS %u bytes\n", + pci_cache_line_size << 2); while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { pci_fixup_device(pci_fixup_final, dev); + /* + * If arch hasn't set it explicitly yet, use the CLS + * value shared by all PCI devices. If there's a + * mismatch, fall back to the default value. + */ + if (!pci_cache_line_size) { + pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &tmp); + if (!cls) + cls = tmp; + if (!tmp || cls == tmp) + continue; + + printk(KERN_DEBUG "PCI: CLS mismatch (%u != %u), " + "using %u bytes\n", cls << 2, tmp << 2, + pci_dfl_cache_line_size << 2); + pci_cache_line_size = pci_dfl_cache_line_size; + } + } + if (!pci_cache_line_size) { + printk(KERN_DEBUG "PCI: CLS %u bytes, default %u\n", + cls << 2, pci_dfl_cache_line_size << 2); + pci_cache_line_size = cls; } return 0; diff --git a/drivers/pci/search.c b/drivers/pci/search.c index ec415352d9b..6dae8714325 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -26,14 +26,14 @@ pci_find_upstream_pcie_bridge(struct pci_dev *pdev) { struct pci_dev *tmp = NULL; - if (pdev->is_pcie) + if (pci_is_pcie(pdev)) return NULL; while (1) { if (pci_is_root_bus(pdev->bus)) break; pdev = pdev->bus->self; /* a p2p bridge */ - if (!pdev->is_pcie) { + if (!pci_is_pcie(pdev)) { tmp = pdev; continue; } @@ -149,32 +149,33 @@ struct pci_dev * pci_get_slot(struct pci_bus *bus, unsigned int devfn) } /** - * pci_get_bus_and_slot - locate PCI device from a given PCI bus & slot - * @bus: number of PCI bus on which desired PCI device resides - * @devfn: encodes number of PCI slot in which the desired PCI - * device resides and the logical device number within that slot - * in case of multi-function devices. - * - * Note: the bus/slot search is limited to PCI domain (segment) 0. + * pci_get_domain_bus_and_slot - locate PCI device for a given PCI domain (segment), bus, and slot + * @domain: PCI domain/segment on which the PCI device resides. + * @bus: PCI bus on which desired PCI device resides + * @devfn: encodes number of PCI slot in which the desired PCI device + * resides and the logical device number within that slot in case of + * multi-function devices. * - * Given a PCI bus and slot/function number, the desired PCI device - * is located in system global list of PCI devices. If the device - * is found, a pointer to its data structure is returned. If no - * device is found, %NULL is returned. The returned device has its - * reference count bumped by one. + * Given a PCI domain, bus, and slot/function number, the desired PCI + * device is located in the list of PCI devices. If the device is + * found, its reference count is increased and this function returns a + * pointer to its data structure. The caller must decrement the + * reference count by calling pci_dev_put(). If no device is found, + * %NULL is returned. */ - -struct pci_dev * pci_get_bus_and_slot(unsigned int bus, unsigned int devfn) +struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus, + unsigned int devfn) { struct pci_dev *dev = NULL; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - if (pci_domain_nr(dev->bus) == 0 && - (dev->bus->number == bus && dev->devfn == devfn)) + if (pci_domain_nr(dev->bus) == domain && + (dev->bus->number == bus && dev->devfn == devfn)) return dev; } return NULL; } +EXPORT_SYMBOL(pci_get_domain_bus_and_slot); static int match_pci_dev_by_id(struct device *dev, void *data) { @@ -354,5 +355,4 @@ EXPORT_SYMBOL(pci_find_next_bus); EXPORT_SYMBOL(pci_get_device); EXPORT_SYMBOL(pci_get_subsys); EXPORT_SYMBOL(pci_get_slot); -EXPORT_SYMBOL(pci_get_bus_and_slot); EXPORT_SYMBOL(pci_get_class); diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index cb1a027eb55..c48cd377b3f 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -71,53 +71,50 @@ static void pbus_assign_resources_sorted(const struct pci_bus *bus) void pci_setup_cardbus(struct pci_bus *bus) { struct pci_dev *bridge = bus->self; + struct resource *res; struct pci_bus_region region; - dev_info(&bridge->dev, "CardBus bridge, secondary bus %04x:%02x\n", - pci_domain_nr(bus), bus->number); + dev_info(&bridge->dev, "CardBus bridge to [bus %02x-%02x]\n", + bus->secondary, bus->subordinate); - pcibios_resource_to_bus(bridge, ®ion, bus->resource[0]); - if (bus->resource[0]->flags & IORESOURCE_IO) { + res = bus->resource[0]; + pcibios_resource_to_bus(bridge, ®ion, res); + if (res->flags & IORESOURCE_IO) { /* * The IO resource is allocated a range twice as large as it * would normally need. This allows us to set both IO regs. */ - dev_info(&bridge->dev, " IO window: %#08lx-%#08lx\n", - (unsigned long)region.start, - (unsigned long)region.end); + dev_info(&bridge->dev, " bridge window %pR\n", res); pci_write_config_dword(bridge, PCI_CB_IO_BASE_0, region.start); pci_write_config_dword(bridge, PCI_CB_IO_LIMIT_0, region.end); } - pcibios_resource_to_bus(bridge, ®ion, bus->resource[1]); - if (bus->resource[1]->flags & IORESOURCE_IO) { - dev_info(&bridge->dev, " IO window: %#08lx-%#08lx\n", - (unsigned long)region.start, - (unsigned long)region.end); + res = bus->resource[1]; + pcibios_resource_to_bus(bridge, ®ion, res); + if (res->flags & IORESOURCE_IO) { + dev_info(&bridge->dev, " bridge window %pR\n", res); pci_write_config_dword(bridge, PCI_CB_IO_BASE_1, region.start); pci_write_config_dword(bridge, PCI_CB_IO_LIMIT_1, region.end); } - pcibios_resource_to_bus(bridge, ®ion, bus->resource[2]); - if (bus->resource[2]->flags & IORESOURCE_MEM) { - dev_info(&bridge->dev, " PREFETCH window: %#08lx-%#08lx\n", - (unsigned long)region.start, - (unsigned long)region.end); + res = bus->resource[2]; + pcibios_resource_to_bus(bridge, ®ion, res); + if (res->flags & IORESOURCE_MEM) { + dev_info(&bridge->dev, " bridge window %pR\n", res); pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_0, region.start); pci_write_config_dword(bridge, PCI_CB_MEMORY_LIMIT_0, region.end); } - pcibios_resource_to_bus(bridge, ®ion, bus->resource[3]); - if (bus->resource[3]->flags & IORESOURCE_MEM) { - dev_info(&bridge->dev, " MEM window: %#08lx-%#08lx\n", - (unsigned long)region.start, - (unsigned long)region.end); + res = bus->resource[3]; + pcibios_resource_to_bus(bridge, ®ion, res); + if (res->flags & IORESOURCE_MEM) { + dev_info(&bridge->dev, " bridge window %pR\n", res); pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_1, region.start); pci_write_config_dword(bridge, PCI_CB_MEMORY_LIMIT_1, @@ -140,34 +137,33 @@ EXPORT_SYMBOL(pci_setup_cardbus); static void pci_setup_bridge(struct pci_bus *bus) { struct pci_dev *bridge = bus->self; + struct resource *res; struct pci_bus_region region; u32 l, bu, lu, io_upper16; - int pref_mem64; if (pci_is_enabled(bridge)) return; - dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n", - pci_domain_nr(bus), bus->number); + dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n", + bus->secondary, bus->subordinate); /* Set up the top and bottom of the PCI I/O segment for this bus. */ - pcibios_resource_to_bus(bridge, ®ion, bus->resource[0]); - if (bus->resource[0]->flags & IORESOURCE_IO) { + res = bus->resource[0]; + pcibios_resource_to_bus(bridge, ®ion, res); + if (res->flags & IORESOURCE_IO) { pci_read_config_dword(bridge, PCI_IO_BASE, &l); l &= 0xffff0000; l |= (region.start >> 8) & 0x00f0; l |= region.end & 0xf000; /* Set up upper 16 bits of I/O base/limit. */ io_upper16 = (region.end & 0xffff0000) | (region.start >> 16); - dev_info(&bridge->dev, " IO window: %#04lx-%#04lx\n", - (unsigned long)region.start, - (unsigned long)region.end); + dev_info(&bridge->dev, " bridge window %pR\n", res); } else { /* Clear upper 16 bits of I/O base/limit. */ io_upper16 = 0; l = 0x00f0; - dev_info(&bridge->dev, " IO window: disabled\n"); + dev_info(&bridge->dev, " bridge window [io disabled]\n"); } /* Temporarily disable the I/O range before updating PCI_IO_BASE. */ pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0x0000ffff); @@ -178,17 +174,16 @@ static void pci_setup_bridge(struct pci_bus *bus) /* Set up the top and bottom of the PCI Memory segment for this bus. */ - pcibios_resource_to_bus(bridge, ®ion, bus->resource[1]); - if (bus->resource[1]->flags & IORESOURCE_MEM) { + res = bus->resource[1]; + pcibios_resource_to_bus(bridge, ®ion, res); + if (res->flags & IORESOURCE_MEM) { l = (region.start >> 16) & 0xfff0; l |= region.end & 0xfff00000; - dev_info(&bridge->dev, " MEM window: %#08lx-%#08lx\n", - (unsigned long)region.start, - (unsigned long)region.end); + dev_info(&bridge->dev, " bridge window %pR\n", res); } else { l = 0x0000fff0; - dev_info(&bridge->dev, " MEM window: disabled\n"); + dev_info(&bridge->dev, " bridge window [mem disabled]\n"); } pci_write_config_dword(bridge, PCI_MEMORY_BASE, l); @@ -198,34 +193,27 @@ static void pci_setup_bridge(struct pci_bus *bus) pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0); /* Set up PREF base/limit. */ - pref_mem64 = 0; bu = lu = 0; - pcibios_resource_to_bus(bridge, ®ion, bus->resource[2]); - if (bus->resource[2]->flags & IORESOURCE_PREFETCH) { - int width = 8; + res = bus->resource[2]; + pcibios_resource_to_bus(bridge, ®ion, res); + if (res->flags & IORESOURCE_PREFETCH) { l = (region.start >> 16) & 0xfff0; l |= region.end & 0xfff00000; - if (bus->resource[2]->flags & IORESOURCE_MEM_64) { - pref_mem64 = 1; + if (res->flags & IORESOURCE_MEM_64) { bu = upper_32_bits(region.start); lu = upper_32_bits(region.end); - width = 16; } - dev_info(&bridge->dev, " PREFETCH window: %#0*llx-%#0*llx\n", - width, (unsigned long long)region.start, - width, (unsigned long long)region.end); + dev_info(&bridge->dev, " bridge window %pR\n", res); } else { l = 0x0000fff0; - dev_info(&bridge->dev, " PREFETCH window: disabled\n"); + dev_info(&bridge->dev, " bridge window [mem pref disabled]\n"); } pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l); - if (pref_mem64) { - /* Set the upper 32 bits of PREF base & limit. */ - pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu); - pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu); - } + /* Set the upper 32 bits of PREF base & limit. */ + pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu); + pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu); pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl); } @@ -345,6 +333,10 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size) #endif size = ALIGN(size + size1, 4096); if (!size) { + if (b_res->start || b_res->end) + dev_info(&bus->self->dev, "disabling bridge window " + "%pR to [bus %02x-%02x] (unused)\n", b_res, + bus->secondary, bus->subordinate); b_res->flags = 0; return; } @@ -390,8 +382,9 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, align = pci_resource_alignment(dev, r); order = __ffs(align) - 20; if (order > 11) { - dev_warn(&dev->dev, "BAR %d bad alignment %llx: " - "%pR\n", i, (unsigned long long)align, r); + dev_warn(&dev->dev, "disabling BAR %d: %pR " + "(bad alignment %#llx)\n", i, r, + (unsigned long long) align); r->flags = 0; continue; } @@ -425,6 +418,10 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, } size = ALIGN(size, min_align); if (!size) { + if (b_res->start || b_res->end) + dev_info(&bus->self->dev, "disabling bridge window " + "%pR to [bus %02x-%02x] (unused)\n", b_res, + bus->secondary, bus->subordinate); b_res->flags = 0; return 1; } @@ -582,10 +579,7 @@ static void pci_bus_dump_res(struct pci_bus *bus) if (!res || !res->end) continue; - dev_printk(KERN_DEBUG, &bus->dev, "resource %d %s %pR\n", i, - (res->flags & IORESOURCE_IO) ? "io: " : - ((res->flags & IORESOURCE_PREFETCH)? "pref mem":"mem:"), - res); + dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pR\n", i, res); } } diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index c54526b206b..7d678bb15ff 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -51,12 +51,6 @@ void pci_update_resource(struct pci_dev *dev, int resno) pcibios_resource_to_bus(dev, ®ion, res); - dev_dbg(&dev->dev, "BAR %d: got res %pR bus [%#llx-%#llx] " - "flags %#lx\n", resno, res, - (unsigned long long)region.start, - (unsigned long long)region.end, - (unsigned long)res->flags); - new = region.start | (res->flags & PCI_REGION_FLAG_MASK); if (res->flags & IORESOURCE_IO) mask = (u32)PCI_BASE_ADDRESS_IO_MASK; @@ -91,9 +85,9 @@ void pci_update_resource(struct pci_dev *dev, int resno) } } res->flags &= ~IORESOURCE_UNSET; - dev_dbg(&dev->dev, "BAR %d: moved to bus [%#llx-%#llx] flags %#lx\n", - resno, (unsigned long long)region.start, - (unsigned long long)region.end, res->flags); + dev_info(&dev->dev, "BAR %d: set to %pR (PCI address [%#llx-%#llx]\n", + resno, res, (unsigned long long)region.start, + (unsigned long long)region.end); } int pci_claim_resource(struct pci_dev *dev, int resource) @@ -103,20 +97,17 @@ int pci_claim_resource(struct pci_dev *dev, int resource) int err; root = pci_find_parent_resource(dev, res); - - err = -EINVAL; - if (root != NULL) - err = request_resource(root, res); - - if (err) { - const char *dtype = resource < PCI_BRIDGE_RESOURCES ? "device" : "bridge"; - dev_err(&dev->dev, "BAR %d: %s of %s %pR\n", - resource, - root ? "address space collision on" : - "no parent found for", - dtype, res); + if (!root) { + dev_err(&dev->dev, "no compatible bridge window for %pR\n", + res); + return -EINVAL; } + err = request_resource(root, res); + if (err) + dev_err(&dev->dev, + "address space collision: %pR already in use\n", res); + return err; } EXPORT_SYMBOL(pci_claim_resource); @@ -124,7 +115,7 @@ EXPORT_SYMBOL(pci_claim_resource); #ifdef CONFIG_PCI_QUIRKS void pci_disable_bridge_window(struct pci_dev *dev) { - dev_dbg(&dev->dev, "Disabling bridge window.\n"); + dev_info(&dev->dev, "disabling bridge mem windows\n"); /* MMIO Base/Limit */ pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0); @@ -165,6 +156,7 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, if (!ret) { res->flags &= ~IORESOURCE_STARTALIGN; + dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); if (resno < PCI_BRIDGE_RESOURCES) pci_update_resource(dev, resno); } @@ -178,12 +170,12 @@ int pci_assign_resource(struct pci_dev *dev, int resno) resource_size_t align; struct pci_bus *bus; int ret; + char *type; align = pci_resource_alignment(dev, res); if (!align) { - dev_info(&dev->dev, "BAR %d: can't allocate resource (bogus " - "alignment) %pR flags %#lx\n", - resno, res, res->flags); + dev_info(&dev->dev, "BAR %d: can't assign %pR " + "(bogus alignment)\n", resno, res); return -EINVAL; } @@ -198,9 +190,20 @@ int pci_assign_resource(struct pci_dev *dev, int resno) break; } - if (ret) - dev_info(&dev->dev, "BAR %d: can't allocate %s resource %pR\n", - resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", res); + if (ret) { + if (res->flags & IORESOURCE_MEM) + if (res->flags & IORESOURCE_PREFETCH) + type = "mem pref"; + else + type = "mem"; + else if (res->flags & IORESOURCE_IO) + type = "io"; + else + type = "unknown"; + dev_info(&dev->dev, + "BAR %d: can't assign %s (size %#llx)\n", + resno, type, (unsigned long long) resource_size(res)); + } return ret; } @@ -225,9 +228,8 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) r_align = pci_resource_alignment(dev, r); if (!r_align) { - dev_warn(&dev->dev, "BAR %d: bogus alignment " - "%pR flags %#lx\n", - i, r, r->flags); + dev_warn(&dev->dev, "BAR %d: %pR has bogus alignment\n", + i, r); continue; } for (list = head; ; list = list->next) { @@ -274,8 +276,8 @@ int pci_enable_resources(struct pci_dev *dev, int mask) continue; if (!r->parent) { - dev_err(&dev->dev, "device not available because of " - "BAR %d %pR collisions\n", i, r); + dev_err(&dev->dev, "device not available " + "(can't reserve %pR)\n", r); return -EINVAL; } diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c index 4cd70d05681..a73b040ddbf 100644 --- a/drivers/pcmcia/cardbus.c +++ b/drivers/pcmcia/cardbus.c @@ -184,26 +184,33 @@ fail: =====================================================================*/ -/* - * Since there is only one interrupt available to CardBus - * devices, all devices downstream of this device must - * be using this IRQ. - */ -static void cardbus_assign_irqs(struct pci_bus *bus, int irq) +static void cardbus_config_irq_and_cls(struct pci_bus *bus, int irq) { struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { u8 irq_pin; + /* + * Since there is only one interrupt available to + * CardBus devices, all devices downstream of this + * device must be using this IRQ. + */ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq_pin); if (irq_pin) { dev->irq = irq; pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); } + /* + * Some controllers transfer very slowly with 0 CLS. + * Configure it. This may fail as CLS configuration + * is mandatory only for MWI. + */ + pci_set_cacheline_size(dev); + if (dev->subordinate) - cardbus_assign_irqs(dev->subordinate, irq); + cardbus_config_irq_and_cls(dev->subordinate, irq); } } @@ -228,7 +235,7 @@ int __ref cb_alloc(struct pcmcia_socket * s) */ pci_bus_size_bridges(bus); pci_bus_assign_resources(bus); - cardbus_assign_irqs(bus, s->pci_irq); + cardbus_config_irq_and_cls(bus, s->pci_irq); /* socket specific tune function */ if (s->tune_bridge) diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c index 8473fe5ed7f..dfbd5a6cc58 100644 --- a/drivers/pnp/quirks.c +++ b/drivers/pnp/quirks.c @@ -285,15 +285,10 @@ static void quirk_system_pci_resources(struct pnp_dev *dev) * the PCI region, and that might prevent a PCI * driver from requesting its resources. */ - dev_warn(&dev->dev, "%s resource " - "(0x%llx-0x%llx) overlaps %s BAR %d " - "(0x%llx-0x%llx), disabling\n", - pnp_resource_type_name(res), - (unsigned long long) pnp_start, - (unsigned long long) pnp_end, - pci_name(pdev), i, - (unsigned long long) pci_start, - (unsigned long long) pci_end); + dev_warn(&dev->dev, + "disabling %pR because it overlaps " + "%s BAR %d %pR\n", res, + pci_name(pdev), i, &pdev->resource[i]); res->flags |= IORESOURCE_DISABLED; } } diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index ba976542788..64d0596bafb 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -517,7 +517,7 @@ struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq, res->start = irq; res->end = irq; - pnp_dbg(&dev->dev, " add irq %d flags %#x\n", irq, flags); + pnp_dbg(&dev->dev, " add %pr\n", res); return pnp_res; } @@ -538,7 +538,7 @@ struct pnp_resource *pnp_add_dma_resource(struct pnp_dev *dev, int dma, res->start = dma; res->end = dma; - pnp_dbg(&dev->dev, " add dma %d flags %#x\n", dma, flags); + pnp_dbg(&dev->dev, " add %pr\n", res); return pnp_res; } @@ -562,8 +562,7 @@ struct pnp_resource *pnp_add_io_resource(struct pnp_dev *dev, res->start = start; res->end = end; - pnp_dbg(&dev->dev, " add io %#llx-%#llx flags %#x\n", - (unsigned long long) start, (unsigned long long) end, flags); + pnp_dbg(&dev->dev, " add %pr\n", res); return pnp_res; } @@ -587,8 +586,7 @@ struct pnp_resource *pnp_add_mem_resource(struct pnp_dev *dev, res->start = start; res->end = end; - pnp_dbg(&dev->dev, " add mem %#llx-%#llx flags %#x\n", - (unsigned long long) start, (unsigned long long) end, flags); + pnp_dbg(&dev->dev, " add %pr\n", res); return pnp_res; } diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c index 63087d5ce60..9585c1c1cc3 100644 --- a/drivers/pnp/support.c +++ b/drivers/pnp/support.c @@ -75,47 +75,14 @@ char *pnp_resource_type_name(struct resource *res) void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc) { - char buf[128]; - int len; struct pnp_resource *pnp_res; - struct resource *res; - if (list_empty(&dev->resources)) { + if (list_empty(&dev->resources)) pnp_dbg(&dev->dev, "%s: no current resources\n", desc); - return; - } - - pnp_dbg(&dev->dev, "%s: current resources:\n", desc); - list_for_each_entry(pnp_res, &dev->resources, list) { - res = &pnp_res->res; - len = 0; - - len += scnprintf(buf + len, sizeof(buf) - len, " %-3s ", - pnp_resource_type_name(res)); - - if (res->flags & IORESOURCE_DISABLED) { - pnp_dbg(&dev->dev, "%sdisabled\n", buf); - continue; - } - - switch (pnp_resource_type(res)) { - case IORESOURCE_IO: - case IORESOURCE_MEM: - len += scnprintf(buf + len, sizeof(buf) - len, - "%#llx-%#llx flags %#lx", - (unsigned long long) res->start, - (unsigned long long) res->end, - res->flags); - break; - case IORESOURCE_IRQ: - case IORESOURCE_DMA: - len += scnprintf(buf + len, sizeof(buf) - len, - "%lld flags %#lx", - (unsigned long long) res->start, - res->flags); - break; - } - pnp_dbg(&dev->dev, "%s\n", buf); + else { + pnp_dbg(&dev->dev, "%s: current resources:\n", desc); + list_for_each_entry(pnp_res, &dev->resources, list) + pnp_dbg(&dev->dev, "%pr\n", &pnp_res->res); } } diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c index 59b90922da8..49c1720df59 100644 --- a/drivers/pnp/system.c +++ b/drivers/pnp/system.c @@ -22,11 +22,11 @@ static const struct pnp_device_id pnp_dev_table[] = { {"", 0} }; -static void reserve_range(struct pnp_dev *dev, resource_size_t start, - resource_size_t end, int port) +static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) { char *regionid; const char *pnpid = dev_name(&dev->dev); + resource_size_t start = r->start, end = r->end; struct resource *res; regionid = kmalloc(16, GFP_KERNEL); @@ -48,10 +48,8 @@ static void reserve_range(struct pnp_dev *dev, resource_size_t start, * example do reserve stuff they know about too, so we may well * have double reservations. */ - dev_info(&dev->dev, "%s range 0x%llx-0x%llx %s reserved\n", - port ? "ioport" : "iomem", - (unsigned long long) start, (unsigned long long) end, - res ? "has been" : "could not be"); + dev_info(&dev->dev, "%pR %s reserved\n", r, + res ? "has been" : "could not be"); } static void reserve_resources_of_dev(struct pnp_dev *dev) @@ -77,14 +75,14 @@ static void reserve_resources_of_dev(struct pnp_dev *dev) if (res->end < res->start) continue; /* invalid */ - reserve_range(dev, res->start, res->end, 1); + reserve_range(dev, res, 1); } for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_MEM, i)); i++) { if (res->flags & IORESOURCE_DISABLED) continue; - reserve_range(dev, res->start, res->end, 0); + reserve_range(dev, res, 0); } } diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c index 91a68e9eb66..603598f4dbb 100644 --- a/drivers/video/xen-fbfront.c +++ b/drivers/video/xen-fbfront.c @@ -25,7 +25,10 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/mm.h> + #include <asm/xen/hypervisor.h> + +#include <xen/xen.h> #include <xen/events.h> #include <xen/page.h> #include <xen/interface/io/fbif.h> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 42043361358..f6738d8b02b 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -52,6 +52,8 @@ #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> + +#include <xen/xen.h> #include <xen/interface/xen.h> #include <xen/interface/memory.h> #include <xen/xenbus.h> diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index 0f765a92018..14e2d995e95 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c @@ -1,5 +1,6 @@ #include <linux/notifier.h> +#include <xen/xen.h> #include <xen/xenbus.h> #include <asm/xen/hypervisor.h> diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 79bedba44fe..f70a4f4698c 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -48,6 +48,8 @@ #include <linux/gfp.h> #include <linux/mutex.h> #include <linux/cpu.h> + +#include <xen/xen.h> #include <xen/events.h> #include <xen/evtchn.h> #include <asm/xen/hypervisor.h> diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 7d8f531fb8e..4c6c0bd636a 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -37,6 +37,7 @@ #include <linux/vmalloc.h> #include <linux/uaccess.h> +#include <xen/xen.h> #include <xen/interface/xen.h> #include <xen/page.h> #include <xen/grant_table.h> diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index 88a60e03ccf..ae5cb05a1a1 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -14,6 +14,7 @@ #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> +#include <xen/xen.h> #include <xen/xenbus.h> #include <xen/interface/xen.h> #include <xen/interface/version.h> diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 649fcdf114b..2f7aaa99dc4 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -49,6 +49,8 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/xen/hypervisor.h> + +#include <xen/xen.h> #include <xen/xenbus.h> #include <xen/events.h> #include <xen/page.h> diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 6559e0c752c..8924d93136f 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -13,6 +13,8 @@ #include <linux/fs.h> #include <linux/magic.h> +#include <xen/xen.h> + #include "xenfs.h" #include <asm/xen/hypervisor.h> diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index d69e6ae5925..3f959f1879d 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -142,29 +142,75 @@ static void nilfs_palloc_desc_block_init(struct inode *inode, } } +static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff, + int create, + void (*init_block)(struct inode *, + struct buffer_head *, + void *), + struct buffer_head **bhp, + struct nilfs_bh_assoc *prev, + spinlock_t *lock) +{ + int ret; + + spin_lock(lock); + if (prev->bh && blkoff == prev->blkoff) { + get_bh(prev->bh); + *bhp = prev->bh; + spin_unlock(lock); + return 0; + } + spin_unlock(lock); + + ret = nilfs_mdt_get_block(inode, blkoff, create, init_block, bhp); + if (!ret) { + spin_lock(lock); + /* + * The following code must be safe for change of the + * cache contents during the get block call. + */ + brelse(prev->bh); + get_bh(*bhp); + prev->bh = *bhp; + prev->blkoff = blkoff; + spin_unlock(lock); + } + return ret; +} + static int nilfs_palloc_get_desc_block(struct inode *inode, unsigned long group, int create, struct buffer_head **bhp) { - return nilfs_mdt_get_block(inode, - nilfs_palloc_desc_blkoff(inode, group), - create, nilfs_palloc_desc_block_init, bhp); + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return nilfs_palloc_get_block(inode, + nilfs_palloc_desc_blkoff(inode, group), + create, nilfs_palloc_desc_block_init, + bhp, &cache->prev_desc, &cache->lock); } static int nilfs_palloc_get_bitmap_block(struct inode *inode, unsigned long group, int create, struct buffer_head **bhp) { - return nilfs_mdt_get_block(inode, - nilfs_palloc_bitmap_blkoff(inode, group), - create, NULL, bhp); + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return nilfs_palloc_get_block(inode, + nilfs_palloc_bitmap_blkoff(inode, group), + create, NULL, bhp, + &cache->prev_bitmap, &cache->lock); } int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, int create, struct buffer_head **bhp) { - return nilfs_mdt_get_block(inode, nilfs_palloc_entry_blkoff(inode, nr), - create, NULL, bhp); + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return nilfs_palloc_get_block(inode, + nilfs_palloc_entry_blkoff(inode, nr), + create, NULL, bhp, + &cache->prev_entry, &cache->lock); } static struct nilfs_palloc_group_desc * @@ -176,13 +222,6 @@ nilfs_palloc_block_get_group_desc(const struct inode *inode, group % nilfs_palloc_groups_per_desc_block(inode); } -static unsigned char * -nilfs_palloc_block_get_bitmap(const struct inode *inode, - const struct buffer_head *bh, void *kaddr) -{ - return (unsigned char *)(kaddr + bh_offset(bh)); -} - void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, const struct buffer_head *bh, void *kaddr) { @@ -289,8 +328,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, if (ret < 0) goto out_desc; bitmap_kaddr = kmap(bitmap_bh->b_page); - bitmap = nilfs_palloc_block_get_bitmap( - inode, bitmap_bh, bitmap_kaddr); + bitmap = bitmap_kaddr + bh_offset(bitmap_bh); pos = nilfs_palloc_find_available_slot( inode, group, group_offset, bitmap, entries_per_group); @@ -351,8 +389,7 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, desc = nilfs_palloc_block_get_group_desc(inode, group, req->pr_desc_bh, desc_kaddr); bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); - bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, - bitmap_kaddr); + bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), group_offset, bitmap)) @@ -385,8 +422,7 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, desc = nilfs_palloc_block_get_group_desc(inode, group, req->pr_desc_bh, desc_kaddr); bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); - bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, - bitmap_kaddr); + bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), group_offset, bitmap)) printk(KERN_WARNING "%s: entry numer %llu already freed\n", @@ -472,8 +508,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) desc = nilfs_palloc_block_get_group_desc( inode, group, desc_bh, desc_kaddr); bitmap_kaddr = kmap(bitmap_bh->b_page); - bitmap = nilfs_palloc_block_get_bitmap( - inode, bitmap_bh, bitmap_kaddr); + bitmap = bitmap_kaddr + bh_offset(bitmap_bh); for (j = i, n = 0; (j < nitems) && nilfs_palloc_group_is_in(inode, group, entry_nrs[j]); @@ -502,3 +537,30 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) } return 0; } + +void nilfs_palloc_setup_cache(struct inode *inode, + struct nilfs_palloc_cache *cache) +{ + NILFS_MDT(inode)->mi_palloc_cache = cache; + spin_lock_init(&cache->lock); +} + +void nilfs_palloc_clear_cache(struct inode *inode) +{ + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + spin_lock(&cache->lock); + brelse(cache->prev_desc.bh); + brelse(cache->prev_bitmap.bh); + brelse(cache->prev_entry.bh); + cache->prev_desc.bh = NULL; + cache->prev_bitmap.bh = NULL; + cache->prev_entry.bh = NULL; + spin_unlock(&cache->lock); +} + +void nilfs_palloc_destroy_cache(struct inode *inode) +{ + nilfs_palloc_clear_cache(inode); + NILFS_MDT(inode)->mi_palloc_cache = NULL; +} diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index 4ace5475c2c..f4543ac4f56 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -69,4 +69,25 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t); #define nilfs_clear_bit_atomic ext2_clear_bit_atomic #define nilfs_find_next_zero_bit ext2_find_next_zero_bit +/* + * persistent object allocator cache + */ + +struct nilfs_bh_assoc { + unsigned long blkoff; + struct buffer_head *bh; +}; + +struct nilfs_palloc_cache { + spinlock_t lock; + struct nilfs_bh_assoc prev_desc; + struct nilfs_bh_assoc prev_bitmap; + struct nilfs_bh_assoc prev_entry; +}; + +void nilfs_palloc_setup_cache(struct inode *inode, + struct nilfs_palloc_cache *cache); +void nilfs_palloc_clear_cache(struct inode *inode); +void nilfs_palloc_destroy_cache(struct inode *inode); + #endif /* _NILFS_ALLOC_H */ diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 08834df6ec6..f4a14ea2ed9 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -402,19 +402,11 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n) { inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); - if (NILFS_MDT(bmap->b_inode)) - nilfs_mdt_mark_dirty(bmap->b_inode); - else - mark_inode_dirty(bmap->b_inode); } void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n) { inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); - if (NILFS_MDT(bmap->b_inode)) - nilfs_mdt_mark_dirty(bmap->b_inode); - else - mark_inode_dirty(bmap->b_inode); } __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 84c25382f8e..471e269536a 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -68,9 +68,34 @@ void nilfs_btnode_cache_clear(struct address_space *btnc) truncate_inode_pages(btnc, 0); } +struct buffer_head * +nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) +{ + struct inode *inode = NILFS_BTNC_I(btnc); + struct buffer_head *bh; + + bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); + if (unlikely(!bh)) + return NULL; + + if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || + buffer_dirty(bh))) { + brelse(bh); + BUG(); + } + memset(bh->b_data, 0, 1 << inode->i_blkbits); + bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; + bh->b_blocknr = blocknr; + set_buffer_mapped(bh); + set_buffer_uptodate(bh); + + unlock_page(bh->b_page); + page_cache_release(bh->b_page); + return bh; +} + int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, - sector_t pblocknr, struct buffer_head **pbh, - int newblk) + sector_t pblocknr, struct buffer_head **pbh) { struct buffer_head *bh; struct inode *inode = NILFS_BTNC_I(btnc); @@ -81,19 +106,6 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, return -ENOMEM; err = -EEXIST; /* internal code */ - if (newblk) { - if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || - buffer_dirty(bh))) { - brelse(bh); - BUG(); - } - memset(bh->b_data, 0, 1 << inode->i_blkbits); - bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; - bh->b_blocknr = blocknr; - set_buffer_mapped(bh); - set_buffer_uptodate(bh); - goto found; - } if (buffer_uptodate(bh) || buffer_dirty(bh)) goto found; @@ -135,27 +147,6 @@ out_locked: return err; } -int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr, - sector_t pblocknr, struct buffer_head **pbh, int newblk) -{ - struct buffer_head *bh; - int err; - - err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh, newblk); - if (err == -EEXIST) /* internal code (cache hit) */ - return 0; - if (unlikely(err)) - return err; - - bh = *pbh; - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - brelse(bh); - return -EIO; - } - return 0; -} - /** * nilfs_btnode_delete - delete B-tree node buffer * @bh: buffer to be deleted @@ -244,12 +235,13 @@ retry: unlock_page(obh->b_page); } - err = nilfs_btnode_get(btnc, newkey, 0, &nbh, 1); - if (likely(!err)) { - BUG_ON(nbh == obh); - ctxt->newbh = nbh; - } - return err; + nbh = nilfs_btnode_create_block(btnc, newkey); + if (!nbh) + return -ENOMEM; + + BUG_ON(nbh == obh); + ctxt->newbh = nbh; + return 0; failed_unlock: unlock_page(obh->b_page); diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 3e2275172ed..07da83f0771 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -40,10 +40,10 @@ struct nilfs_btnode_chkey_ctxt { void nilfs_btnode_cache_init_once(struct address_space *); void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); void nilfs_btnode_cache_clear(struct address_space *); +struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, + __u64 blocknr); int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, - struct buffer_head **, int); -int nilfs_btnode_get(struct address_space *, __u64, sector_t, - struct buffer_head **, int); + struct buffer_head **); void nilfs_btnode_delete(struct buffer_head *); int nilfs_btnode_prepare_change_key(struct address_space *, struct nilfs_btnode_chkey_ctxt *); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index e25b507a474..7cdd98b8d51 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -114,7 +114,18 @@ static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr, { struct address_space *btnc = &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; - return nilfs_btnode_get(btnc, ptr, 0, bhp, 0); + int err; + + err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp); + if (err) + return err == -EEXIST ? 0 : err; + + wait_on_buffer(*bhp); + if (!buffer_uptodate(*bhp)) { + brelse(*bhp); + return -EIO; + } + return 0; } static int nilfs_btree_get_new_block(const struct nilfs_btree *btree, @@ -122,12 +133,15 @@ static int nilfs_btree_get_new_block(const struct nilfs_btree *btree, { struct address_space *btnc = &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; - int ret; + struct buffer_head *bh; - ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1); - if (!ret) - set_buffer_nilfs_volatile(*bhp); - return ret; + bh = nilfs_btnode_create_block(btnc, ptr); + if (!bh) + return -ENOMEM; + + set_buffer_nilfs_volatile(bh); + *bhp = bh; + return 0; } static inline int @@ -444,6 +458,18 @@ nilfs_btree_get_node(const struct nilfs_btree *btree, nilfs_btree_get_nonroot_node(path, level); } +static inline int +nilfs_btree_bad_node(struct nilfs_btree_node *node, int level) +{ + if (unlikely(nilfs_btree_node_get_level(node) != level)) { + dump_stack(); + printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n", + nilfs_btree_node_get_level(node), level); + return 1; + } + return 0; +} + static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, struct nilfs_btree_path *path, __u64 key, __u64 *ptrp, int minlevel) @@ -467,7 +493,8 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(path, level); - BUG_ON(level != nilfs_btree_node_get_level(node)); + if (nilfs_btree_bad_node(node, level)) + return -EINVAL; if (!found) found = nilfs_btree_node_lookup(node, key, &index); else @@ -512,7 +539,8 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(path, level); - BUG_ON(level != nilfs_btree_node_get_level(node)); + if (nilfs_btree_bad_node(node, level)) + return -EINVAL; index = nilfs_btree_node_get_nchildren(node) - 1; ptr = nilfs_btree_node_get_ptr(btree, node, index); path[level].bp_index = index; @@ -638,13 +666,11 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree, { if (level < nilfs_btree_height(btree) - 1) { do { - lock_buffer(path[level].bp_bh); nilfs_btree_node_set_key( nilfs_btree_get_nonroot_node(path, level), path[level].bp_index, key); if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); - unlock_buffer(path[level].bp_bh); } while ((path[level].bp_index == 0) && (++level < nilfs_btree_height(btree) - 1)); } @@ -663,13 +689,11 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree, struct nilfs_btree_node *node; if (level < nilfs_btree_height(btree) - 1) { - lock_buffer(path[level].bp_bh); node = nilfs_btree_get_nonroot_node(path, level); nilfs_btree_node_insert(btree, node, *keyp, *ptrp, path[level].bp_index); if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); - unlock_buffer(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, @@ -689,9 +713,6 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree, struct nilfs_btree_node *node, *left; int nchildren, lnchildren, n, move; - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); @@ -712,9 +733,6 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); - nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); @@ -740,9 +758,6 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree, struct nilfs_btree_node *node, *right; int nchildren, rnchildren, n, move; - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); @@ -763,9 +778,6 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); - path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(right, 0)); @@ -794,9 +806,6 @@ static void nilfs_btree_split(struct nilfs_btree *btree, __u64 newptr; int nchildren, n, move; - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); @@ -815,9 +824,6 @@ static void nilfs_btree_split(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); - newkey = nilfs_btree_node_get_key(right, 0); newptr = path[level].bp_newreq.bpr_ptr; @@ -852,8 +858,6 @@ static void nilfs_btree_grow(struct nilfs_btree *btree, struct nilfs_btree_node *root, *child; int n; - lock_buffer(path[level].bp_sib_bh); - root = nilfs_btree_get_root(btree); child = nilfs_btree_get_sib_node(path, level); @@ -865,8 +869,6 @@ static void nilfs_btree_grow(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - unlock_buffer(path[level].bp_sib_bh); - path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; @@ -1023,11 +1025,9 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, stats->bs_nblocks++; - lock_buffer(bh); nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data, 0, level, 0, NULL, NULL); - unlock_buffer(bh); path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_split; } @@ -1052,10 +1052,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, if (ret < 0) goto err_out_curr_node; - lock_buffer(bh); nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data, 0, level, 0, NULL, NULL); - unlock_buffer(bh); path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_grow; @@ -1154,13 +1152,11 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree, struct nilfs_btree_node *node; if (level < nilfs_btree_height(btree) - 1) { - lock_buffer(path[level].bp_bh); node = nilfs_btree_get_nonroot_node(path, level); nilfs_btree_node_delete(btree, node, keyp, ptrp, path[level].bp_index); if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); - unlock_buffer(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); @@ -1180,9 +1176,6 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree, nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); @@ -1197,9 +1190,6 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); - nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); @@ -1217,9 +1207,6 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree, nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); @@ -1234,9 +1221,6 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); - path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(right, 0)); @@ -1255,9 +1239,6 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree, nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); @@ -1268,9 +1249,6 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); - nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; @@ -1286,9 +1264,6 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree, nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); @@ -1299,9 +1274,6 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); - nilfs_btnode_delete(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level + 1].bp_index++; @@ -1316,7 +1288,6 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree, nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); root = nilfs_btree_get_root(btree); child = nilfs_btree_get_nonroot_node(path, level); @@ -1324,7 +1295,6 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree, nilfs_btree_node_set_level(root, level); n = nilfs_btree_node_get_nchildren(child); nilfs_btree_node_move_left(btree, root, child, n); - unlock_buffer(path[level].bp_bh); nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = NULL; @@ -1699,7 +1669,6 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat); /* create child node at level 1 */ - lock_buffer(bh); node = (struct nilfs_btree_node *)bh->b_data; nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs); nilfs_btree_node_insert(btree, node, @@ -1709,7 +1678,6 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, if (!nilfs_bmap_dirty(bmap)) nilfs_bmap_set_dirty(bmap); - unlock_buffer(bh); brelse(bh); /* create root node at level 2 */ @@ -2050,7 +2018,7 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap, for (level = NILFS_BTREE_LEVEL_NODE_MIN; level < NILFS_BTREE_LEVEL_MAX; level++) - list_splice(&lists[level], listp->prev); + list_splice_tail(&lists[level], listp); } static int nilfs_btree_assign_p(struct nilfs_btree *btree, diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 0e72bbbc6b6..4b82d84ade7 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -34,28 +34,6 @@ struct nilfs_btree; struct nilfs_btree_path; /** - * struct nilfs_btree_node - B-tree node - * @bn_flags: flags - * @bn_level: level - * @bn_nchildren: number of children - * @bn_pad: padding - */ -struct nilfs_btree_node { - __u8 bn_flags; - __u8 bn_level; - __le16 bn_nchildren; - __le32 bn_pad; -}; - -/* flags */ -#define NILFS_BTREE_NODE_ROOT 0x01 - -/* level */ -#define NILFS_BTREE_LEVEL_DATA 0 -#define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1) -#define NILFS_BTREE_LEVEL_MAX 14 - -/** * struct nilfs_btree - B-tree structure * @bt_bmap: bmap base structure */ diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 3f5d5d06f53..d5ad54e204a 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -926,3 +926,29 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) up_read(&NILFS_MDT(cpfile)->mi_sem); return ret; } + +/** + * nilfs_cpfile_read - read cpfile inode + * @cpfile: cpfile inode + * @raw_inode: on-disk cpfile inode + */ +int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode) +{ + return nilfs_read_inode_common(cpfile, raw_inode); +} + +/** + * nilfs_cpfile_new - create cpfile + * @nilfs: nilfs object + * @cpsize: size of a checkpoint entry + */ +struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize) +{ + struct inode *cpfile; + + cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO, 0); + if (cpfile) + nilfs_mdt_set_entry_size(cpfile, cpsize, + sizeof(struct nilfs_cpfile_header)); + return cpfile; +} diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index debea896e70..bc0809e0ab4 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -40,4 +40,7 @@ int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *); ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned, size_t); +int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode); +struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize); + #endif /* _NILFS_CPFILE_H */ diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 1ff8e15bd36..187dd07ba86 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -33,6 +33,16 @@ #define NILFS_CNO_MIN ((__u64)1) #define NILFS_CNO_MAX (~(__u64)0) +struct nilfs_dat_info { + struct nilfs_mdt_info mi; + struct nilfs_palloc_cache palloc_cache; +}; + +static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) +{ + return (struct nilfs_dat_info *)NILFS_MDT(dat); +} + static int nilfs_dat_prepare_entry(struct inode *dat, struct nilfs_palloc_req *req, int create) { @@ -425,3 +435,40 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, return nvi; } + +/** + * nilfs_dat_read - read dat inode + * @dat: dat inode + * @raw_inode: on-disk dat inode + */ +int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode) +{ + return nilfs_read_inode_common(dat, raw_inode); +} + +/** + * nilfs_dat_new - create dat file + * @nilfs: nilfs object + * @entry_size: size of a dat entry + */ +struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size) +{ + static struct lock_class_key dat_lock_key; + struct inode *dat; + struct nilfs_dat_info *di; + int err; + + dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO, sizeof(*di)); + if (dat) { + err = nilfs_palloc_init_blockgroup(dat, entry_size); + if (unlikely(err)) { + nilfs_mdt_destroy(dat); + return NULL; + } + + di = NILFS_DAT_I(dat); + lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); + nilfs_palloc_setup_cache(dat, &di->palloc_cache); + } + return dat; +} diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h index 406070d3ff4..d31c3aab0ef 100644 --- a/fs/nilfs2/dat.h +++ b/fs/nilfs2/dat.h @@ -53,4 +53,7 @@ int nilfs_dat_freev(struct inode *, __u64 *, size_t); int nilfs_dat_move(struct inode *, __u64, sector_t); ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t); +int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode); +struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size); + #endif /* _NILFS_DAT_H */ diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index e097099bfc8..76d803e060a 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -99,9 +99,9 @@ static int nilfs_prepare_chunk(struct page *page, NULL, nilfs_get_block); } -static int nilfs_commit_chunk(struct page *page, - struct address_space *mapping, - unsigned from, unsigned to) +static void nilfs_commit_chunk(struct page *page, + struct address_space *mapping, + unsigned from, unsigned to) { struct inode *dir = mapping->host; struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb); @@ -112,15 +112,13 @@ static int nilfs_commit_chunk(struct page *page, nr_dirty = nilfs_page_count_clean_buffers(page, from, to); copied = block_write_end(NULL, mapping, pos, len, len, page, NULL); - if (pos + copied > dir->i_size) { + if (pos + copied > dir->i_size) i_size_write(dir, pos + copied); - mark_inode_dirty(dir); - } if (IS_DIRSYNC(dir)) nilfs_set_transaction_flag(NILFS_TI_SYNC); err = nilfs_set_file_dirty(sbi, dir, nr_dirty); + WARN_ON(err); /* do not happen */ unlock_page(page); - return err; } static void nilfs_check_page(struct page *page) @@ -455,11 +453,10 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, BUG_ON(err); de->inode = cpu_to_le64(inode->i_ino); nilfs_set_de_type(de, inode); - err = nilfs_commit_chunk(page, mapping, from, to); + nilfs_commit_chunk(page, mapping, from, to); nilfs_put_page(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; /* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ - mark_inode_dirty(dir); } /* @@ -548,10 +545,10 @@ got_it: memcpy(de->name, name, namelen); de->inode = cpu_to_le64(inode->i_ino); nilfs_set_de_type(de, inode); - err = nilfs_commit_chunk(page, page->mapping, from, to); + nilfs_commit_chunk(page, page->mapping, from, to); dir->i_mtime = dir->i_ctime = CURRENT_TIME; /* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ - mark_inode_dirty(dir); + nilfs_mark_inode_dirty(dir); /* OFFSET_CACHE */ out_put: nilfs_put_page(page); @@ -595,10 +592,9 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) if (pde) pde->rec_len = cpu_to_le16(to - from); dir->inode = 0; - err = nilfs_commit_chunk(page, mapping, from, to); + nilfs_commit_chunk(page, mapping, from, to); inode->i_ctime = inode->i_mtime = CURRENT_TIME; /* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */ - mark_inode_dirty(inode); out: nilfs_put_page(page); return err; @@ -640,7 +636,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) memcpy(de->name, "..\0", 4); nilfs_set_de_type(de, inode); kunmap_atomic(kaddr, KM_USER0); - err = nilfs_commit_chunk(page, mapping, 0, chunk_size); + nilfs_commit_chunk(page, mapping, 0, chunk_size); fail: page_cache_release(page); return err; diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c index 93383c5cee9..dd5f7e0a95f 100644 --- a/fs/nilfs2/gcdat.c +++ b/fs/nilfs2/gcdat.c @@ -61,6 +61,8 @@ void nilfs_commit_gcdat_inode(struct the_nilfs *nilfs) nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap); + nilfs_palloc_clear_cache(dat); + nilfs_palloc_clear_cache(gcdat); nilfs_clear_dirty_pages(mapping); nilfs_copy_back_pages(mapping, gmapping); /* note: mdt dirty flags should be cleared by segctor. */ @@ -79,6 +81,7 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs) gcdat->i_state = I_CLEAR; gii->i_flags = 0; + nilfs_palloc_clear_cache(gcdat); truncate_inode_pages(gcdat->i_mapping, 0); truncate_inode_pages(&gii->i_btnode_cache, 0); } diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index e6de0a27ab5..e16a6664dfa 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -149,7 +149,7 @@ int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, __u64 vbn, struct buffer_head **out_bh) { int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, - vbn ? : pbn, pbn, out_bh, 0); + vbn ? : pbn, pbn, out_bh); if (ret == -EEXIST) /* internal code (cache hit) */ ret = 0; return ret; @@ -212,9 +212,10 @@ void nilfs_destroy_gccache(struct the_nilfs *nilfs) static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino, __u64 cno) { - struct inode *inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS); + struct inode *inode; struct nilfs_inode_info *ii; + inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS, 0); if (!inode) return NULL; @@ -265,7 +266,6 @@ struct inode *nilfs_gc_iget(struct the_nilfs *nilfs, ino_t ino, __u64 cno) */ void nilfs_clear_gcinode(struct inode *inode) { - nilfs_mdt_clear(inode); nilfs_mdt_destroy(inode); } diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index de86401f209..922d9dd42c8 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -29,6 +29,17 @@ #include "alloc.h" #include "ifile.h" + +struct nilfs_ifile_info { + struct nilfs_mdt_info mi; + struct nilfs_palloc_cache palloc_cache; +}; + +static inline struct nilfs_ifile_info *NILFS_IFILE_I(struct inode *ifile) +{ + return (struct nilfs_ifile_info *)NILFS_MDT(ifile); +} + /** * nilfs_ifile_create_inode - create a new disk inode * @ifile: ifile inode @@ -148,3 +159,27 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, } return err; } + +/** + * nilfs_ifile_new - create inode file + * @sbi: nilfs_sb_info struct + * @inode_size: size of an inode + */ +struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size) +{ + struct inode *ifile; + int err; + + ifile = nilfs_mdt_new(sbi->s_nilfs, sbi->s_super, NILFS_IFILE_INO, + sizeof(struct nilfs_ifile_info)); + if (ifile) { + err = nilfs_palloc_init_blockgroup(ifile, inode_size); + if (unlikely(err)) { + nilfs_mdt_destroy(ifile); + return NULL; + } + nilfs_palloc_setup_cache(ifile, + &NILFS_IFILE_I(ifile)->palloc_cache); + } + return ifile; +} diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index ecc3ba76db4..cbca32e498f 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -49,4 +49,6 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); int nilfs_ifile_delete_inode(struct inode *, ino_t); int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); +struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size); + #endif /* _NILFS_IFILE_H */ diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 2a0a5a3ac13..7868cc122ac 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -97,6 +97,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, nilfs_transaction_abort(inode->i_sb); goto out; } + nilfs_mark_inode_dirty(inode); nilfs_transaction_commit(inode->i_sb); /* never fails */ /* Error handling should be detailed */ set_buffer_new(bh_result); @@ -322,7 +323,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) nilfs_init_acl(), proper cancellation of above jobs should be considered */ - mark_inode_dirty(inode); return inode; failed_acl: @@ -525,7 +525,6 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh) raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh); - /* The buffer is guarded with lock_buffer() by the caller */ if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size); set_bit(NILFS_I_INODE_DIRTY, &ii->i_state); @@ -599,6 +598,7 @@ void nilfs_truncate(struct inode *inode) if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); + nilfs_mark_inode_dirty(inode); nilfs_set_file_dirty(NILFS_SB(sb), inode, 0); nilfs_transaction_commit(sb); /* May construct a logical segment and may fail in sync mode. @@ -623,6 +623,7 @@ void nilfs_delete_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); nilfs_truncate_bmap(ii, 0); + nilfs_mark_inode_dirty(inode); nilfs_free_inode(inode); /* nilfs_free_inode() marks inode buffer dirty */ if (IS_SYNC(inode)) @@ -745,9 +746,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) "failed to reget inode block.\n"); return err; } - lock_buffer(ibh); nilfs_update_inode(inode, ibh); - unlock_buffer(ibh); nilfs_mdt_mark_buffer_dirty(ibh); nilfs_mdt_mark_dirty(sbi->s_ifile); brelse(ibh); diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index f6326112d64..06713ffcc7f 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -186,7 +186,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, } static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, - struct buffer_head **out_bh) + int readahead, struct buffer_head **out_bh) { struct buffer_head *first_bh, *bh; unsigned long blkoff; @@ -200,16 +200,18 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, if (unlikely(err)) goto failed; - blkoff = block + 1; - for (i = 0; i < nr_ra_blocks; i++, blkoff++) { - err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); - if (likely(!err || err == -EEXIST)) - brelse(bh); - else if (err != -EBUSY) - break; /* abort readahead if bmap lookup failed */ - - if (!buffer_locked(first_bh)) - goto out_no_wait; + if (readahead) { + blkoff = block + 1; + for (i = 0; i < nr_ra_blocks; i++, blkoff++) { + err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); + if (likely(!err || err == -EEXIST)) + brelse(bh); + else if (err != -EBUSY) + break; + /* abort readahead if bmap lookup failed */ + if (!buffer_locked(first_bh)) + goto out_no_wait; + } } wait_on_buffer(first_bh); @@ -263,7 +265,7 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, /* Should be rewritten with merging nilfs_mdt_read_block() */ retry: - ret = nilfs_mdt_read_block(inode, blkoff, out_bh); + ret = nilfs_mdt_read_block(inode, blkoff, !create, out_bh); if (!create || ret != -ENOENT) return ret; @@ -371,7 +373,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block) struct buffer_head *bh; int err; - err = nilfs_mdt_read_block(inode, block, &bh); + err = nilfs_mdt_read_block(inode, block, 0, &bh); if (unlikely(err)) return err; nilfs_mark_buffer_dirty(bh); @@ -445,9 +447,17 @@ static const struct file_operations def_mdt_fops; * longer than those of the super block structs; they may continue for * several consecutive mounts/umounts. This would need discussions. */ +/** + * nilfs_mdt_new_common - allocate a pseudo inode for metadata file + * @nilfs: nilfs object + * @sb: super block instance the metadata file belongs to + * @ino: inode number + * @gfp_mask: gfp mask for data pages + * @objsz: size of the private object attached to inode->i_private + */ struct inode * nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, - ino_t ino, gfp_t gfp_mask) + ino_t ino, gfp_t gfp_mask, size_t objsz) { struct inode *inode = nilfs_alloc_inode_common(nilfs); @@ -455,8 +465,9 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, return NULL; else { struct address_space * const mapping = &inode->i_data; - struct nilfs_mdt_info *mi = kzalloc(sizeof(*mi), GFP_NOFS); + struct nilfs_mdt_info *mi; + mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS); if (!mi) { nilfs_destroy_inode(inode); return NULL; @@ -513,11 +524,11 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, } struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, - ino_t ino) + ino_t ino, size_t objsz) { - struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, - NILFS_MDT_GFP); + struct inode *inode; + inode = nilfs_mdt_new_common(nilfs, sb, ino, NILFS_MDT_GFP, objsz); if (!inode) return NULL; @@ -544,14 +555,15 @@ void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow) &NILFS_I(orig)->i_btnode_cache; } -void nilfs_mdt_clear(struct inode *inode) +static void nilfs_mdt_clear(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); invalidate_mapping_pages(inode->i_mapping, 0, -1); truncate_inode_pages(inode->i_mapping, 0); - nilfs_bmap_clear(ii->i_bmap); + if (test_bit(NILFS_I_BMAP, &ii->i_state)) + nilfs_bmap_clear(ii->i_bmap); nilfs_btnode_cache_clear(&ii->i_btnode_cache); } @@ -559,6 +571,10 @@ void nilfs_mdt_destroy(struct inode *inode) { struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + if (mdi->mi_palloc_cache) + nilfs_palloc_destroy_cache(inode); + nilfs_mdt_clear(inode); + kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ kfree(mdi); nilfs_destroy_inode(inode); diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index 431599733c9..6c4bbb0470f 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -36,6 +36,7 @@ * @mi_entry_size: size of an entry * @mi_first_entry_offset: offset to the first entry * @mi_entries_per_block: number of entries in a block + * @mi_palloc_cache: persistent object allocator cache * @mi_blocks_per_group: number of blocks in a group * @mi_blocks_per_desc_block: number of blocks per descriptor block */ @@ -46,6 +47,7 @@ struct nilfs_mdt_info { unsigned mi_entry_size; unsigned mi_first_entry_offset; unsigned long mi_entries_per_block; + struct nilfs_palloc_cache *mi_palloc_cache; unsigned long mi_blocks_per_group; unsigned long mi_blocks_per_desc_block; }; @@ -74,11 +76,11 @@ int nilfs_mdt_forget_block(struct inode *, unsigned long); int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); int nilfs_mdt_fetch_dirty(struct inode *); -struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t); +struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t, + size_t); struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, - ino_t, gfp_t); + ino_t, gfp_t, size_t); void nilfs_mdt_destroy(struct inode *); -void nilfs_mdt_clear(struct inode *); void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned); void nilfs_mdt_set_shadow(struct inode *, struct inode *); @@ -104,21 +106,4 @@ static inline __u64 nilfs_mdt_cno(struct inode *inode) #define nilfs_mdt_bgl_lock(inode, bg) \ (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock) - -static inline int -nilfs_mdt_read_inode_direct(struct inode *inode, struct buffer_head *bh, - unsigned n) -{ - return nilfs_read_inode_common( - inode, (struct nilfs_inode *)(bh->b_data + n)); -} - -static inline void -nilfs_mdt_write_inode_direct(struct inode *inode, struct buffer_head *bh, - unsigned n) -{ - nilfs_write_inode_common( - inode, (struct nilfs_inode *)(bh->b_data + n), 1); -} - #endif /* _NILFS_MDT_H */ diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index ed02e886fa7..07ba838ef08 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -120,7 +120,7 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, inode->i_op = &nilfs_file_inode_operations; inode->i_fop = &nilfs_file_operations; inode->i_mapping->a_ops = &nilfs_aops; - mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); err = nilfs_add_nondir(dentry, inode); } if (!err) @@ -148,7 +148,7 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); - mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); err = nilfs_add_nondir(dentry, inode); } if (!err) @@ -188,7 +188,7 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry, goto out_fail; /* mark_inode_dirty(inode); */ - /* nilfs_new_inode() and page_symlink() do this */ + /* page_symlink() do this */ err = nilfs_add_nondir(dentry, inode); out: @@ -200,7 +200,8 @@ out: return err; out_fail: - inode_dec_link_count(inode); + drop_nlink(inode); + nilfs_mark_inode_dirty(inode); iput(inode); goto out; } @@ -245,7 +246,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) return err; - inode_inc_link_count(dir); + inc_nlink(dir); inode = nilfs_new_inode(dir, S_IFDIR | mode); err = PTR_ERR(inode); @@ -256,7 +257,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_fop = &nilfs_dir_operations; inode->i_mapping->a_ops = &nilfs_aops; - inode_inc_link_count(inode); + inc_nlink(inode); err = nilfs_make_empty(inode, dir); if (err) @@ -266,6 +267,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_fail; + nilfs_mark_inode_dirty(inode); d_instantiate(dentry, inode); out: if (!err) @@ -276,26 +278,23 @@ out: return err; out_fail: - inode_dec_link_count(inode); - inode_dec_link_count(inode); + drop_nlink(inode); + drop_nlink(inode); + nilfs_mark_inode_dirty(inode); iput(inode); out_dir: - inode_dec_link_count(dir); + drop_nlink(dir); + nilfs_mark_inode_dirty(dir); goto out; } -static int nilfs_unlink(struct inode *dir, struct dentry *dentry) +static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode; struct nilfs_dir_entry *de; struct page *page; - struct nilfs_transaction_info ti; int err; - err = nilfs_transaction_begin(dir->i_sb, &ti, 0); - if (err) - return err; - err = -ENOENT; de = nilfs_find_entry(dir, dentry, &page); if (!de) @@ -317,12 +316,28 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry) goto out; inode->i_ctime = dir->i_ctime; - inode_dec_link_count(inode); + drop_nlink(inode); err = 0; out: - if (!err) + return err; +} + +static int nilfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct nilfs_transaction_info ti; + int err; + + err = nilfs_transaction_begin(dir->i_sb, &ti, 0); + if (err) + return err; + + err = nilfs_do_unlink(dir, dentry); + + if (!err) { + nilfs_mark_inode_dirty(dir); + nilfs_mark_inode_dirty(dentry->d_inode); err = nilfs_transaction_commit(dir->i_sb); - else + } else nilfs_transaction_abort(dir->i_sb); return err; @@ -340,11 +355,13 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) err = -ENOTEMPTY; if (nilfs_empty_dir(inode)) { - err = nilfs_unlink(dir, dentry); + err = nilfs_do_unlink(dir, dentry); if (!err) { inode->i_size = 0; - inode_dec_link_count(inode); - inode_dec_link_count(dir); + drop_nlink(inode); + nilfs_mark_inode_dirty(inode); + drop_nlink(dir); + nilfs_mark_inode_dirty(dir); } } if (!err) @@ -395,42 +412,48 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_de = nilfs_find_entry(new_dir, new_dentry, &new_page); if (!new_de) goto out_dir; - inode_inc_link_count(old_inode); + inc_nlink(old_inode); nilfs_set_link(new_dir, new_de, new_page, old_inode); + nilfs_mark_inode_dirty(new_dir); new_inode->i_ctime = CURRENT_TIME; if (dir_de) drop_nlink(new_inode); - inode_dec_link_count(new_inode); + drop_nlink(new_inode); + nilfs_mark_inode_dirty(new_inode); } else { if (dir_de) { err = -EMLINK; if (new_dir->i_nlink >= NILFS_LINK_MAX) goto out_dir; } - inode_inc_link_count(old_inode); + inc_nlink(old_inode); err = nilfs_add_link(new_dentry, old_inode); if (err) { - inode_dec_link_count(old_inode); + drop_nlink(old_inode); + nilfs_mark_inode_dirty(old_inode); goto out_dir; } - if (dir_de) - inode_inc_link_count(new_dir); + if (dir_de) { + inc_nlink(new_dir); + nilfs_mark_inode_dirty(new_dir); + } } /* * Like most other Unix systems, set the ctime for inodes on a * rename. - * inode_dec_link_count() will mark the inode dirty. */ old_inode->i_ctime = CURRENT_TIME; nilfs_delete_entry(old_de, old_page); - inode_dec_link_count(old_inode); + drop_nlink(old_inode); if (dir_de) { nilfs_set_link(old_inode, dir_de, dir_page, new_dir); - inode_dec_link_count(old_dir); + drop_nlink(old_dir); } + nilfs_mark_inode_dirty(old_dir); + nilfs_mark_inode_dirty(old_inode); err = nilfs_transaction_commit(old_dir->i_sb); return err; diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 6dc83591d11..c9c96c7825d 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -770,14 +770,8 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, nilfs_finish_roll_forward(nilfs, sbi, ri); } - nilfs_detach_checkpoint(sbi); - return 0; - failed: nilfs_detach_checkpoint(sbi); - nilfs_mdt_clear(nilfs->ns_cpfile); - nilfs_mdt_clear(nilfs->ns_sufile); - nilfs_mdt_clear(nilfs->ns_dat); return err; } @@ -804,6 +798,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, struct nilfs_segsum_info ssi; sector_t pseg_start, pseg_end, sr_pseg_start = 0; sector_t seg_start, seg_end; /* range of full segment (block number) */ + sector_t b, end; u64 seg_seq; __u64 segnum, nextnum = 0; __u64 cno; @@ -819,6 +814,11 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, /* Calculate range of segment */ nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); + /* Read ahead segment */ + b = seg_start; + while (b <= seg_end) + sb_breadahead(sbi->s_super, b++); + for (;;) { /* Load segment summary */ ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1); @@ -841,14 +841,20 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, ri->ri_nextnum = nextnum; empty_seg = 0; + if (!NILFS_SEG_HAS_SR(&ssi) && !scan_newer) { + /* This will never happen because a superblock + (last_segment) always points to a pseg + having a super root. */ + ret = NILFS_SEG_FAIL_CONSISTENCY; + goto failed; + } + + if (pseg_start == seg_start) { + nilfs_get_segment_range(nilfs, nextnum, &b, &end); + while (b <= end) + sb_breadahead(sbi->s_super, b++); + } if (!NILFS_SEG_HAS_SR(&ssi)) { - if (!scan_newer) { - /* This will never happen because a superblock - (last_segment) always points to a pseg - having a super root. */ - ret = NILFS_SEG_FAIL_CONSISTENCY; - goto failed; - } if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) { ri->ri_lsegs_start = pseg_start; ri->ri_lsegs_start_seq = seg_seq; @@ -919,7 +925,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, super_root_found: /* Updating pointers relating to the latest checkpoint */ - list_splice(&segments, ri->ri_used_segments.prev); + list_splice_tail(&segments, &ri->ri_used_segments); nilfs->ns_last_pseg = sr_pseg_start; nilfs->ns_last_seq = nilfs->ns_seg_seq; nilfs->ns_last_cno = ri->ri_cno; diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index e6d9e37fa24..645c78656aa 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -24,10 +24,22 @@ #include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/crc32.h> +#include <linux/backing-dev.h> #include "page.h" #include "segbuf.h" +struct nilfs_write_info { + struct the_nilfs *nilfs; + struct bio *bio; + int start, end; /* The region to be submitted */ + int rest_blocks; + int max_pages; + int nr_vecs; + sector_t blocknr; +}; + + static struct kmem_cache *nilfs_segbuf_cachep; static void nilfs_segbuf_init_once(void *obj) @@ -63,6 +75,11 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb) INIT_LIST_HEAD(&segbuf->sb_list); INIT_LIST_HEAD(&segbuf->sb_segsum_buffers); INIT_LIST_HEAD(&segbuf->sb_payload_buffers); + + init_completion(&segbuf->sb_bio_event); + atomic_set(&segbuf->sb_err, 0); + segbuf->sb_nbio = 0; + return segbuf; } @@ -83,6 +100,22 @@ void nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum, segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1; } +/** + * nilfs_segbuf_map_cont - map a new log behind a given log + * @segbuf: new segment buffer + * @prev: segment buffer containing a log to be continued + */ +void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf, + struct nilfs_segment_buffer *prev) +{ + segbuf->sb_segnum = prev->sb_segnum; + segbuf->sb_fseg_start = prev->sb_fseg_start; + segbuf->sb_fseg_end = prev->sb_fseg_end; + segbuf->sb_pseg_start = prev->sb_pseg_start + prev->sb_sum.nblocks; + segbuf->sb_rest_blocks = + segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1; +} + void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf, __u64 nextnum, struct the_nilfs *nilfs) { @@ -132,8 +165,6 @@ int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags, segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary); segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0; segbuf->sb_sum.ctime = ctime; - - segbuf->sb_io_error = 0; return 0; } @@ -219,7 +250,7 @@ void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, raw_sum->ss_datasum = cpu_to_le32(crc); } -void nilfs_release_buffers(struct list_head *list) +static void nilfs_release_buffers(struct list_head *list) { struct buffer_head *bh, *n; @@ -241,13 +272,56 @@ void nilfs_release_buffers(struct list_head *list) } } +static void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf) +{ + nilfs_release_buffers(&segbuf->sb_segsum_buffers); + nilfs_release_buffers(&segbuf->sb_payload_buffers); +} + +/* + * Iterators for segment buffers + */ +void nilfs_clear_logs(struct list_head *logs) +{ + struct nilfs_segment_buffer *segbuf; + + list_for_each_entry(segbuf, logs, sb_list) + nilfs_segbuf_clear(segbuf); +} + +void nilfs_truncate_logs(struct list_head *logs, + struct nilfs_segment_buffer *last) +{ + struct nilfs_segment_buffer *n, *segbuf; + + segbuf = list_prepare_entry(last, logs, sb_list); + list_for_each_entry_safe_continue(segbuf, n, logs, sb_list) { + list_del_init(&segbuf->sb_list); + nilfs_segbuf_clear(segbuf); + nilfs_segbuf_free(segbuf); + } +} + +int nilfs_wait_on_logs(struct list_head *logs) +{ + struct nilfs_segment_buffer *segbuf; + int err; + + list_for_each_entry(segbuf, logs, sb_list) { + err = nilfs_segbuf_wait(segbuf); + if (err) + return err; + } + return 0; +} + /* * BIO operations */ static void nilfs_end_bio_write(struct bio *bio, int err) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct nilfs_write_info *wi = bio->bi_private; + struct nilfs_segment_buffer *segbuf = bio->bi_private; if (err == -EOPNOTSUPP) { set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); @@ -256,21 +330,22 @@ static void nilfs_end_bio_write(struct bio *bio, int err) } if (!uptodate) - atomic_inc(&wi->err); + atomic_inc(&segbuf->sb_err); bio_put(bio); - complete(&wi->bio_event); + complete(&segbuf->sb_bio_event); } -static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode) +static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, + struct nilfs_write_info *wi, int mode) { struct bio *bio = wi->bio; int err; - if (wi->nbio > 0 && bdi_write_congested(wi->bdi)) { - wait_for_completion(&wi->bio_event); - wi->nbio--; - if (unlikely(atomic_read(&wi->err))) { + if (segbuf->sb_nbio > 0 && bdi_write_congested(wi->nilfs->ns_bdi)) { + wait_for_completion(&segbuf->sb_bio_event); + segbuf->sb_nbio--; + if (unlikely(atomic_read(&segbuf->sb_err))) { bio_put(bio); err = -EIO; goto failed; @@ -278,7 +353,7 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode) } bio->bi_end_io = nilfs_end_bio_write; - bio->bi_private = wi; + bio->bi_private = segbuf; bio_get(bio); submit_bio(mode, bio); if (bio_flagged(bio, BIO_EOPNOTSUPP)) { @@ -286,7 +361,7 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode) err = -EOPNOTSUPP; goto failed; } - wi->nbio++; + segbuf->sb_nbio++; bio_put(bio); wi->bio = NULL; @@ -301,17 +376,15 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode) } /** - * nilfs_alloc_seg_bio - allocate a bio for writing segment. - * @sb: super block - * @start: beginning disk block number of this BIO. + * nilfs_alloc_seg_bio - allocate a new bio for writing log + * @nilfs: nilfs object + * @start: start block number of the bio * @nr_vecs: request size of page vector. * - * alloc_seg_bio() allocates a new BIO structure and initialize it. - * * Return Value: On success, pointer to the struct bio is returned. * On error, NULL is returned. */ -static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start, +static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start, int nr_vecs) { struct bio *bio; @@ -322,36 +395,33 @@ static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start, bio = bio_alloc(GFP_NOIO, nr_vecs); } if (likely(bio)) { - bio->bi_bdev = sb->s_bdev; - bio->bi_sector = (sector_t)start << (sb->s_blocksize_bits - 9); + bio->bi_bdev = nilfs->ns_bdev; + bio->bi_sector = start << (nilfs->ns_blocksize_bits - 9); } return bio; } -void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, - struct nilfs_write_info *wi) +static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, + struct nilfs_write_info *wi) { wi->bio = NULL; wi->rest_blocks = segbuf->sb_sum.nblocks; - wi->max_pages = bio_get_nr_vecs(wi->sb->s_bdev); + wi->max_pages = bio_get_nr_vecs(wi->nilfs->ns_bdev); wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); wi->start = wi->end = 0; - wi->nbio = 0; wi->blocknr = segbuf->sb_pseg_start; - - atomic_set(&wi->err, 0); - init_completion(&wi->bio_event); } -static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh, - int mode) +static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, + struct nilfs_write_info *wi, + struct buffer_head *bh, int mode) { int len, err; BUG_ON(wi->nr_vecs <= 0); repeat: if (!wi->bio) { - wi->bio = nilfs_alloc_seg_bio(wi->sb, wi->blocknr + wi->end, + wi->bio = nilfs_alloc_seg_bio(wi->nilfs, wi->blocknr + wi->end, wi->nr_vecs); if (unlikely(!wi->bio)) return -ENOMEM; @@ -363,76 +433,83 @@ static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh, return 0; } /* bio is FULL */ - err = nilfs_submit_seg_bio(wi, mode); + err = nilfs_segbuf_submit_bio(segbuf, wi, mode); /* never submit current bh */ if (likely(!err)) goto repeat; return err; } +/** + * nilfs_segbuf_write - submit write requests of a log + * @segbuf: buffer storing a log to be written + * @nilfs: nilfs object + * + * Return Value: On Success, 0 is returned. On Error, one of the following + * negative error code is returned. + * + * %-EIO - I/O error + * + * %-ENOMEM - Insufficient memory available. + */ int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, - struct nilfs_write_info *wi) + struct the_nilfs *nilfs) { + struct nilfs_write_info wi; struct buffer_head *bh; - int res, rw = WRITE; + int res = 0, rw = WRITE; + + wi.nilfs = nilfs; + nilfs_segbuf_prepare_write(segbuf, &wi); list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { - res = nilfs_submit_bh(wi, bh, rw); + res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw); if (unlikely(res)) goto failed_bio; } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - res = nilfs_submit_bh(wi, bh, rw); + res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw); if (unlikely(res)) goto failed_bio; } - if (wi->bio) { + if (wi.bio) { /* * Last BIO is always sent through the following * submission. */ rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); - res = nilfs_submit_seg_bio(wi, rw); - if (unlikely(res)) - goto failed_bio; + res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); } - res = 0; - out: - return res; - failed_bio: - atomic_inc(&wi->err); - goto out; + return res; } /** * nilfs_segbuf_wait - wait for completion of requested BIOs - * @wi: nilfs_write_info + * @segbuf: segment buffer * * Return Value: On Success, 0 is returned. On Error, one of the following * negative error code is returned. * * %-EIO - I/O error */ -int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf, - struct nilfs_write_info *wi) +int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf) { int err = 0; - if (!wi->nbio) + if (!segbuf->sb_nbio) return 0; do { - wait_for_completion(&wi->bio_event); - } while (--wi->nbio > 0); + wait_for_completion(&segbuf->sb_bio_event); + } while (--segbuf->sb_nbio > 0); - if (unlikely(atomic_read(&wi->err) > 0)) { + if (unlikely(atomic_read(&segbuf->sb_err) > 0)) { printk(KERN_ERR "NILFS: IO error writing segment\n"); err = -EIO; - segbuf->sb_io_error = 1; } return err; } diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index 0c3076f4e59..6af1630fb40 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -27,7 +27,6 @@ #include <linux/buffer_head.h> #include <linux/bio.h> #include <linux/completion.h> -#include <linux/backing-dev.h> /** * struct nilfs_segsum_info - On-memory segment summary @@ -77,7 +76,9 @@ struct nilfs_segsum_info { * @sb_rest_blocks: Number of residual blocks in the current segment * @sb_segsum_buffers: List of buffers for segment summaries * @sb_payload_buffers: List of buffers for segment payload - * @sb_io_error: I/O error status + * @sb_nbio: Number of flying bio requests + * @sb_err: I/O error status + * @sb_bio_event: Completion event of log writing */ struct nilfs_segment_buffer { struct super_block *sb_super; @@ -96,7 +97,9 @@ struct nilfs_segment_buffer { struct list_head sb_payload_buffers; /* including super root */ /* io status */ - int sb_io_error; + int sb_nbio; + atomic_t sb_err; + struct completion sb_bio_event; }; #define NILFS_LIST_SEGBUF(head) \ @@ -125,6 +128,8 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *); void nilfs_segbuf_free(struct nilfs_segment_buffer *); void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long, struct the_nilfs *); +void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf, + struct nilfs_segment_buffer *prev); void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, struct the_nilfs *); int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t); @@ -161,41 +166,18 @@ nilfs_segbuf_add_file_buffer(struct nilfs_segment_buffer *segbuf, segbuf->sb_sum.nfileblk++; } -void nilfs_release_buffers(struct list_head *); +int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, + struct the_nilfs *nilfs); +int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf); -static inline void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf) +void nilfs_clear_logs(struct list_head *logs); +void nilfs_truncate_logs(struct list_head *logs, + struct nilfs_segment_buffer *last); +int nilfs_wait_on_logs(struct list_head *logs); + +static inline void nilfs_destroy_logs(struct list_head *logs) { - nilfs_release_buffers(&segbuf->sb_segsum_buffers); - nilfs_release_buffers(&segbuf->sb_payload_buffers); + nilfs_truncate_logs(logs, NULL); } -struct nilfs_write_info { - struct bio *bio; - int start, end; /* The region to be submitted */ - int rest_blocks; - int max_pages; - int nr_vecs; - sector_t blocknr; - - int nbio; - atomic_t err; - struct completion bio_event; - /* completion event of segment write */ - - /* - * The following fields must be set explicitly - */ - struct super_block *sb; - struct backing_dev_info *bdi; /* backing dev info */ - struct buffer_head *bh_sr; -}; - - -void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *, - struct nilfs_write_info *); -int nilfs_segbuf_write(struct nilfs_segment_buffer *, - struct nilfs_write_info *); -int nilfs_segbuf_wait(struct nilfs_segment_buffer *, - struct nilfs_write_info *); - #endif /* _NILFS_SEGBUF_H */ diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 6eff66a070d..17584c52448 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -974,12 +974,12 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, nilfs->ns_nongc_ctime : sci->sc_seg_ctime); raw_sr->sr_flags = 0; - nilfs_mdt_write_inode_direct( - nilfs_dat_inode(nilfs), bh_sr, NILFS_SR_DAT_OFFSET(isz)); - nilfs_mdt_write_inode_direct( - nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(isz)); - nilfs_mdt_write_inode_direct( - nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(isz)); + nilfs_write_inode_common(nilfs_dat_inode(nilfs), (void *)raw_sr + + NILFS_SR_DAT_OFFSET(isz), 1); + nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr + + NILFS_SR_CPFILE_OFFSET(isz), 1); + nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + + NILFS_SR_SUFILE_OFFSET(isz), 1); } static void nilfs_redirty_inodes(struct list_head *head) @@ -1273,73 +1273,75 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) return err; } -static int nilfs_touch_segusage(struct inode *sufile, __u64 segnum) -{ - struct buffer_head *bh_su; - struct nilfs_segment_usage *raw_su; - int err; - - err = nilfs_sufile_get_segment_usage(sufile, segnum, &raw_su, &bh_su); - if (unlikely(err)) - return err; - nilfs_mdt_mark_buffer_dirty(bh_su); - nilfs_mdt_mark_dirty(sufile); - nilfs_sufile_put_segment_usage(sufile, segnum, bh_su); - return 0; -} - +/** + * nilfs_segctor_begin_construction - setup segment buffer to make a new log + * @sci: nilfs_sc_info + * @nilfs: nilfs object + */ static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { - struct nilfs_segment_buffer *segbuf, *n; + struct nilfs_segment_buffer *segbuf, *prev; __u64 nextnum; - int err; + int err, alloc = 0; - if (list_empty(&sci->sc_segbufs)) { - segbuf = nilfs_segbuf_new(sci->sc_super); - if (unlikely(!segbuf)) - return -ENOMEM; - list_add(&segbuf->sb_list, &sci->sc_segbufs); - } else - segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + segbuf = nilfs_segbuf_new(sci->sc_super); + if (unlikely(!segbuf)) + return -ENOMEM; + + if (list_empty(&sci->sc_write_logs)) { + nilfs_segbuf_map(segbuf, nilfs->ns_segnum, + nilfs->ns_pseg_offset, nilfs); + if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { + nilfs_shift_to_next_segment(nilfs); + nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); + } - nilfs_segbuf_map(segbuf, nilfs->ns_segnum, nilfs->ns_pseg_offset, - nilfs); + segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq; + nextnum = nilfs->ns_nextnum; - if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { - nilfs_shift_to_next_segment(nilfs); - nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); + if (nilfs->ns_segnum == nilfs->ns_nextnum) + /* Start from the head of a new full segment */ + alloc++; + } else { + /* Continue logs */ + prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs); + nilfs_segbuf_map_cont(segbuf, prev); + segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq; + nextnum = prev->sb_nextnum; + + if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { + nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs); + segbuf->sb_sum.seg_seq++; + alloc++; + } } - sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks; - err = nilfs_touch_segusage(nilfs->ns_sufile, segbuf->sb_segnum); - if (unlikely(err)) - return err; + err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum); + if (err) + goto failed; - if (nilfs->ns_segnum == nilfs->ns_nextnum) { - /* Start from the head of a new full segment */ + if (alloc) { err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum); - if (unlikely(err)) - return err; - } else - nextnum = nilfs->ns_nextnum; - - segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq; + if (err) + goto failed; + } nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs); - /* truncating segment buffers */ - list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs, - sb_list) { - list_del_init(&segbuf->sb_list); - nilfs_segbuf_free(segbuf); - } + BUG_ON(!list_empty(&sci->sc_segbufs)); + list_add_tail(&segbuf->sb_list, &sci->sc_segbufs); + sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks; return 0; + + failed: + nilfs_segbuf_free(segbuf); + return err; } static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs, int nadd) { - struct nilfs_segment_buffer *segbuf, *prev, *n; + struct nilfs_segment_buffer *segbuf, *prev; struct inode *sufile = nilfs->ns_sufile; __u64 nextnextnum; LIST_HEAD(list); @@ -1352,7 +1354,7 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, * not be dirty. The following call ensures that the buffer is dirty * and will pin the buffer on memory until the sufile is written. */ - err = nilfs_touch_segusage(sufile, prev->sb_nextnum); + err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum); if (unlikely(err)) return err; @@ -1378,33 +1380,33 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, list_add_tail(&segbuf->sb_list, &list); prev = segbuf; } - list_splice(&list, sci->sc_segbufs.prev); + list_splice_tail(&list, &sci->sc_segbufs); return 0; failed_segbuf: nilfs_segbuf_free(segbuf); failed: - list_for_each_entry_safe(segbuf, n, &list, sb_list) { + list_for_each_entry(segbuf, &list, sb_list) { ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); WARN_ON(ret); /* never fails */ - list_del_init(&segbuf->sb_list); - nilfs_segbuf_free(segbuf); } + nilfs_destroy_logs(&list); return err; } -static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, - struct the_nilfs *nilfs) +static void nilfs_free_incomplete_logs(struct list_head *logs, + struct the_nilfs *nilfs) { - struct nilfs_segment_buffer *segbuf; - int ret, done = 0; + struct nilfs_segment_buffer *segbuf, *prev; + struct inode *sufile = nilfs->ns_sufile; + int ret; - segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + segbuf = NILFS_FIRST_SEGBUF(logs); if (nilfs->ns_nextnum != segbuf->sb_nextnum) { - ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); + ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); WARN_ON(ret); /* never fails */ } - if (segbuf->sb_io_error) { + if (atomic_read(&segbuf->sb_err)) { /* Case 1: The first segment failed */ if (segbuf->sb_pseg_start != segbuf->sb_fseg_start) /* Case 1a: Partial segment appended into an existing @@ -1413,106 +1415,54 @@ static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, segbuf->sb_fseg_end); else /* Case 1b: New full segment */ set_nilfs_discontinued(nilfs); - done++; } - list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { - ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); - WARN_ON(ret); /* never fails */ - if (!done && segbuf->sb_io_error) { - if (segbuf->sb_segnum != nilfs->ns_nextnum) - /* Case 2: extended segment (!= next) failed */ - nilfs_sufile_set_error(nilfs->ns_sufile, - segbuf->sb_segnum); - done++; - } - } -} - -static void nilfs_segctor_clear_segment_buffers(struct nilfs_sc_info *sci) -{ - struct nilfs_segment_buffer *segbuf; - - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) - nilfs_segbuf_clear(segbuf); - sci->sc_super_root = NULL; -} - -static void nilfs_segctor_destroy_segment_buffers(struct nilfs_sc_info *sci) -{ - struct nilfs_segment_buffer *segbuf; - - while (!list_empty(&sci->sc_segbufs)) { - segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); - list_del_init(&segbuf->sb_list); - nilfs_segbuf_free(segbuf); - } - /* sci->sc_curseg = NULL; */ -} - -static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci, - struct the_nilfs *nilfs, int err) -{ - if (unlikely(err)) { - nilfs_segctor_free_incomplete_segments(sci, nilfs); - if (sci->sc_stage.flags & NILFS_CF_SUFREED) { - int ret; - - ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, - sci->sc_freesegs, - sci->sc_nfreesegs, - NULL); - WARN_ON(ret); /* do not happen */ + prev = segbuf; + list_for_each_entry_continue(segbuf, logs, sb_list) { + if (prev->sb_nextnum != segbuf->sb_nextnum) { + ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); + WARN_ON(ret); /* never fails */ } + if (atomic_read(&segbuf->sb_err) && + segbuf->sb_segnum != nilfs->ns_nextnum) + /* Case 2: extended segment (!= next) failed */ + nilfs_sufile_set_error(sufile, segbuf->sb_segnum); + prev = segbuf; } - nilfs_segctor_clear_segment_buffers(sci); } static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, struct inode *sufile) { struct nilfs_segment_buffer *segbuf; - struct buffer_head *bh_su; - struct nilfs_segment_usage *raw_su; unsigned long live_blocks; int ret; list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, - &raw_su, &bh_su); - WARN_ON(ret); /* always succeed because bh_su is dirty */ live_blocks = segbuf->sb_sum.nblocks + (segbuf->sb_pseg_start - segbuf->sb_fseg_start); - raw_su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime); - raw_su->su_nblocks = cpu_to_le32(live_blocks); - nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, - bh_su); + ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, + live_blocks, + sci->sc_seg_ctime); + WARN_ON(ret); /* always succeed because the segusage is dirty */ } } -static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci, - struct inode *sufile) +static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile) { struct nilfs_segment_buffer *segbuf; - struct buffer_head *bh_su; - struct nilfs_segment_usage *raw_su; int ret; - segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); - ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, - &raw_su, &bh_su); - WARN_ON(ret); /* always succeed because bh_su is dirty */ - raw_su->su_nblocks = cpu_to_le32(segbuf->sb_pseg_start - - segbuf->sb_fseg_start); - nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su); + segbuf = NILFS_FIRST_SEGBUF(logs); + ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, + segbuf->sb_pseg_start - + segbuf->sb_fseg_start, 0); + WARN_ON(ret); /* always succeed because the segusage is dirty */ - list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { - ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, - &raw_su, &bh_su); + list_for_each_entry_continue(segbuf, logs, sb_list) { + ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, + 0, 0); WARN_ON(ret); /* always succeed */ - raw_su->su_nblocks = 0; - nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, - bh_su); } } @@ -1520,17 +1470,15 @@ static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci, struct nilfs_segment_buffer *last, struct inode *sufile) { - struct nilfs_segment_buffer *segbuf = last, *n; + struct nilfs_segment_buffer *segbuf = last; int ret; - list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs, - sb_list) { - list_del_init(&segbuf->sb_list); + list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks; ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); WARN_ON(ret); - nilfs_segbuf_free(segbuf); } + nilfs_truncate_logs(&sci->sc_segbufs, last); } @@ -1569,7 +1517,7 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, NULL); WARN_ON(err); /* do not happen */ } - nilfs_segctor_clear_segment_buffers(sci); + nilfs_clear_logs(&sci->sc_segbufs); err = nilfs_segctor_extend_segments(sci, nilfs, nadd); if (unlikely(err)) @@ -1814,26 +1762,18 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, } static int nilfs_segctor_write(struct nilfs_sc_info *sci, - struct backing_dev_info *bdi) + struct the_nilfs *nilfs) { struct nilfs_segment_buffer *segbuf; - struct nilfs_write_info wi; - int err, res; - - wi.sb = sci->sc_super; - wi.bh_sr = sci->sc_super_root; - wi.bdi = bdi; + int ret = 0; list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - nilfs_segbuf_prepare_write(segbuf, &wi); - err = nilfs_segbuf_write(segbuf, &wi); - - res = nilfs_segbuf_wait(segbuf, &wi); - err = err ? : res; - if (err) - return err; + ret = nilfs_segbuf_write(segbuf, nilfs); + if (ret) + break; } - return 0; + list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs); + return ret; } static void __nilfs_end_page_io(struct page *page, int err) @@ -1911,15 +1851,17 @@ static void nilfs_clear_copied_buffers(struct list_head *list, int err) } } -static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, - struct page *failed_page, int err) +static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, + struct buffer_head *bh_sr, int err) { struct nilfs_segment_buffer *segbuf; struct page *bd_page = NULL, *fs_page = NULL; + struct buffer_head *bh; - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - struct buffer_head *bh; + if (list_empty(logs)) + return; + list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { if (bh->b_page != bd_page) { @@ -1931,7 +1873,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - if (bh == sci->sc_super_root) { + if (bh == bh_sr) { if (bh->b_page != bd_page) { end_page_writeback(bd_page); bd_page = bh->b_page; @@ -1941,7 +1883,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, err); if (fs_page && fs_page == failed_page) - goto done; + return; fs_page = bh->b_page; } } @@ -1950,8 +1892,34 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, end_page_writeback(bd_page); nilfs_end_page_io(fs_page, err); - done: +} + +static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs, int err) +{ + LIST_HEAD(logs); + int ret; + + list_splice_tail_init(&sci->sc_write_logs, &logs); + ret = nilfs_wait_on_logs(&logs); + if (ret) + nilfs_abort_logs(&logs, NULL, sci->sc_super_root, ret); + + list_splice_tail_init(&sci->sc_segbufs, &logs); + nilfs_cancel_segusage(&logs, nilfs->ns_sufile); + nilfs_free_incomplete_logs(&logs, nilfs); nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err); + + if (sci->sc_stage.flags & NILFS_CF_SUFREED) { + ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, + sci->sc_freesegs, + sci->sc_nfreesegs, + NULL); + WARN_ON(ret); /* do not happen */ + } + + nilfs_destroy_logs(&logs); + sci->sc_super_root = NULL; } static void nilfs_set_next_segment(struct the_nilfs *nilfs, @@ -1973,7 +1941,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) struct the_nilfs *nilfs = sbi->s_nilfs; int update_sr = (sci->sc_super_root != NULL); - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) { struct buffer_head *bh; list_for_each_entry(bh, &segbuf->sb_segsum_buffers, @@ -2046,7 +2014,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) sci->sc_nblk_inc += sci->sc_nblk_this_inc; - segbuf = NILFS_LAST_SEGBUF(&sci->sc_segbufs); + segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs); nilfs_set_next_segment(nilfs, segbuf); if (update_sr) { @@ -2057,10 +2025,23 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); + nilfs_segctor_clear_metadata_dirty(sci); } else clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); } +static int nilfs_segctor_wait(struct nilfs_sc_info *sci) +{ + int ret; + + ret = nilfs_wait_on_logs(&sci->sc_write_logs); + if (!ret) { + nilfs_segctor_complete_write(sci); + nilfs_destroy_logs(&sci->sc_write_logs); + } + return ret; +} + static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, struct nilfs_sb_info *sbi) { @@ -2173,7 +2154,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) /* Avoid empty segment */ if (sci->sc_stage.scnt == NILFS_ST_DONE && NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { - nilfs_segctor_end_construction(sci, nilfs, 1); + nilfs_segctor_abort_construction(sci, nilfs, 1); goto out; } @@ -2187,7 +2168,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) if (has_sr) { err = nilfs_segctor_fill_in_checkpoint(sci); if (unlikely(err)) - goto failed_to_make_up; + goto failed_to_write; nilfs_segctor_fill_in_super_root(sci, nilfs); } @@ -2195,42 +2176,46 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) /* Write partial segments */ err = nilfs_segctor_prepare_write(sci, &failed_page); - if (unlikely(err)) + if (err) { + nilfs_abort_logs(&sci->sc_segbufs, failed_page, + sci->sc_super_root, err); goto failed_to_write; - + } nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed); - err = nilfs_segctor_write(sci, nilfs->ns_bdi); + err = nilfs_segctor_write(sci, nilfs); if (unlikely(err)) goto failed_to_write; - nilfs_segctor_complete_write(sci); - - /* Commit segments */ - if (has_sr) - nilfs_segctor_clear_metadata_dirty(sci); - - nilfs_segctor_end_construction(sci, nilfs, 0); - + if (sci->sc_stage.scnt == NILFS_ST_DONE || + nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) { + /* + * At this point, we avoid double buffering + * for blocksize < pagesize because page dirty + * flag is turned off during write and dirty + * buffers are not properly collected for + * pages crossing over segments. + */ + err = nilfs_segctor_wait(sci); + if (err) + goto failed_to_write; + } } while (sci->sc_stage.scnt != NILFS_ST_DONE); + sci->sc_super_root = NULL; + out: - nilfs_segctor_destroy_segment_buffers(sci); nilfs_segctor_check_out_files(sci, sbi); return err; failed_to_write: - nilfs_segctor_abort_write(sci, failed_page, err); - nilfs_segctor_cancel_segusage(sci, nilfs->ns_sufile); - - failed_to_make_up: if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) nilfs_redirty_inodes(&sci->sc_dirty_files); failed: if (nilfs_doing_gc()) nilfs_redirty_inodes(&sci->sc_gc_inodes); - nilfs_segctor_end_construction(sci, nilfs, err); + nilfs_segctor_abort_construction(sci, nilfs, err); goto out; } @@ -2559,7 +2544,7 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, sci->sc_freesegs = kbufs[4]; sci->sc_nfreesegs = argv[4].v_nmembs; - list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev); + list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes); for (;;) { nilfs_segctor_accept(sci, &req); @@ -2788,6 +2773,7 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) spin_lock_init(&sci->sc_state_lock); INIT_LIST_HEAD(&sci->sc_dirty_files); INIT_LIST_HEAD(&sci->sc_segbufs); + INIT_LIST_HEAD(&sci->sc_write_logs); INIT_LIST_HEAD(&sci->sc_gc_inodes); INIT_LIST_HEAD(&sci->sc_copied_buffers); @@ -2855,6 +2841,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) } WARN_ON(!list_empty(&sci->sc_segbufs)); + WARN_ON(!list_empty(&sci->sc_write_logs)); down_write(&sbi->s_nilfs->ns_segctor_sem); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 0d2a475a741..3d3ab2f9864 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -97,6 +97,7 @@ struct nilfs_segsum_pointer { * @sc_dsync_start: start byte offset of data pages * @sc_dsync_end: end byte offset of data pages (inclusive) * @sc_segbufs: List of segment buffers + * @sc_write_logs: List of segment buffers to hold logs under writing * @sc_segbuf_nblocks: Number of available blocks in segment buffers. * @sc_curseg: Current segment buffer * @sc_super_root: Pointer to the super root buffer @@ -143,6 +144,7 @@ struct nilfs_sc_info { /* Segment buffers */ struct list_head sc_segbufs; + struct list_head sc_write_logs; unsigned long sc_segbuf_nblocks; struct nilfs_segment_buffer *sc_curseg; struct buffer_head *sc_super_root; diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 37994d4a59c..b6c36d0cc33 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -31,6 +31,16 @@ #include "sufile.h" +struct nilfs_sufile_info { + struct nilfs_mdt_info mi; + unsigned long ncleansegs; +}; + +static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile) +{ + return (struct nilfs_sufile_info *)NILFS_MDT(sufile); +} + static inline unsigned long nilfs_sufile_segment_usages_per_block(const struct inode *sufile) { @@ -62,14 +72,6 @@ nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr, max - curr + 1); } -static inline struct nilfs_sufile_header * -nilfs_sufile_block_get_header(const struct inode *sufile, - struct buffer_head *bh, - void *kaddr) -{ - return kaddr + bh_offset(bh); -} - static struct nilfs_segment_usage * nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, struct buffer_head *bh, void *kaddr) @@ -110,6 +112,15 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, } /** + * nilfs_sufile_get_ncleansegs - return the number of clean segments + * @sufile: inode of segment usage file + */ +unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile) +{ + return NILFS_SUI(sufile)->ncleansegs; +} + +/** * nilfs_sufile_updatev - modify multiple segment usages at a time * @sufile: inode of segment usage file * @segnumv: array of segment numbers @@ -270,7 +281,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) if (ret < 0) goto out_sem; kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); + header = kaddr + bh_offset(header_bh); ncleansegs = le64_to_cpu(header->sh_ncleansegs); last_alloc = le64_to_cpu(header->sh_last_alloc); kunmap_atomic(kaddr, KM_USER0); @@ -302,13 +313,13 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) kunmap_atomic(kaddr, KM_USER0); kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header( - sufile, header_bh, kaddr); + header = kaddr + bh_offset(header_bh); le64_add_cpu(&header->sh_ncleansegs, -1); le64_add_cpu(&header->sh_ndirtysegs, 1); header->sh_last_alloc = cpu_to_le64(segnum); kunmap_atomic(kaddr, KM_USER0); + NILFS_SUI(sufile)->ncleansegs--; nilfs_mdt_mark_buffer_dirty(header_bh); nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); @@ -351,6 +362,8 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, kunmap_atomic(kaddr, KM_USER0); nilfs_sufile_mod_counter(header_bh, -1, 1); + NILFS_SUI(sufile)->ncleansegs--; + nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } @@ -380,6 +393,8 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, kunmap_atomic(kaddr, KM_USER0); nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); + NILFS_SUI(sufile)->ncleansegs -= clean; + nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } @@ -409,79 +424,65 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); + NILFS_SUI(sufile)->ncleansegs++; + nilfs_mdt_mark_dirty(sufile); } /** - * nilfs_sufile_get_segment_usage - get a segment usage + * nilfs_sufile_mark_dirty - mark the buffer having a segment usage dirty * @sufile: inode of segment usage file * @segnum: segment number - * @sup: pointer to segment usage - * @bhp: pointer to buffer head - * - * Description: nilfs_sufile_get_segment_usage() acquires the segment usage - * specified by @segnum. - * - * Return Value: On success, 0 is returned, and the segment usage and the - * buffer head of the buffer on which the segment usage is located are stored - * in the place pointed by @sup and @bhp, respectively. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid segment usage number. */ -int nilfs_sufile_get_segment_usage(struct inode *sufile, __u64 segnum, - struct nilfs_segment_usage **sup, - struct buffer_head **bhp) +int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) { struct buffer_head *bh; - struct nilfs_segment_usage *su; - void *kaddr; int ret; - /* segnum is 0 origin */ - if (segnum >= nilfs_sufile_get_nsegments(sufile)) - return -EINVAL; - down_write(&NILFS_MDT(sufile)->mi_sem); - ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &bh); - if (ret < 0) - goto out_sem; - kaddr = kmap(bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); - if (nilfs_segment_usage_error(su)) { - kunmap(bh->b_page); + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); + if (!ret) { + nilfs_mdt_mark_buffer_dirty(bh); + nilfs_mdt_mark_dirty(sufile); brelse(bh); - ret = -EINVAL; - goto out_sem; } - - if (sup != NULL) - *sup = su; - *bhp = bh; - - out_sem: - up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } /** - * nilfs_sufile_put_segment_usage - put a segment usage + * nilfs_sufile_set_segment_usage - set usage of a segment * @sufile: inode of segment usage file * @segnum: segment number - * @bh: buffer head - * - * Description: nilfs_sufile_put_segment_usage() releases the segment usage - * specified by @segnum. @bh must be the buffer head which have been returned - * by a previous call to nilfs_sufile_get_segment_usage() with @segnum. + * @nblocks: number of live blocks in the segment + * @modtime: modification time (option) */ -void nilfs_sufile_put_segment_usage(struct inode *sufile, __u64 segnum, - struct buffer_head *bh) +int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, + unsigned long nblocks, time_t modtime) { - kunmap(bh->b_page); + struct buffer_head *bh; + struct nilfs_segment_usage *su; + void *kaddr; + int ret; + + down_write(&NILFS_MDT(sufile)->mi_sem); + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); + if (ret < 0) + goto out_sem; + + kaddr = kmap_atomic(bh->b_page, KM_USER0); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + WARN_ON(nilfs_segment_usage_error(su)); + if (modtime) + su->su_lastmod = cpu_to_le64(modtime); + su->su_nblocks = cpu_to_le32(nblocks); + kunmap_atomic(kaddr, KM_USER0); + + nilfs_mdt_mark_buffer_dirty(bh); + nilfs_mdt_mark_dirty(sufile); brelse(bh); + + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; } /** @@ -515,7 +516,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) goto out_sem; kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); + header = kaddr + bh_offset(header_bh); sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs); @@ -532,33 +533,6 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) return ret; } -/** - * nilfs_sufile_get_ncleansegs - get the number of clean segments - * @sufile: inode of segment usage file - * @nsegsp: pointer to the number of clean segments - * - * Description: nilfs_sufile_get_ncleansegs() acquires the number of clean - * segments. - * - * Return Value: On success, 0 is returned and the number of clean segments is - * stored in the place pointed by @nsegsp. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - */ -int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp) -{ - struct nilfs_sustat sustat; - int ret; - - ret = nilfs_sufile_get_stat(sufile, &sustat); - if (ret == 0) - *nsegsp = sustat.ss_ncleansegs; - return ret; -} - void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, struct buffer_head *header_bh, struct buffer_head *su_bh) @@ -577,8 +551,10 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, nilfs_segment_usage_set_error(su); kunmap_atomic(kaddr, KM_USER0); - if (suclean) + if (suclean) { nilfs_sufile_mod_counter(header_bh, -1, 0); + NILFS_SUI(sufile)->ncleansegs--; + } nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } @@ -657,3 +633,48 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, up_read(&NILFS_MDT(sufile)->mi_sem); return ret; } + +/** + * nilfs_sufile_read - read sufile inode + * @sufile: sufile inode + * @raw_inode: on-disk sufile inode + */ +int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode) +{ + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); + struct buffer_head *header_bh; + struct nilfs_sufile_header *header; + void *kaddr; + int ret; + + ret = nilfs_read_inode_common(sufile, raw_inode); + if (ret < 0) + return ret; + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (!ret) { + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = kaddr + bh_offset(header_bh); + sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); + kunmap_atomic(kaddr, KM_USER0); + brelse(header_bh); + } + return ret; +} + +/** + * nilfs_sufile_new - create sufile + * @nilfs: nilfs object + * @susize: size of a segment usage entry + */ +struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize) +{ + struct inode *sufile; + + sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO, + sizeof(struct nilfs_sufile_info)); + if (sufile) + nilfs_mdt_set_entry_size(sufile, susize, + sizeof(struct nilfs_sufile_header)); + return sufile; +} diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index 0e99e5c0bd0..15163b8aff7 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -34,14 +34,13 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments; } +unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); + int nilfs_sufile_alloc(struct inode *, __u64 *); -int nilfs_sufile_get_segment_usage(struct inode *, __u64, - struct nilfs_segment_usage **, - struct buffer_head **); -void nilfs_sufile_put_segment_usage(struct inode *, __u64, - struct buffer_head *); +int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); +int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, + unsigned long nblocks, time_t modtime); int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); -int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, size_t); @@ -62,6 +61,9 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, struct buffer_head *); +int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode); +struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize); + /** * nilfs_sufile_scrap - make a segment garbage * @sufile: inode of segment usage file diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 644e66727dd..5403b3ef3a4 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -363,14 +363,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) list_add(&sbi->s_list, &nilfs->ns_supers); up_write(&nilfs->ns_super_sem); - sbi->s_ifile = nilfs_mdt_new(nilfs, sbi->s_super, NILFS_IFILE_INO); + sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size); if (!sbi->s_ifile) return -ENOMEM; - err = nilfs_palloc_init_blockgroup(sbi->s_ifile, nilfs->ns_inode_size); - if (unlikely(err)) - goto failed; - down_read(&nilfs->ns_segctor_sem); err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, &bh_cp); @@ -411,7 +407,6 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi) { struct the_nilfs *nilfs = sbi->s_nilfs; - nilfs_mdt_clear(sbi->s_ifile); nilfs_mdt_destroy(sbi->s_ifile); sbi->s_ifile = NULL; down_write(&nilfs->ns_super_sem); @@ -419,22 +414,6 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi) up_write(&nilfs->ns_super_sem); } -static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi) -{ - struct the_nilfs *nilfs = sbi->s_nilfs; - int err = 0; - - down_write(&nilfs->ns_sem); - if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { - nilfs->ns_mount_state |= NILFS_VALID_FS; - err = nilfs_commit_super(sbi, 1); - if (likely(!err)) - printk(KERN_INFO "NILFS: recovery complete.\n"); - } - up_write(&nilfs->ns_sem); - return err; -} - static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; @@ -490,7 +469,7 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) struct nilfs_sb_info *sbi = NILFS_SB(sb); if (!nilfs_test_opt(sbi, BARRIER)) - seq_printf(seq, ",barrier=off"); + seq_printf(seq, ",nobarrier"); if (nilfs_test_opt(sbi, SNAPSHOT)) seq_printf(seq, ",cp=%llu", (unsigned long long int)sbi->s_snapshot_cno); @@ -500,6 +479,8 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_printf(seq, ",errors=panic"); if (nilfs_test_opt(sbi, STRICT_ORDER)) seq_printf(seq, ",order=strict"); + if (nilfs_test_opt(sbi, NORECOVERY)) + seq_printf(seq, ",norecovery"); return 0; } @@ -568,7 +549,7 @@ static const struct export_operations nilfs_export_ops = { enum { Opt_err_cont, Opt_err_panic, Opt_err_ro, - Opt_barrier, Opt_snapshot, Opt_order, + Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, Opt_err, }; @@ -576,25 +557,13 @@ static match_table_t tokens = { {Opt_err_cont, "errors=continue"}, {Opt_err_panic, "errors=panic"}, {Opt_err_ro, "errors=remount-ro"}, - {Opt_barrier, "barrier=%s"}, + {Opt_nobarrier, "nobarrier"}, {Opt_snapshot, "cp=%u"}, {Opt_order, "order=%s"}, + {Opt_norecovery, "norecovery"}, {Opt_err, NULL} }; -static int match_bool(substring_t *s, int *result) -{ - int len = s->to - s->from; - - if (strncmp(s->from, "on", len) == 0) - *result = 1; - else if (strncmp(s->from, "off", len) == 0) - *result = 0; - else - return 1; - return 0; -} - static int parse_options(char *options, struct super_block *sb) { struct nilfs_sb_info *sbi = NILFS_SB(sb); @@ -612,13 +581,8 @@ static int parse_options(char *options, struct super_block *sb) token = match_token(p, tokens, args); switch (token) { - case Opt_barrier: - if (match_bool(&args[0], &option)) - return 0; - if (option) - nilfs_set_opt(sbi, BARRIER); - else - nilfs_clear_opt(sbi, BARRIER); + case Opt_nobarrier: + nilfs_clear_opt(sbi, BARRIER); break; case Opt_order: if (strcmp(args[0].from, "relaxed") == 0) @@ -647,6 +611,9 @@ static int parse_options(char *options, struct super_block *sb) sbi->s_snapshot_cno = option; nilfs_set_opt(sbi, SNAPSHOT); break; + case Opt_norecovery: + nilfs_set_opt(sbi, NORECOVERY); + break; default: printk(KERN_ERR "NILFS: Unrecognized mount option \"%s\"\n", p); @@ -672,9 +639,7 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi) int mnt_count = le16_to_cpu(sbp->s_mnt_count); /* nilfs->sem must be locked by the caller. */ - if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { - printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); - } else if (nilfs->ns_mount_state & NILFS_ERROR_FS) { + if (nilfs->ns_mount_state & NILFS_ERROR_FS) { printk(KERN_WARNING "NILFS warning: mounting fs with errors\n"); #if 0 @@ -782,11 +747,10 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, sb->s_root = NULL; sb->s_time_gran = 1; - if (!nilfs_loaded(nilfs)) { - err = load_nilfs(nilfs, sbi); - if (err) - goto failed_sbi; - } + err = load_nilfs(nilfs, sbi); + if (err) + goto failed_sbi; + cno = nilfs_last_cno(nilfs); if (sb->s_flags & MS_RDONLY) { @@ -854,12 +818,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, up_write(&nilfs->ns_sem); } - err = nilfs_mark_recovery_complete(sbi); - if (unlikely(err)) { - printk(KERN_ERR "NILFS: recovery failed.\n"); - goto failed_root; - } - down_write(&nilfs->ns_super_sem); if (!nilfs_test_opt(sbi, SNAPSHOT)) nilfs->ns_current = sbi; @@ -867,10 +825,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, return 0; - failed_root: - dput(sb->s_root); - sb->s_root = NULL; - failed_segctor: nilfs_detach_segment_constructor(sbi); @@ -915,6 +869,14 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if (!nilfs_valid_fs(nilfs)) { + printk(KERN_WARNING "NILFS (device %s): couldn't " + "remount because the filesystem is in an " + "incomplete recovery state.\n", sb->s_id); + err = -EINVAL; + goto restore_opts; + } + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) goto out; if (*flags & MS_RDONLY) { diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index ad391a8c3e7..6241e1722ef 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -146,13 +146,9 @@ void put_nilfs(struct the_nilfs *nilfs) might_sleep(); if (nilfs_loaded(nilfs)) { - nilfs_mdt_clear(nilfs->ns_sufile); nilfs_mdt_destroy(nilfs->ns_sufile); - nilfs_mdt_clear(nilfs->ns_cpfile); nilfs_mdt_destroy(nilfs->ns_cpfile); - nilfs_mdt_clear(nilfs->ns_dat); nilfs_mdt_destroy(nilfs->ns_dat); - /* XXX: how and when to clear nilfs->ns_gc_dat? */ nilfs_mdt_destroy(nilfs->ns_gc_dat); } if (nilfs_init(nilfs)) { @@ -166,7 +162,6 @@ void put_nilfs(struct the_nilfs *nilfs) static int nilfs_load_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, sector_t sr_block) { - static struct lock_class_key dat_lock_key; struct buffer_head *bh_sr; struct nilfs_super_root *raw_sr; struct nilfs_super_block **sbp = nilfs->ns_sbp; @@ -187,51 +182,36 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, inode_size = nilfs->ns_inode_size; err = -ENOMEM; - nilfs->ns_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO); + nilfs->ns_dat = nilfs_dat_new(nilfs, dat_entry_size); if (unlikely(!nilfs->ns_dat)) goto failed; - nilfs->ns_gc_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO); + nilfs->ns_gc_dat = nilfs_dat_new(nilfs, dat_entry_size); if (unlikely(!nilfs->ns_gc_dat)) goto failed_dat; - nilfs->ns_cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO); + nilfs->ns_cpfile = nilfs_cpfile_new(nilfs, checkpoint_size); if (unlikely(!nilfs->ns_cpfile)) goto failed_gc_dat; - nilfs->ns_sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO); + nilfs->ns_sufile = nilfs_sufile_new(nilfs, segment_usage_size); if (unlikely(!nilfs->ns_sufile)) goto failed_cpfile; - err = nilfs_palloc_init_blockgroup(nilfs->ns_dat, dat_entry_size); - if (unlikely(err)) - goto failed_sufile; - - err = nilfs_palloc_init_blockgroup(nilfs->ns_gc_dat, dat_entry_size); - if (unlikely(err)) - goto failed_sufile; - - lockdep_set_class(&NILFS_MDT(nilfs->ns_dat)->mi_sem, &dat_lock_key); - lockdep_set_class(&NILFS_MDT(nilfs->ns_gc_dat)->mi_sem, &dat_lock_key); - nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); - nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size, - sizeof(struct nilfs_cpfile_header)); - nilfs_mdt_set_entry_size(nilfs->ns_sufile, segment_usage_size, - sizeof(struct nilfs_sufile_header)); - err = nilfs_mdt_read_inode_direct( - nilfs->ns_dat, bh_sr, NILFS_SR_DAT_OFFSET(inode_size)); + err = nilfs_dat_read(nilfs->ns_dat, (void *)bh_sr->b_data + + NILFS_SR_DAT_OFFSET(inode_size)); if (unlikely(err)) goto failed_sufile; - err = nilfs_mdt_read_inode_direct( - nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(inode_size)); + err = nilfs_cpfile_read(nilfs->ns_cpfile, (void *)bh_sr->b_data + + NILFS_SR_CPFILE_OFFSET(inode_size)); if (unlikely(err)) goto failed_sufile; - err = nilfs_mdt_read_inode_direct( - nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(inode_size)); + err = nilfs_sufile_read(nilfs->ns_sufile, (void *)bh_sr->b_data + + NILFS_SR_SUFILE_OFFSET(inode_size)); if (unlikely(err)) goto failed_sufile; @@ -281,29 +261,30 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) struct nilfs_recovery_info ri; unsigned int s_flags = sbi->s_super->s_flags; int really_read_only = bdev_read_only(nilfs->ns_bdev); - unsigned valid_fs; - int err = 0; - - nilfs_init_recovery_info(&ri); + int valid_fs = nilfs_valid_fs(nilfs); + int err; - down_write(&nilfs->ns_sem); - valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); - up_write(&nilfs->ns_sem); + if (nilfs_loaded(nilfs)) { + if (valid_fs || + ((s_flags & MS_RDONLY) && nilfs_test_opt(sbi, NORECOVERY))) + return 0; + printk(KERN_ERR "NILFS: the filesystem is in an incomplete " + "recovery state.\n"); + return -EINVAL; + } - if (!valid_fs && (s_flags & MS_RDONLY)) { - printk(KERN_INFO "NILFS: INFO: recovery " - "required for readonly filesystem.\n"); - if (really_read_only) { - printk(KERN_ERR "NILFS: write access " - "unavailable, cannot proceed.\n"); - err = -EROFS; - goto failed; + if (!valid_fs) { + printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); + if (s_flags & MS_RDONLY) { + printk(KERN_INFO "NILFS: INFO: recovery " + "required for readonly filesystem.\n"); + printk(KERN_INFO "NILFS: write access will " + "be enabled during recovery.\n"); } - printk(KERN_INFO "NILFS: write access will " - "be enabled during recovery.\n"); - sbi->s_super->s_flags &= ~MS_RDONLY; } + nilfs_init_recovery_info(&ri); + err = nilfs_search_super_root(nilfs, sbi, &ri); if (unlikely(err)) { printk(KERN_ERR "NILFS: error searching super root.\n"); @@ -316,19 +297,56 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) goto failed; } - if (!valid_fs) { - err = nilfs_recover_logical_segments(nilfs, sbi, &ri); - if (unlikely(err)) { - nilfs_mdt_destroy(nilfs->ns_cpfile); - nilfs_mdt_destroy(nilfs->ns_sufile); - nilfs_mdt_destroy(nilfs->ns_dat); - goto failed; + if (valid_fs) + goto skip_recovery; + + if (s_flags & MS_RDONLY) { + if (nilfs_test_opt(sbi, NORECOVERY)) { + printk(KERN_INFO "NILFS: norecovery option specified. " + "skipping roll-forward recovery\n"); + goto skip_recovery; } - if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED) - sbi->s_super->s_dirt = 1; + if (really_read_only) { + printk(KERN_ERR "NILFS: write access " + "unavailable, cannot proceed.\n"); + err = -EROFS; + goto failed_unload; + } + sbi->s_super->s_flags &= ~MS_RDONLY; + } else if (nilfs_test_opt(sbi, NORECOVERY)) { + printk(KERN_ERR "NILFS: recovery cancelled because norecovery " + "option was specified for a read/write mount\n"); + err = -EINVAL; + goto failed_unload; } + err = nilfs_recover_logical_segments(nilfs, sbi, &ri); + if (err) + goto failed_unload; + + down_write(&nilfs->ns_sem); + nilfs->ns_mount_state |= NILFS_VALID_FS; + nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); + err = nilfs_commit_super(sbi, 1); + up_write(&nilfs->ns_sem); + + if (err) { + printk(KERN_ERR "NILFS: failed to update super block. " + "recovery unfinished.\n"); + goto failed_unload; + } + printk(KERN_INFO "NILFS: recovery complete.\n"); + + skip_recovery: set_nilfs_loaded(nilfs); + nilfs_clear_recovery_info(&ri); + sbi->s_super->s_flags = s_flags; + return 0; + + failed_unload: + nilfs_mdt_destroy(nilfs->ns_cpfile); + nilfs_mdt_destroy(nilfs->ns_sufile); + nilfs_mdt_destroy(nilfs->ns_dat); failed: nilfs_clear_recovery_info(&ri); @@ -632,30 +650,23 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) { struct inode *dat = nilfs_dat_inode(nilfs); unsigned long ncleansegs; - int err; down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - err = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile, &ncleansegs); + ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - if (likely(!err)) - *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; - return err; + *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; + return 0; } int nilfs_near_disk_full(struct the_nilfs *nilfs) { - struct inode *sufile = nilfs->ns_sufile; unsigned long ncleansegs, nincsegs; - int ret; - ret = nilfs_sufile_get_ncleansegs(sufile, &ncleansegs); - if (likely(!ret)) { - nincsegs = atomic_read(&nilfs->ns_ndirtyblks) / - nilfs->ns_blocks_per_segment + 1; - if (ncleansegs <= nilfs->ns_nrsvsegs + nincsegs) - ret++; - } - return ret; + ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); + nincsegs = atomic_read(&nilfs->ns_ndirtyblks) / + nilfs->ns_blocks_per_segment + 1; + + return ncleansegs <= nilfs->ns_nrsvsegs + nincsegs; } /** diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 20abd55881e..589786e3346 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -258,6 +258,16 @@ static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) kfree(sbi); } +static inline int nilfs_valid_fs(struct the_nilfs *nilfs) +{ + unsigned valid_fs; + + down_read(&nilfs->ns_sem); + valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); + up_read(&nilfs->ns_sem); + return valid_fs; +} + static inline void nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum, sector_t *seg_start, sector_t *seg_end) diff --git a/include/acpi/acpi_hest.h b/include/acpi/acpi_hest.h new file mode 100644 index 00000000000..63194d03cb2 --- /dev/null +++ b/include/acpi/acpi_hest.h @@ -0,0 +1,12 @@ +#ifndef __ACPI_HEST_H +#define __ACPI_HEST_H + +#include <linux/pci.h> + +#ifdef CONFIG_ACPI +extern int acpi_hest_firmware_first_pci(struct pci_dev *pci); +#else +static inline int acpi_hest_firmware_first_pci(struct pci_dev *pci) { return 0; } +#endif + +#endif diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index ce520402e84..3fe02cf8b65 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -151,6 +151,8 @@ struct nilfs_super_root { #define NILFS_MOUNT_BARRIER 0x1000 /* Use block barriers */ #define NILFS_MOUNT_STRICT_ORDER 0x2000 /* Apply strict in-order semantics also for data */ +#define NILFS_MOUNT_NORECOVERY 0x4000 /* Disable write access during + mount-time recovery */ /** @@ -403,6 +405,28 @@ struct nilfs_segment_summary { #define NILFS_SS_GC 0x0010 /* segment written for cleaner operation */ /** + * struct nilfs_btree_node - B-tree node + * @bn_flags: flags + * @bn_level: level + * @bn_nchildren: number of children + * @bn_pad: padding + */ +struct nilfs_btree_node { + __u8 bn_flags; + __u8 bn_level; + __le16 bn_nchildren; + __le32 bn_pad; +}; + +/* flags */ +#define NILFS_BTREE_NODE_ROOT 0x01 + +/* level */ +#define NILFS_BTREE_LEVEL_DATA 0 +#define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1) +#define NILFS_BTREE_LEVEL_MAX 14 + +/** * struct nilfs_palloc_group_desc - block group descriptor * @pg_nfrees: number of free entries in block group */ diff --git a/include/linux/pci.h b/include/linux/pci.h index f5c7cd343e5..04771b9c331 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -218,6 +218,7 @@ struct pci_dev { unsigned int class; /* 3 bytes: (base,sub,prog-if) */ u8 revision; /* PCI revision, low byte of class word */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */ + u8 pcie_cap; /* PCI-E capability offset */ u8 pcie_type; /* PCI-E device/port type */ u8 rom_base_reg; /* which config register controls the ROM */ u8 pin; /* which interrupt pin this device uses */ @@ -280,6 +281,7 @@ struct pci_dev { unsigned int is_virtfn:1; unsigned int reset_fn:1; unsigned int is_hotplug_bridge:1; + unsigned int aer_firmware_first:1; pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ @@ -635,7 +637,13 @@ struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device, unsigned int ss_vendor, unsigned int ss_device, struct pci_dev *from); struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn); -struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn); +struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus, + unsigned int devfn); +static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus, + unsigned int devfn) +{ + return pci_get_domain_bus_and_slot(0, bus, devfn); +} struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from); int pci_dev_present(const struct pci_device_id *ids); @@ -701,6 +709,7 @@ void pci_disable_device(struct pci_dev *dev); void pci_set_master(struct pci_dev *dev); void pci_clear_master(struct pci_dev *dev); int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state); +int pci_set_cacheline_size(struct pci_dev *dev); #define HAVE_PCI_SET_MWI int __must_check pci_set_mwi(struct pci_dev *dev); int pci_try_set_mwi(struct pci_dev *dev); @@ -1246,6 +1255,8 @@ extern int pci_pci_problems; extern unsigned long pci_cardbus_io_size; extern unsigned long pci_cardbus_mem_size; +extern u8 pci_dfl_cache_line_size; +extern u8 pci_cache_line_size; extern unsigned long pci_hotplug_io_size; extern unsigned long pci_hotplug_mem_size; @@ -1290,5 +1301,34 @@ extern void pci_hp_create_module_link(struct pci_slot *pci_slot); extern void pci_hp_remove_module_link(struct pci_slot *pci_slot); #endif +/** + * pci_pcie_cap - get the saved PCIe capability offset + * @dev: PCI device + * + * PCIe capability offset is calculated at PCI device initialization + * time and saved in the data structure. This function returns saved + * PCIe capability offset. Using this instead of pci_find_capability() + * reduces unnecessary search in the PCI configuration space. If you + * need to calculate PCIe capability offset from raw device for some + * reasons, please use pci_find_capability() instead. + */ +static inline int pci_pcie_cap(struct pci_dev *dev) +{ + return dev->pcie_cap; +} + +/** + * pci_is_pcie - check if the PCI device is PCI Express capable + * @dev: PCI device + * + * Retrun true if the PCI device is PCI Express capable, false otherwise. + */ +static inline bool pci_is_pcie(struct pci_dev *dev) +{ + return !!pci_pcie_cap(dev); +} + +void pci_request_acs(void); + #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index dd0bed4f1cf..9f2ad0aa3c3 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -365,6 +365,11 @@ #define PCI_X_STATUS_266MHZ 0x40000000 /* 266 MHz capable */ #define PCI_X_STATUS_533MHZ 0x80000000 /* 533 MHz capable */ +/* PCI Bridge Subsystem ID registers */ + +#define PCI_SSVID_VENDOR_ID 4 /* PCI-Bridge subsystem vendor id register */ +#define PCI_SSVID_DEVICE_ID 6 /* PCI-Bridge subsystem device id register */ + /* PCI Express capability registers */ #define PCI_EXP_FLAGS 2 /* Capabilities register */ @@ -502,6 +507,7 @@ #define PCI_EXT_CAP_ID_VC 2 #define PCI_EXT_CAP_ID_DSN 3 #define PCI_EXT_CAP_ID_PWR 4 +#define PCI_EXT_CAP_ID_ACS 13 #define PCI_EXT_CAP_ID_ARI 14 #define PCI_EXT_CAP_ID_ATS 15 #define PCI_EXT_CAP_ID_SRIOV 16 @@ -662,4 +668,16 @@ #define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ #define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ +/* Access Control Service */ +#define PCI_ACS_CAP 0x04 /* ACS Capability Register */ +#define PCI_ACS_SV 0x01 /* Source Validation */ +#define PCI_ACS_TB 0x02 /* Translation Blocking */ +#define PCI_ACS_RR 0x04 /* P2P Request Redirect */ +#define PCI_ACS_CR 0x08 /* P2P Completion Redirect */ +#define PCI_ACS_UF 0x10 /* Upstream Forwarding */ +#define PCI_ACS_EC 0x20 /* P2P Egress Control */ +#define PCI_ACS_DT 0x40 /* Direct Translated P2P */ +#define PCI_ACS_CTRL 0x06 /* ACS Control Register */ +#define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */ + #endif /* LINUX_PCI_REGS_H */ diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index b4c79545330..6775532b92a 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -10,10 +10,7 @@ #define _PCIEPORT_IF_H_ /* Port Type */ -#define PCIE_RC_PORT 4 /* Root port of RC */ -#define PCIE_SW_UPSTREAM_PORT 5 /* Upstream port of Switch */ -#define PCIE_SW_DOWNSTREAM_PORT 6 /* Downstream port of Switch */ -#define PCIE_ANY_PORT 7 +#define PCIE_ANY_PORT (~0) /* Service Type */ #define PCIE_PORT_SERVICE_PME_SHIFT 0 /* Power Management Event */ @@ -25,17 +22,6 @@ #define PCIE_PORT_SERVICE_VC_SHIFT 3 /* Virtual Channel */ #define PCIE_PORT_SERVICE_VC (1 << PCIE_PORT_SERVICE_VC_SHIFT) -/* Root/Upstream/Downstream Port's Interrupt Mode */ -#define PCIE_PORT_NO_IRQ (-1) -#define PCIE_PORT_INTx_MODE 0 -#define PCIE_PORT_MSI_MODE 1 -#define PCIE_PORT_MSIX_MODE 2 - -struct pcie_port_data { - int port_type; /* Type of the port */ - int port_irq_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ -}; - struct pcie_device { int irq; /* Service IRQ/MSI/MSI-X Vector */ struct pci_dev *port; /* Root/Upstream/Downstream Port */ diff --git a/include/xen/xen.h b/include/xen/xen.h new file mode 100644 index 00000000000..a16402418d3 --- /dev/null +++ b/include/xen/xen.h @@ -0,0 +1,32 @@ +#ifndef _XEN_XEN_H +#define _XEN_XEN_H + +enum xen_domain_type { + XEN_NATIVE, /* running on bare hardware */ + XEN_PV_DOMAIN, /* running in a PV domain */ + XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ +}; + +#ifdef CONFIG_XEN +extern enum xen_domain_type xen_domain_type; +#else +#define xen_domain_type XEN_NATIVE +#endif + +#define xen_domain() (xen_domain_type != XEN_NATIVE) +#define xen_pv_domain() (xen_domain() && \ + xen_domain_type == XEN_PV_DOMAIN) +#define xen_hvm_domain() (xen_domain() && \ + xen_domain_type == XEN_HVM_DOMAIN) + +#ifdef CONFIG_XEN_DOM0 +#include <xen/interface/xen.h> +#include <asm/xen/hypervisor.h> + +#define xen_initial_domain() (xen_pv_domain() && \ + xen_start_info->flags & SIF_INITDOMAIN) +#else /* !CONFIG_XEN_DOM0 */ +#define xen_initial_domain() (0) +#endif /* CONFIG_XEN_DOM0 */ + +#endif /* _XEN_XEN_H */ diff --git a/kernel/resource.c b/kernel/resource.c index fb11a58b959..dc15686b7a7 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -308,35 +308,37 @@ static int find_resource(struct resource *root, struct resource *new, void *alignf_data) { struct resource *this = root->child; + resource_size_t start, end; - new->start = root->start; + start = root->start; /* * Skip past an allocated resource that starts at 0, since the assignment * of this->start - 1 to new->end below would cause an underflow. */ if (this && this->start == 0) { - new->start = this->end + 1; + start = this->end + 1; this = this->sibling; } for(;;) { if (this) - new->end = this->start - 1; + end = this->start - 1; else - new->end = root->end; - if (new->start < min) - new->start = min; - if (new->end > max) - new->end = max; - new->start = ALIGN(new->start, align); + end = root->end; + if (start < min) + start = min; + if (end > max) + end = max; + start = ALIGN(start, align); if (alignf) alignf(alignf_data, new, size, align); - if (new->start < new->end && new->end - new->start >= size - 1) { - new->end = new->start + size - 1; + if (start < end && end - start >= size - 1) { + new->start = start; + new->end = start + size - 1; return 0; } if (!this) break; - new->start = this->end + 1; + start = this->end + 1; this = this->sibling; } return -EBUSY; diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 33bed5e67a2..6438cd5599e 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -595,37 +595,89 @@ static char *symbol_string(char *buf, char *end, void *ptr, } static char *resource_string(char *buf, char *end, struct resource *res, - struct printf_spec spec) + struct printf_spec spec, const char *fmt) { #ifndef IO_RSRC_PRINTK_SIZE -#define IO_RSRC_PRINTK_SIZE 4 +#define IO_RSRC_PRINTK_SIZE 6 #endif #ifndef MEM_RSRC_PRINTK_SIZE -#define MEM_RSRC_PRINTK_SIZE 8 +#define MEM_RSRC_PRINTK_SIZE 10 #endif - struct printf_spec num_spec = { + struct printf_spec hex_spec = { .base = 16, .precision = -1, .flags = SPECIAL | SMALL | ZEROPAD, }; - /* room for the actual numbers, the two "0x", -, [, ] and the final zero */ - char sym[4*sizeof(resource_size_t) + 8]; + struct printf_spec dec_spec = { + .base = 10, + .precision = -1, + .flags = 0, + }; + struct printf_spec str_spec = { + .field_width = -1, + .precision = 10, + .flags = LEFT, + }; + struct printf_spec flag_spec = { + .base = 16, + .precision = -1, + .flags = SPECIAL | SMALL, + }; + + /* 32-bit res (sizeof==4): 10 chars in dec, 10 in hex ("0x" + 8) + * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */ +#define RSRC_BUF_SIZE ((2 * sizeof(resource_size_t)) + 4) +#define FLAG_BUF_SIZE (2 * sizeof(res->flags)) +#define DECODED_BUF_SIZE sizeof("[mem - 64bit pref disabled]") +#define RAW_BUF_SIZE sizeof("[mem - flags 0x]") + char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, + 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)]; + char *p = sym, *pend = sym + sizeof(sym); - int size = -1; + int size = -1, addr = 0; + int decode = (fmt[0] == 'R') ? 1 : 0; - if (res->flags & IORESOURCE_IO) + if (res->flags & IORESOURCE_IO) { size = IO_RSRC_PRINTK_SIZE; - else if (res->flags & IORESOURCE_MEM) + addr = 1; + } else if (res->flags & IORESOURCE_MEM) { size = MEM_RSRC_PRINTK_SIZE; + addr = 1; + } *p++ = '['; - num_spec.field_width = size; - p = number(p, pend, res->start, num_spec); - *p++ = '-'; - p = number(p, pend, res->end, num_spec); + if (res->flags & IORESOURCE_IO) + p = string(p, pend, "io ", str_spec); + else if (res->flags & IORESOURCE_MEM) + p = string(p, pend, "mem ", str_spec); + else if (res->flags & IORESOURCE_IRQ) + p = string(p, pend, "irq ", str_spec); + else if (res->flags & IORESOURCE_DMA) + p = string(p, pend, "dma ", str_spec); + else { + p = string(p, pend, "??? ", str_spec); + decode = 0; + } + hex_spec.field_width = size; + p = number(p, pend, res->start, addr ? hex_spec : dec_spec); + if (res->start != res->end) { + *p++ = '-'; + p = number(p, pend, res->end, addr ? hex_spec : dec_spec); + } + if (decode) { + if (res->flags & IORESOURCE_MEM_64) + p = string(p, pend, " 64bit", str_spec); + if (res->flags & IORESOURCE_PREFETCH) + p = string(p, pend, " pref", str_spec); + if (res->flags & IORESOURCE_DISABLED) + p = string(p, pend, " disabled", str_spec); + } else { + p = string(p, pend, " flags ", str_spec); + p = number(p, pend, res->flags, flag_spec); + } *p++ = ']'; - *p = 0; + *p = '\0'; return string(buf, end, sym, spec); } @@ -801,8 +853,8 @@ static char *ip4_addr_string(char *buf, char *end, const u8 *addr, * - 'f' For simple symbolic function names without offset * - 'S' For symbolic direct pointers with offset * - 's' For symbolic direct pointers without offset - * - 'R' For a struct resource pointer, it prints the range of - * addresses (not the name nor the flags) + * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref] + * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201] * - 'M' For a 6-byte MAC address, it prints the address in the * usual colon-separated hex notation * - 'm' For a 6-byte MAC address, it prints the hex address without colons @@ -833,7 +885,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'S': return symbol_string(buf, end, ptr, spec, *fmt); case 'R': - return resource_string(buf, end, ptr, spec); + case 'r': + return resource_string(buf, end, ptr, spec, fmt); case 'M': /* Colon separated: 00:01:02:03:04:05 */ case 'm': /* Contiguous: 000102030405 */ return mac_address_string(buf, end, ptr, spec, fmt); |