From 28a48de72b876af794853593cc1412119ada9efc Mon Sep 17 00:00:00 2001 From: "David A. Marlin" Date: Mon, 17 Jan 2005 18:29:21 +0000 Subject: [MTD] NAND extended commands, badb block table autorefresh Added extended commands for AG-AND device and added option for BBT_AUTO_REFRESH. Signed-off-by: David A. Marlin Signed-off-by: Thomas Gleixner --- include/linux/mtd/nand.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 9a19c65abd7..0118128ae38 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -5,7 +5,7 @@ * Steven J. Hill * Thomas Gleixner * - * $Id: nand.h,v 1.68 2004/11/12 10:40:37 gleixner Exp $ + * $Id: nand.h,v 1.69 2005/01/17 18:29:18 dmarlin Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -48,6 +48,8 @@ * 02-08-2004 tglx added option field to nand structure for chip anomalities * 05-25-2004 tglx added bad block table support, ST-MICRO manufacturer id * update of nand_chip structure description + * 01-17-2005 dmarlin added extended commands for AG-AND device and added option + * for BBT_AUTO_REFRESH. */ #ifndef __LINUX_MTD_NAND_H #define __LINUX_MTD_NAND_H @@ -115,6 +117,25 @@ extern int nand_read_raw (struct mtd_info *mtd, uint8_t *buf, loff_t from, size_ #define NAND_CMD_READSTART 0x30 #define NAND_CMD_CACHEDPROG 0x15 +/* Extended commands for AG-AND device */ +/* + * Note: the command for NAND_CMD_DEPLETE1 is really 0x00 but + * there is no way to distinguish that from NAND_CMD_READ0 + * until the remaining sequence of commands has been completed + * so add a high order bit and mask it off in the command. 
+ */ +#define NAND_CMD_DEPLETE1 0x100 +#define NAND_CMD_DEPLETE2 0x38 +#define NAND_CMD_STATUS_MULTI 0x71 +#define NAND_CMD_STATUS_ERROR 0x72 +/* multi-bank error status (banks 0-3) */ +#define NAND_CMD_STATUS_ERROR0 0x73 +#define NAND_CMD_STATUS_ERROR1 0x74 +#define NAND_CMD_STATUS_ERROR2 0x75 +#define NAND_CMD_STATUS_ERROR3 0x76 +#define NAND_CMD_STATUS_RESET 0x7f +#define NAND_CMD_STATUS_CLEAR 0xff + /* Status bits */ #define NAND_STATUS_FAIL 0x01 #define NAND_STATUS_FAIL_N1 0x02 @@ -170,6 +191,10 @@ extern int nand_read_raw (struct mtd_info *mtd, uint8_t *buf, loff_t from, size_ /* Chip has a array of 4 pages which can be read without * additional ready /busy waits */ #define NAND_4PAGE_ARRAY 0x00000040 +/* Chip requires that BBT is periodically rewritten to prevent + * bits from adjacent blocks from 'leaking' in altering data. + * This happens with the Renesas AG-AND chips, possibly others. */ +#define BBT_AUTO_REFRESH 0x00000080 /* Options valid for Samsung large page devices */ #define NAND_SAMSUNG_LP_OPTIONS \ -- cgit v1.2.3-70-g09d2 From 99f2a8aea18c9779c141050c6f95a8f1da63bbe4 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 24 Jan 2005 00:37:04 +0000 Subject: [MTD] Platform RAM Driver Driver for generic RAM blocks which are exported by a platform_device from the device driver system. 
Signed-off-by: Ben Dooks Signed-off-by: Thomas Gleixner --- drivers/mtd/maps/Kconfig | 12 +- drivers/mtd/maps/Makefile | 3 +- drivers/mtd/maps/plat-ram.c | 286 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/plat-ram.h | 35 ++++++ 4 files changed, 334 insertions(+), 2 deletions(-) create mode 100644 drivers/mtd/maps/plat-ram.c create mode 100644 include/linux/mtd/plat-ram.h (limited to 'include/linux') diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 8480057eadb..7d21d432f38 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -1,5 +1,5 @@ # drivers/mtd/maps/Kconfig -# $Id: Kconfig,v 1.42 2005/01/05 16:59:50 dwmw2 Exp $ +# $Id: Kconfig,v 1.43 2005/01/24 00:35:21 bjd Exp $ menu "Mapping drivers for chip access" depends on MTD!=n @@ -659,5 +659,15 @@ config MTD_SHARP_SL help This enables access to the flash chip on the Sharp SL Series of PDAs. +config MTD_PLATRAM + tristate "Map driver for platform device RAM (mtd-ram)" + depends on MTD + select MTD_RAM + help + Map driver for RAM areas described via the platform device + system. + + This selection automatically selects the map_ram driver. 
+ endmenu diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile index 7ffe02b8530..d2e6dcc8705 100644 --- a/drivers/mtd/maps/Makefile +++ b/drivers/mtd/maps/Makefile @@ -1,7 +1,7 @@ # # linux/drivers/maps/Makefile # -# $Id: Makefile.common,v 1.23 2005/01/05 17:06:36 dwmw2 Exp $ +# $Id: Makefile.common,v 1.24 2005/01/24 00:35:21 bjd Exp $ ifeq ($(CONFIG_MTD_COMPLEX_MAPPINGS),y) obj-$(CONFIG_MTD) += map_funcs.o @@ -71,3 +71,4 @@ obj-$(CONFIG_MTD_IXP2000) += ixp2000.o obj-$(CONFIG_MTD_WRSBC8260) += wr_sbc82xx_flash.o obj-$(CONFIG_MTD_DMV182) += dmv182.o obj-$(CONFIG_MTD_SHARP_SL) += sharpsl-flash.o +obj-$(CONFIG_MTD_PLATRAM) += plat-ram.o diff --git a/drivers/mtd/maps/plat-ram.c b/drivers/mtd/maps/plat-ram.c new file mode 100644 index 00000000000..808f94346ad --- /dev/null +++ b/drivers/mtd/maps/plat-ram.c @@ -0,0 +1,286 @@ +/* drivers/mtd/maps/plat-ram.c + * + * (c) 2004-2005 Simtec Electronics + * http://www.simtec.co.uk/products/SWLINUX/ + * Ben Dooks + * + * Generic platform device based RAM map + * + * $Id: plat-ram.c,v 1.1 2005/01/24 00:37:02 bjd Exp $ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#define DEBUG + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +/* private structure for each mtd platform ram device created */ + +struct platram_info { + struct device *dev; + struct mtd_info *mtd; + struct map_info map; + struct mtd_partition *partitions; + struct resource *area; + struct platdata_mtd_ram *pdata; +}; + +/* to_platram_info() + * + * device private data to struct platram_info conversion +*/ + +static inline struct platram_info *to_platram_info(struct device *dev) +{ + return (struct platram_info *)dev_get_drvdata(dev); +} + +/* platram_setrw + * + * call the platform device's set rw/ro control + * + * to = 0 => read-only + * = 1 => read-write +*/ + +static inline void platram_setrw(struct platram_info *info, int to) +{ + if (info->pdata == NULL) + return; + + if (info->pdata->set_rw != NULL) + (info->pdata->set_rw)(info->dev, to); +} + +/* platram_remove + * + * called to remove the device from the driver's control +*/ + +static int platram_remove(struct device *dev) +{ + struct platram_info *info = to_platram_info(dev); + + dev_set_drvdata(dev, NULL); + + dev_dbg(dev, "removing device\n"); + + if (info == NULL) + return 0; + + if (info->mtd) { +#ifdef CONFIG_MTD_PARTITIONS + if (info->partitions) { + del_mtd_partitions(info->mtd); + kfree(info->partitions); + } +#endif + del_mtd_device(info->mtd); + map_destroy(info->mtd); + } + + /* ensure ram is left read-only */ + + platram_setrw(info, PLATRAM_RO); + + /* release resources */ + + if (info->area) { + release_resource(info->area); + kfree(info->area); + } + + if (info->map.virt != NULL) + iounmap(info->map.virt); + + kfree(info); + + return 0; +} + +/* platram_probe + * + * called from device driver 
system when a device matching our + * driver is found. +*/ + +static int platram_probe(struct device *dev) +{ + struct platform_device *pd = to_platform_device(dev); + struct platdata_mtd_ram *pdata; + struct platram_info *info; + struct resource *res; + int err = 0; + + dev_dbg(dev, "probe entered\n"); + + if (dev->platform_data == NULL) { + dev_err(dev, "no platform data supplied\n"); + err = -ENOENT; + goto exit_error; + } + + pdata = dev->platform_data; + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) { + dev_err(dev, "no memory for flash info\n"); + err = -ENOMEM; + goto exit_error; + } + + memzero(info, sizeof(*info)); + dev_set_drvdata(dev, info); + + info->dev = dev; + info->pdata = pdata; + + /* get the resource for the memory mapping */ + + res = platform_get_resource(pd, IORESOURCE_MEM, 0); + + if (res == NULL) { + dev_err(dev, "no memory resource specified\n"); + err = -ENOENT; + goto exit_free; + } + + dev_dbg(dev, "got platform resource %p (0x%lx)\n", res, res->start); + + /* setup map parameters */ + + info->map.phys = res->start; + info->map.size = (res->end - res->start) + 1; + info->map.name = pdata->mapname != NULL ? 
pdata->mapname : pd->name; + info->map.bankwidth = pdata->bankwidth; + + /* register our usage of the memory area */ + + info->area = request_mem_region(res->start, info->map.size, pd->name); + if (info->area == NULL) { + dev_err(dev, "failed to request memory region\n"); + err = -EIO; + goto exit_free; + } + + /* remap the memory area */ + + info->map.virt = ioremap(res->start, info->map.size); + dev_dbg(dev, "virt %p, %d bytes\n", info->map.virt, info->map.size); + + if (info->map.virt == NULL) { + dev_err(dev, "failed to ioremap() region\n"); + err = -EIO; + goto exit_free; + } + + { /* debug: print the first four words read back from the new mapping */ + unsigned int *p = (unsigned int *)info->map.virt; + printk("%08x %08x %08x %08x\n", + readl(p), readl(p+1), readl(p+2), readl(p+3)); + } + + simple_map_init(&info->map); + + dev_dbg(dev, "initialised map, probing for mtd\n"); + + /* probe for the right mtd map driver */ + + info->mtd = do_map_probe("map_ram" , &info->map); + if (info->mtd == NULL) { + dev_err(dev, "failed to probe for map_ram\n"); + err = -ENOMEM; + goto exit_free; + } + + info->mtd->owner = THIS_MODULE; + + platram_setrw(info, PLATRAM_RW); + + /* check to see if there are any available partitions, or whether + * to add this device whole */ + +#ifdef CONFIG_MTD_PARTITIONS + if (pdata->nr_partitions > 0) { + const char **probes = { NULL }; + + if (pdata->probes) + probes = (const char **)pdata->probes; + + err = parse_mtd_partitions(info->mtd, probes, + &info->partitions, 0); /* NOTE(review): a negative result is kept in err and returned by probe even when add_mtd_device() below succeeds */ + if (err > 0) { + err = add_mtd_partitions(info->mtd, info->partitions, + err); + } + } +#endif /* CONFIG_MTD_PARTITIONS */ + + if (add_mtd_device(info->mtd)) { + dev_err(dev, "add_mtd_device() failed\n"); + err = -ENOMEM; /* NOTE(review): no goto exit_free here, so the "registered" message below is still printed and nothing is torn down */ + } + + dev_info(dev, "registered mtd device\n"); + return err; + + exit_free: + platram_remove(dev); + exit_error: + return err; +} + +/* device driver info */ + +static struct device_driver platram_driver = { + .name = "mtd-ram", + .bus = &platform_bus_type, + .probe = platram_probe, + .remove = platram_remove, +}; + +/* 
module init/exit */ + +static int __init platram_init(void) +{ + printk("Generic platform RAM MTD, (c) 2004 Simtec Electronics\n"); + return driver_register(&platram_driver); +} + +static void __exit platram_exit(void) +{ + driver_unregister(&platram_driver); +} + +module_init(platram_init); +module_exit(platram_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ben Dooks "); +MODULE_DESCRIPTION("MTD platform RAM map driver"); diff --git a/include/linux/mtd/plat-ram.h b/include/linux/mtd/plat-ram.h new file mode 100644 index 00000000000..2332eda07e0 --- /dev/null +++ b/include/linux/mtd/plat-ram.h @@ -0,0 +1,35 @@ +/* linux/include/mtd/plat-ram.h + * + * (c) 2004 Simtec Electronics + * http://www.simtec.co.uk/products/SWLINUX/ + * Ben Dooks + * + * Generic platform device based RAM map + * + * $Id: plat-ram.h,v 1.2 2005/01/24 00:37:40 bjd Exp $ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef __LINUX_MTD_PLATRAM_H +#define __LINUX_MTD_PLATRAM_H __FILE__ + +#define PLATRAM_RO (0) +#define PLATRAM_RW (1) + +struct platdata_mtd_ram { + char *mapname; + char **probes; + struct mtd_partition *partitions; + int nr_partitions; + int bankwidth; + + /* control callbacks */ + + void (*set_rw)(struct device *dev, int to); +}; + +#endif /* __LINUX_MTD_PLATRAM_H */ -- cgit v1.2.3-70-g09d2 From 068e3c0a002c79a5e3cc7c42cb749c4bb126288c Mon Sep 17 00:00:00 2001 From: "David A. Marlin" Date: Mon, 24 Jan 2005 03:07:46 +0000 Subject: [MTD] NAND Add optional ECC status check callback Add optional hardware specific callback routine to perform extra error status checks on erase and write failures for devices with hardware ECC. Signed-off-by: David A. 
Marlin Signed-off-by: Thomas Gleixner --- drivers/mtd/nand/nand_base.c | 65 ++++++++++++++++++++++++++++++++++++-------- include/linux/mtd/nand.h | 16 +++++++++-- 2 files changed, 68 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 9f7c42ceecf..7094dd5716d 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -42,6 +42,10 @@ * a "device recovery" operation must be performed when power is restored * to ensure correct operation. * + * 01-20-2005 dmarlin: added support for optional hardware specific callback routine to + * perform extra error status checks on erase and write failures. This required + * adding a wrapper function for nand_read_ecc. + * * Credits: * David Woodhouse for adding multichip support * @@ -55,7 +59,7 @@ * The AG-AND chips have nice features for speed improvement, * which are not supported yet. Read / program 4 pages in one go. * - * $Id: nand_base.c,v 1.129 2005/01/23 18:30:50 dmarlin Exp $ + * $Id: nand_base.c,v 1.130 2005/01/24 03:07:43 dmarlin Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -896,6 +900,12 @@ static int nand_write_page (struct mtd_info *mtd, struct nand_chip *this, int pa if (!cached) { /* call wait ready function */ status = this->waitfunc (mtd, this, FL_WRITING); + + /* See if operation failed and additional status checks are available */ + if ((status & NAND_STATUS_FAIL) && (this->errstat)) { + status = this->errstat(mtd, this, FL_WRITING, status, page); + } + /* See if device thinks it succeeded */ if (status & NAND_STATUS_FAIL) { DEBUG (MTD_DEBUG_LEVEL0, "%s: " "Failed write, page 0x%08x, ", __FUNCTION__, page); @@ -1022,23 +1032,24 @@ out: #endif /** - * nand_read - [MTD Interface] MTD compability function for nand_read_ecc + * nand_read - [MTD Interface] MTD compability function for nand_do_read_ecc * 
@mtd: MTD device structure * @from: offset to read from * @len: number of bytes to read * @retlen: pointer to variable to store the number of read bytes * @buf: the databuffer to put data * - * This function simply calls nand_read_ecc with oob buffer and oobsel = NULL -*/ + * This function simply calls nand_do_read_ecc with oob buffer and oobsel = NULL + * and flags = 0xff + */ static int nand_read (struct mtd_info *mtd, loff_t from, size_t len, size_t * retlen, u_char * buf) { - return nand_read_ecc (mtd, from, len, retlen, buf, NULL, NULL); + return nand_do_read_ecc (mtd, from, len, retlen, buf, NULL, NULL, 0xff); } /** - * nand_read_ecc - [MTD Interface] Read data with ECC + * nand_read_ecc - [MTD Interface] MTD compatibility function for nand_do_read_ecc * @mtd: MTD device structure * @from: offset to read from * @len: number of bytes to read @@ -1047,10 +1058,34 @@ static int nand_read (struct mtd_info *mtd, loff_t from, size_t len, size_t * re * @oob_buf: filesystem supplied oob data buffer * @oobsel: oob selection structure * - * NAND read with ECC + * This function simply calls nand_do_read_ecc with flags = 0xff */ static int nand_read_ecc (struct mtd_info *mtd, loff_t from, size_t len, size_t * retlen, u_char * buf, u_char * oob_buf, struct nand_oobinfo *oobsel) +{ + return nand_do_read_ecc(mtd, from, len, retlen, buf, oob_buf, oobsel, 0xff); +} + + +/** + * nand_do_read_ecc - [MTD Interface] Read data with ECC + * @mtd: MTD device structure + * @from: offset to read from + * @len: number of bytes to read + * @retlen: pointer to variable to store the number of read bytes + * @buf: the databuffer to put data + * @oob_buf: filesystem supplied oob data buffer + * @oobsel: oob selection structure + * @flags: flag to indicate if nand_get_device/nand_release_device should be performed + * and how many corrected error bits are acceptable: + * bits 0..7 - number of tolerable errors (compared against the correct_data result) + * bit 7 - NAND_GET_DEVICE (0x80): 0 == do not get/release chip, 1 == get/release chip; NOTE(review): overlaps the error-count field + * + * NAND read 
with ECC + */ +int nand_do_read_ecc (struct mtd_info *mtd, loff_t from, size_t len, + size_t * retlen, u_char * buf, u_char * oob_buf, + struct nand_oobinfo *oobsel, int flags) { int i, j, col, realpage, page, end, ecc, chipnr, sndcmd = 1; int read = 0, oob = 0, ecc_status = 0, ecc_failed = 0; @@ -1076,7 +1111,8 @@ static int nand_read_ecc (struct mtd_info *mtd, loff_t from, size_t len, } /* Grab the lock and see if the device is available */ - nand_get_device (this, mtd, FL_READING); + if (flags & NAND_GET_DEVICE) + nand_get_device (this, mtd, FL_READING); /* use userspace supplied oobinfo, if zero */ if (oobsel == NULL) @@ -1180,7 +1216,8 @@ static int nand_read_ecc (struct mtd_info *mtd, loff_t from, size_t len, /* We calc error correction directly, it checks the hw * generator for an error, reads back the syndrome and * does the error correction on the fly */ - if (this->correct_data(mtd, &data_poi[datidx], &oob_data[i], &ecc_code[i]) == -1) { + ecc_status = this->correct_data(mtd, &data_poi[datidx], &oob_data[i], &ecc_code[i]); + if ((ecc_status == -1) || (ecc_status > (flags & 0xff))) { DEBUG (MTD_DEBUG_LEVEL0, "nand_read_ecc: " "Failed ECC read, page 0x%08x on chip %d\n", page, chipnr); ecc_failed++; @@ -1219,7 +1256,7 @@ static int nand_read_ecc (struct mtd_info *mtd, loff_t from, size_t len, p[i] = ecc_status; } - if (ecc_status == -1) { + if ((ecc_status == -1) || (ecc_status > (flags & 0xff))) { DEBUG (MTD_DEBUG_LEVEL0, "nand_read_ecc: " "Failed ECC read, page 0x%08x\n", page); ecc_failed++; } @@ -1289,7 +1326,8 @@ static int nand_read_ecc (struct mtd_info *mtd, loff_t from, size_t len, } /* Deselect and wake up anyone waiting on the device */ - nand_release_device(mtd); + if (flags & NAND_GET_DEVICE) + nand_release_device(mtd); /* * Return success, if no ECC failures, else -EBADMSG @@ -2103,6 +2141,11 @@ int nand_erase_nand (struct mtd_info *mtd, struct erase_info *instr, int allowbb status = this->waitfunc (mtd, this, FL_ERASING); + /* See if 
operation failed and additional status checks are available */ + if ((status & NAND_STATUS_FAIL) && (this->errstat)) { + status = this->errstat(mtd, this, FL_ERASING, status, page); + } + /* See if block erase succeeded */ if (status & NAND_STATUS_FAIL) { DEBUG (MTD_DEBUG_LEVEL0, "nand_erase: " "Failed erase, page 0x%08x\n", page); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 0118128ae38..cf52f20c6de 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -5,7 +5,7 @@ * Steven J. Hill * Thomas Gleixner * - * $Id: nand.h,v 1.69 2005/01/17 18:29:18 dmarlin Exp $ + * $Id: nand.h,v 1.70 2005/01/24 03:07:42 dmarlin Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -50,6 +50,8 @@ * update of nand_chip structure description * 01-17-2005 dmarlin added extended commands for AG-AND device and added option * for BBT_AUTO_REFRESH. + * 01-20-2005 dmarlin added optional pointer to hardware specific callback for + * extra error status checks. 
*/ #ifndef __LINUX_MTD_NAND_H #define __LINUX_MTD_NAND_H @@ -164,7 +166,7 @@ extern int nand_read_raw (struct mtd_info *mtd, uint8_t *buf, loff_t from, size_ /* * Constants for Hardware ECC -*/ + */ /* Reset Hardware ECC for read */ #define NAND_ECC_READ 0 /* Reset Hardware ECC for write */ @@ -172,6 +174,10 @@ extern int nand_read_raw (struct mtd_info *mtd, uint8_t *buf, loff_t from, size_ /* Enable Hardware ECC before syndrom is read back from flash */ #define NAND_ECC_READSYN 2 +/* Bit mask for flags passed to do_nand_read_ecc */ +#define NAND_GET_DEVICE 0x80 + + /* Option constants for bizarre disfunctionality and real * features */ @@ -308,6 +314,8 @@ struct nand_hw_control { * @badblock_pattern: [REPLACEABLE] bad block scan pattern used for initial bad block scan * @controller: [OPTIONAL] a pointer to a hardware controller structure which is shared among multiple independend devices * @priv: [OPTIONAL] pointer to private chip date + * @errstat: [OPTIONAL] hardware specific function to perform additional error status checks + * (determine if errors are correctable) */ struct nand_chip { @@ -363,6 +371,7 @@ struct nand_chip { struct nand_bbt_descr *badblock_pattern; struct nand_hw_control *controller; void *priv; + int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state, int status, int page); }; /* @@ -484,6 +493,9 @@ extern int nand_update_bbt (struct mtd_info *mtd, loff_t offs); extern int nand_default_bbt (struct mtd_info *mtd); extern int nand_isbad_bbt (struct mtd_info *mtd, loff_t offs, int allowbbt); extern int nand_erase_nand (struct mtd_info *mtd, struct erase_info *instr, int allowbbt); +extern int nand_do_read_ecc (struct mtd_info *mtd, loff_t from, size_t len, + size_t * retlen, u_char * buf, u_char * oob_buf, + struct nand_oobinfo *oobsel, int flags); /* * Constants for oob configuration -- cgit v1.2.3-70-g09d2 From 72b56a2d7dccd9ea90f34f6ddb653086a3f3bd2e Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sat, 5 Feb 2005 02:06:19 
+0000 Subject: [MTD] Add OTP basisc add structure definition for OTP region info Signed-off-by: Nicolas Pitre Signed-off-by: Thomas Gleixner --- drivers/mtd/chips/cfi_cmdset_0001.c | 8 +++++--- include/linux/mtd/cfi.h | 10 +++++++++- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index c268bcd7172..c630d7532f7 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -4,7 +4,7 @@ * * (C) 2000 Red Hat. GPL'd * - * $Id: cfi_cmdset_0001.c,v 1.164 2004/11/16 18:29:00 dwmw2 Exp $ + * $Id: cfi_cmdset_0001.c,v 1.165 2005/02/05 02:06:15 nico Exp $ * * * 10/10/2000 Nicolas Pitre @@ -252,7 +252,8 @@ read_pri_intelext(struct map_info *map, __u16 adr) int nb_parts, i; /* Protection Register info */ - extra_size += (extp->NumProtectionFields - 1) * (4 + 6); + extra_size += (extp->NumProtectionFields - 1) * + sizeof(struct cfi_intelext_otpinfo); /* Burst Read info */ extra_size += 6; @@ -471,7 +472,8 @@ static int cfi_intelext_partition_fixup(struct mtd_info *mtd, int offs, numregions, numparts, partshift, numvirtchips, i, j; /* Protection Register info */ - offs = (extp->NumProtectionFields - 1) * (4 + 6); + offs = (extp->NumProtectionFields - 1) * + sizeof(struct cfi_intelext_otpinfo); /* Burst Read info */ offs += 6; diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 2ed8c585021..d87dc3fbd4b 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -1,7 +1,7 @@ /* Common Flash Interface structures * See http://support.intel.com/design/flash/technote/index.htm - * $Id: cfi.h,v 1.50 2004/11/20 12:46:51 dwmw2 Exp $ + * $Id: cfi.h,v 1.51 2005/02/05 02:06:16 nico Exp $ */ #ifndef __MTD_CFI_H__ @@ -148,6 +148,14 @@ struct cfi_pri_intelext { uint8_t extra[0]; } __attribute__((packed)); +struct cfi_intelext_otpinfo { + uint32_t ProtRegAddr; + uint16_t FactGroups; + uint8_t FactProtRegSize; + uint16_t 
UserGroups; + uint8_t UserProtRegSize; +} __attribute__((packed)); + struct cfi_intelext_blockinfo { uint16_t NumIdentBlocks; uint16_t BlockSize; -- cgit v1.2.3-70-g09d2 From f77814dd5728edaf1239d19755d2aa0d8c33d861 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 8 Feb 2005 17:11:19 +0000 Subject: [MTD] Support for protection register support on Intel FLASH chips This enables support for reading, writing and locking so called "Protection Registers" present on some flash chips. A subset of them are pre-programmed at the factory with a unique set of values. The rest is user-programmable. Signed-off-by: Nicolas Pitre Signed-off-by: Thomas Gleixner --- drivers/mtd/chips/Kconfig | 27 ++- drivers/mtd/chips/cfi_cmdset_0001.c | 401 +++++++++++++++++++++++++----------- drivers/mtd/mtdpart.c | 28 ++- include/linux/mtd/cfi.h | 4 +- include/linux/mtd/flashchip.h | 3 +- include/linux/mtd/map.h | 15 +- include/linux/mtd/mtd.h | 10 +- include/mtd/mtd-abi.h | 8 +- 8 files changed, 369 insertions(+), 127 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig index d682dbc8157..f4eda1e40d5 100644 --- a/drivers/mtd/chips/Kconfig +++ b/drivers/mtd/chips/Kconfig @@ -1,5 +1,5 @@ # drivers/mtd/chips/Kconfig -# $Id: Kconfig,v 1.13 2004/12/01 15:49:10 nico Exp $ +# $Id: Kconfig,v 1.14 2005/02/08 17:11:15 nico Exp $ menu "RAM/ROM/Flash chip drivers" depends on MTD!=n @@ -155,6 +155,31 @@ config MTD_CFI_I8 If your flash chips are interleaved in eights - i.e. you have eight flash chips addressed by each bus cycle, then say 'Y'. +config MTD_OTP + bool "Protection Registers aka one-time programmable (OTP) bits" + depends on MTD_CFI_ADV_OPTIONS + default n + help + This enables support for reading, writing and locking so called + "Protection Registers" present on some flash chips. + A subset of them are pre-programmed at the factory with a + unique set of values. The rest is user-programmable. 
+ + The user-programmable Protection Registers contain one-time + programmable (OTP) bits; when programmed, register bits cannot be + erased. Each Protection Register can be accessed multiple times to + program individual bits, as long as the register remains unlocked. + + Each Protection Register has an associated Lock Register bit. When a + Lock Register bit is programmed, the associated Protection Register + can only be read; it can no longer be programmed. Additionally, + because the Lock Register bits themselves are OTP, when programmed, + Lock Register bits cannot be erased. Therefore, when a Protection + Register is locked, it cannot be unlocked. + + This feature should therefore be used with extreme care. Any mistake + in the programming of OTP bits will waste them. + config MTD_CFI_INTELEXT tristate "Support for Intel/Sharp flash chips" depends on MTD_GEN_PROBE diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index c630d7532f7..b3f5acf0760 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -4,7 +4,7 @@ * * (C) 2000 Red Hat. 
GPL'd * - * $Id: cfi_cmdset_0001.c,v 1.165 2005/02/05 02:06:15 nico Exp $ + * $Id: cfi_cmdset_0001.c,v 1.167 2005/02/08 17:11:15 nico Exp $ * * * 10/10/2000 Nicolas Pitre @@ -48,14 +48,20 @@ #define M50LPW080 0x002F static int cfi_intelext_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *); -//static int cfi_intelext_read_user_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *); -//static int cfi_intelext_read_fact_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *); static int cfi_intelext_write_words(struct mtd_info *, loff_t, size_t, size_t *, const u_char *); static int cfi_intelext_write_buffers(struct mtd_info *, loff_t, size_t, size_t *, const u_char *); static int cfi_intelext_erase_varsize(struct mtd_info *, struct erase_info *); static void cfi_intelext_sync (struct mtd_info *); static int cfi_intelext_lock(struct mtd_info *mtd, loff_t ofs, size_t len); static int cfi_intelext_unlock(struct mtd_info *mtd, loff_t ofs, size_t len); +static int cfi_intelext_read_fact_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *); +static int cfi_intelext_read_user_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *); +static int cfi_intelext_write_user_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *); +static int cfi_intelext_lock_user_prot_reg (struct mtd_info *, loff_t, size_t); +static int cfi_intelext_get_fact_prot_info (struct mtd_info *, + struct otp_info *, size_t); +static int cfi_intelext_get_user_prot_info (struct mtd_info *, + struct otp_info *, size_t); static int cfi_intelext_suspend (struct mtd_info *); static void cfi_intelext_resume (struct mtd_info *); @@ -423,9 +429,13 @@ static struct mtd_info *cfi_intelext_setup(struct mtd_info *mtd) mtd->eraseregions[i].numblocks); } -#if 0 - mtd->read_user_prot_reg = cfi_intelext_read_user_prot_reg; +#ifdef CONFIG_MTD_OTP mtd->read_fact_prot_reg = cfi_intelext_read_fact_prot_reg; + mtd->read_user_prot_reg = 
cfi_intelext_read_user_prot_reg; + mtd->write_user_prot_reg = cfi_intelext_write_user_prot_reg; + mtd->lock_user_prot_reg = cfi_intelext_lock_user_prot_reg; + mtd->get_fact_prot_info = cfi_intelext_get_fact_prot_info; + mtd->get_user_prot_info = cfi_intelext_get_user_prot_info; #endif /* This function has the potential to distort the reality @@ -565,7 +575,7 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr resettime: timeo = jiffies + HZ; retry: - if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING)) { + if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING || mode == FL_OTP_WRITE)) { /* * OK. We have possibility for contension on the write/erase * operations which are global to the real chip and not per @@ -1178,111 +1188,11 @@ static int cfi_intelext_read (struct mtd_info *mtd, loff_t from, size_t len, siz return ret; } -#if 0 -static int __xipram cfi_intelext_read_prot_reg (struct mtd_info *mtd, - loff_t from, size_t len, - size_t *retlen, - u_char *buf, - int base_offst, int reg_sz) -{ - struct map_info *map = mtd->priv; - struct cfi_private *cfi = map->fldrv_priv; - struct cfi_pri_intelext *extp = cfi->cmdset_priv; - struct flchip *chip; - int ofs_factor = cfi->interleave * cfi->device_type; - int count = len; - int chip_num, offst; - int ret; - - chip_num = ((unsigned int)from/reg_sz); - offst = from - (reg_sz*chip_num)+base_offst; - - while (count) { - /* Calculate which chip & protection register offset we need */ - - if (chip_num >= cfi->numchips) - goto out; - - chip = &cfi->chips[chip_num]; - - spin_lock(chip->mutex); - ret = get_chip(map, chip, chip->start, FL_JEDEC_QUERY); - if (ret) { - spin_unlock(chip->mutex); - return (len-count)?:ret; - } - - xip_disable(map, chip, chip->start); - - if (chip->state != FL_JEDEC_QUERY) { - map_write(map, CMD(0x90), chip->start); - chip->state = FL_JEDEC_QUERY; - } - - while (count && ((offst-base_offst) < reg_sz)) { - *buf = 
map_read8(map,(chip->start+((extp->ProtRegAddr+1)*ofs_factor)+offst)); - buf++; - offst++; - count--; - } - - xip_enable(map, chip, chip->start); - put_chip(map, chip, chip->start); - spin_unlock(chip->mutex); - - /* Move on to the next chip */ - chip_num++; - offst = base_offst; - } - - out: - return len-count; -} - -static int cfi_intelext_read_user_prot_reg (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) -{ - struct map_info *map = mtd->priv; - struct cfi_private *cfi = map->fldrv_priv; - struct cfi_pri_intelext *extp=cfi->cmdset_priv; - int base_offst,reg_sz; - - /* Check that we actually have some protection registers */ - if(!extp || !(extp->FeatureSupport&64)){ - printk(KERN_WARNING "%s: This flash device has no protection data to read!\n",map->name); - return 0; - } - - base_offst=(1<FactProtRegSize); - reg_sz=(1<UserProtRegSize); - - return cfi_intelext_read_prot_reg(mtd, from, len, retlen, buf, base_offst, reg_sz); -} - -static int cfi_intelext_read_fact_prot_reg (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) -{ - struct map_info *map = mtd->priv; - struct cfi_private *cfi = map->fldrv_priv; - struct cfi_pri_intelext *extp=cfi->cmdset_priv; - int base_offst,reg_sz; - - /* Check that we actually have some protection registers */ - if(!extp || !(extp->FeatureSupport&64)){ - printk(KERN_WARNING "%s: This flash device has no protection data to read!\n",map->name); - return 0; - } - - base_offst=0; - reg_sz=(1<FactProtRegSize); - - return cfi_intelext_read_prot_reg(mtd, from, len, retlen, buf, base_offst, reg_sz); -} -#endif - static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip, - unsigned long adr, map_word datum) + unsigned long adr, map_word datum, int mode) { struct cfi_private *cfi = map->fldrv_priv; - map_word status, status_OK; + map_word status, status_OK, write_cmd; unsigned long timeo; int z, ret=0; @@ -1290,9 +1200,14 @@ static int __xipram do_write_oneword(struct 
map_info *map, struct flchip *chip, /* Let's determine this according to the interleave only once */ status_OK = CMD(0x80); + switch (mode) { + case FL_WRITING: write_cmd = CMD(0x40); break; + case FL_OTP_WRITE: write_cmd = CMD(0xc0); break; + default: return -EINVAL; + } spin_lock(chip->mutex); - ret = get_chip(map, chip, adr, FL_WRITING); + ret = get_chip(map, chip, adr, mode); if (ret) { spin_unlock(chip->mutex); return ret; @@ -1301,9 +1216,9 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip, XIP_INVAL_CACHED_RANGE(map, adr, map_bankwidth(map)); ENABLE_VPP(map); xip_disable(map, chip, adr); - map_write(map, CMD(0x40), adr); + map_write(map, write_cmd, adr); map_write(map, datum, adr); - chip->state = FL_WRITING; + chip->state = mode; spin_unlock(chip->mutex); INVALIDATE_CACHED_RANGE(map, adr, map_bankwidth(map)); @@ -1313,7 +1228,7 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip, timeo = jiffies + (HZ/2); z = 0; for (;;) { - if (chip->state != FL_WRITING) { + if (chip->state != mode) { /* Someone's suspended the write. 
Sleep */ DECLARE_WAITQUEUE(wait, current); @@ -1401,7 +1316,7 @@ static int cfi_intelext_write_words (struct mtd_info *mtd, loff_t to , size_t le datum = map_word_load_partial(map, datum, buf, gap, n); ret = do_write_oneword(map, &cfi->chips[chipnum], - bus_ofs, datum); + bus_ofs, datum, FL_WRITING); if (ret) return ret; @@ -1422,7 +1337,7 @@ static int cfi_intelext_write_words (struct mtd_info *mtd, loff_t to , size_t le map_word datum = map_word_load(map, buf); ret = do_write_oneword(map, &cfi->chips[chipnum], - ofs, datum); + ofs, datum, FL_WRITING); if (ret) return ret; @@ -1446,7 +1361,7 @@ static int cfi_intelext_write_words (struct mtd_info *mtd, loff_t to , size_t le datum = map_word_load_partial(map, datum, buf, 0, len); ret = do_write_oneword(map, &cfi->chips[chipnum], - ofs, datum); + ofs, datum, FL_WRITING); if (ret) return ret; @@ -2036,6 +1951,262 @@ static int cfi_intelext_unlock(struct mtd_info *mtd, loff_t ofs, size_t len) return ret; } +#ifdef CONFIG_MTD_OTP + +typedef int (*otp_op_t)(struct map_info *map, struct flchip *chip, + u_long data_offset, u_char *buf, u_int size, + u_long prot_offset, u_int groupno, u_int groupsize); + +static int __xipram +do_otp_read(struct map_info *map, struct flchip *chip, u_long offset, + u_char *buf, u_int size, u_long prot, u_int grpno, u_int grpsz) +{ + struct cfi_private *cfi = map->fldrv_priv; + int ret; + + spin_lock(chip->mutex); + ret = get_chip(map, chip, chip->start, FL_JEDEC_QUERY); + if (ret) { + spin_unlock(chip->mutex); + return ret; + } + + /* let's ensure we're not reading back cached data from array mode */ + if (map->inval_cache) + map->inval_cache(map, chip->start + offset, size); + + xip_disable(map, chip, chip->start); + if (chip->state != FL_JEDEC_QUERY) { + map_write(map, CMD(0x90), chip->start); + chip->state = FL_JEDEC_QUERY; + } + map_copy_from(map, buf, chip->start + offset, size); + xip_enable(map, chip, chip->start); + + /* then ensure we don't keep OTP data in the cache */ + if 
(map->inval_cache) + map->inval_cache(map, chip->start + offset, size); + + put_chip(map, chip, chip->start); + spin_unlock(chip->mutex); + return 0; +} + +static int +do_otp_write(struct map_info *map, struct flchip *chip, u_long offset, + u_char *buf, u_int size, u_long prot, u_int grpno, u_int grpsz) +{ + int ret; + + while (size) { + unsigned long bus_ofs = offset & ~(map_bankwidth(map)-1); + int gap = offset - bus_ofs; + int n = min_t(int, size, map_bankwidth(map)-gap); + map_word datum = map_word_ff(map); + + datum = map_word_load_partial(map, datum, buf, gap, n); + ret = do_write_oneword(map, chip, bus_ofs, datum, FL_OTP_WRITE); + if (ret) + return ret; + + offset += n; + buf += n; + size -= n; + } + + return 0; +} + +static int +do_otp_lock(struct map_info *map, struct flchip *chip, u_long offset, + u_char *buf, u_int size, u_long prot, u_int grpno, u_int grpsz) +{ + struct cfi_private *cfi = map->fldrv_priv; + map_word datum; + + /* make sure area matches group boundaries */ + if (offset != 0 || size != grpsz) + return -EXDEV; + + datum = map_word_ff(map); + datum = map_word_clr(map, datum, CMD(1 << grpno)); + return do_write_oneword(map, chip, prot, datum, FL_OTP_WRITE); +} + +static int cfi_intelext_otp_walk(struct mtd_info *mtd, loff_t from, size_t len, + size_t *retlen, u_char *buf, + otp_op_t action, int user_regs) +{ + struct map_info *map = mtd->priv; + struct cfi_private *cfi = map->fldrv_priv; + struct cfi_pri_intelext *extp = cfi->cmdset_priv; + struct flchip *chip; + struct cfi_intelext_otpinfo *otp; + u_long devsize, reg_prot_offset, data_offset; + u_int chip_num, chip_step, field, reg_fact_size, reg_user_size; + u_int groups, groupno, groupsize, reg_fact_groups, reg_user_groups; + int ret; + + *retlen = 0; + + /* Check that we actually have some OTP registers */ + if (!extp || !(extp->FeatureSupport & 64) || !extp->NumProtectionFields) + return -ENODATA; + + /* we need real chips here not virtual ones */ + devsize = (1 << cfi->cfiq->DevSize) * 
cfi->interleave; + chip_step = devsize >> cfi->chipshift; + + for (chip_num = 0; chip_num < cfi->numchips; chip_num += chip_step) { + chip = &cfi->chips[chip_num]; + otp = (struct cfi_intelext_otpinfo *)&extp->extra[0]; + + /* first OTP region */ + field = 0; + reg_prot_offset = extp->ProtRegAddr; + reg_fact_groups = 1; + reg_fact_size = 1 << extp->FactProtRegSize; + reg_user_groups = 1; + reg_user_size = 1 << extp->UserProtRegSize; + + while (len > 0) { + /* flash geometry fixup */ + data_offset = reg_prot_offset + 1; + data_offset *= cfi->interleave * cfi->device_type; + reg_prot_offset *= cfi->interleave * cfi->device_type; + reg_fact_size *= cfi->interleave; + reg_user_size *= cfi->interleave; + + if (user_regs) { + groups = reg_user_groups; + groupsize = reg_user_size; + /* skip over factory reg area */ + groupno = reg_fact_groups; + data_offset += reg_fact_groups * reg_fact_size; + } else { + groups = reg_fact_groups; + groupsize = reg_fact_size; + groupno = 0; + } + + while (groups > 0) { + if (!action) { + /* + * Special case: if action is NULL + * we fill buf with otp_info records. 
+ */ + struct otp_info *otpinfo; + map_word lockword; + len -= sizeof(struct otp_info); + if (len <= 0) + return -ENOSPC; + ret = do_otp_read(map, chip, + reg_prot_offset, + (u_char *)&lockword, + map_bankwidth(map), + 0, 0, 0); + if (ret) + return ret; + otpinfo = (struct otp_info *)buf; + otpinfo->start = from; + otpinfo->length = groupsize; + otpinfo->locked = + !map_word_bitsset(map, lockword, + CMD(1 << groupno)); + from += groupsize; + buf += sizeof(*otpinfo); + *retlen += sizeof(*otpinfo); + } else if (from >= groupsize) { + from -= groupsize; + } else { + int size = groupsize; + data_offset += from; + size -= from; + from = 0; + if (size > len) + size = len; + ret = action(map, chip, data_offset, + buf, size, reg_prot_offset, + groupno, groupsize); + if (ret < 0) + return ret; + buf += size; + len -= size; + *retlen += size; + } + groupno++; + groups--; + } + + /* next OTP region */ + if (++field == extp->NumProtectionFields) + break; + reg_prot_offset = otp->ProtRegAddr; + reg_fact_groups = otp->FactGroups; + reg_fact_size = 1 << otp->FactProtRegSize; + reg_user_groups = otp->UserGroups; + reg_user_size = 1 << otp->UserProtRegSize; + otp++; + } + } + + return 0; +} + +static int cfi_intelext_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, + size_t len, size_t *retlen, + u_char *buf) +{ + return cfi_intelext_otp_walk(mtd, from, len, retlen, + buf, do_otp_read, 0); +} + +static int cfi_intelext_read_user_prot_reg(struct mtd_info *mtd, loff_t from, + size_t len, size_t *retlen, + u_char *buf) +{ + return cfi_intelext_otp_walk(mtd, from, len, retlen, + buf, do_otp_read, 1); +} + +static int cfi_intelext_write_user_prot_reg(struct mtd_info *mtd, loff_t from, + size_t len, size_t *retlen, + u_char *buf) +{ + return cfi_intelext_otp_walk(mtd, from, len, retlen, + buf, do_otp_write, 1); +} + +static int cfi_intelext_lock_user_prot_reg(struct mtd_info *mtd, + loff_t from, size_t len) +{ + size_t retlen; + return cfi_intelext_otp_walk(mtd, from, len, &retlen, 
+ NULL, do_otp_lock, 1); +} + +static int cfi_intelext_get_fact_prot_info(struct mtd_info *mtd, + struct otp_info *buf, size_t len) +{ + size_t retlen; + int ret; + + ret = cfi_intelext_otp_walk(mtd, 0, len, &retlen, (u_char *)buf, NULL, 0); + return ret ? : retlen; +} + +static int cfi_intelext_get_user_prot_info(struct mtd_info *mtd, + struct otp_info *buf, size_t len) +{ + size_t retlen; + int ret; + + ret = cfi_intelext_otp_walk(mtd, 0, len, &retlen, (u_char *)buf, NULL, 1); + return ret ? : retlen; +} + +#endif + static int cfi_intelext_suspend(struct mtd_info *mtd) { struct map_info *map = mtd->priv; diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 96ebb52f24b..b92e6bfffaf 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -5,7 +5,7 @@ * * This code is GPL * - * $Id: mtdpart.c,v 1.51 2004/11/16 18:28:59 dwmw2 Exp $ + * $Id: mtdpart.c,v 1.53 2005/02/08 17:11:13 nico Exp $ * * 02-21-2002 Thomas Gleixner * added support for read_oob, write_oob @@ -116,6 +116,13 @@ static int part_read_user_prot_reg (struct mtd_info *mtd, loff_t from, size_t le len, retlen, buf); } +static int part_get_user_prot_info (struct mtd_info *mtd, + struct otp_info *buf, size_t len) +{ + struct mtd_part *part = PART(mtd); + return part->master->get_user_prot_info (part->master, buf, len); +} + static int part_read_fact_prot_reg (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { @@ -124,6 +131,13 @@ static int part_read_fact_prot_reg (struct mtd_info *mtd, loff_t from, size_t le len, retlen, buf); } +static int part_get_fact_prot_info (struct mtd_info *mtd, + struct otp_info *buf, size_t len) +{ + struct mtd_part *part = PART(mtd); + return part->master->get_fact_prot_info (part->master, buf, len); +} + static int part_write (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { @@ -182,6 +196,12 @@ static int part_write_user_prot_reg (struct mtd_info *mtd, loff_t from, size_t l len, retlen, buf); } 
+static int part_lock_user_prot_reg (struct mtd_info *mtd, loff_t from, size_t len) +{ + struct mtd_part *part = PART(mtd); + return part->master->lock_user_prot_reg (part->master, from, len); +} + static int part_writev (struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen) { @@ -409,6 +429,12 @@ int add_mtd_partitions(struct mtd_info *master, slave->mtd.read_fact_prot_reg = part_read_fact_prot_reg; if(master->write_user_prot_reg) slave->mtd.write_user_prot_reg = part_write_user_prot_reg; + if(master->lock_user_prot_reg) + slave->mtd.lock_user_prot_reg = part_lock_user_prot_reg; + if(master->get_user_prot_info) + slave->mtd.get_user_prot_info = part_get_user_prot_info; + if(master->get_fact_prot_info) + slave->mtd.get_fact_prot_info = part_get_fact_prot_info; if (master->sync) slave->mtd.sync = part_sync; if (!i && master->suspend && master->resume) { diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index d87dc3fbd4b..76255474a27 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -1,7 +1,7 @@ /* Common Flash Interface structures * See http://support.intel.com/design/flash/technote/index.htm - * $Id: cfi.h,v 1.51 2005/02/05 02:06:16 nico Exp $ + * $Id: cfi.h,v 1.52 2005/02/08 17:11:15 nico Exp $ */ #ifndef __MTD_CFI_H__ @@ -252,7 +252,7 @@ static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, int interleave, int * It looks too long to be inline, but in the common case it should almost all * get optimised away. 
*/ -static inline map_word cfi_build_cmd(u_char cmd, struct map_info *map, struct cfi_private *cfi) +static inline map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cfi_private *cfi) { map_word val = { {0} }; int wordwidth, words_per_bus, chip_mode, chips_per_word; diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h index c66ba812bf9..e778a1ab23c 100644 --- a/include/linux/mtd/flashchip.h +++ b/include/linux/mtd/flashchip.h @@ -6,7 +6,7 @@ * * (C) 2000 Red Hat. GPLd. * - * $Id: flashchip.h,v 1.15 2004/11/05 22:41:06 nico Exp $ + * $Id: flashchip.h,v 1.16 2005/02/08 17:11:15 nico Exp $ * */ @@ -29,6 +29,7 @@ typedef enum { FL_ERASE_SUSPENDED, FL_WRITING, FL_WRITING_TO_BUFFER, + FL_OTP_WRITE, FL_WRITE_SUSPENDING, FL_WRITE_SUSPENDED, FL_PM_SUSPENDED, diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index f0268b99c90..8fc6679aa9b 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -1,6 +1,6 @@ /* Overhauled routines for dealing with different mmap regions of flash */ -/* $Id: map.h,v 1.46 2005/01/05 17:09:44 dwmw2 Exp $ */ +/* $Id: map.h,v 1.47 2005/02/08 17:11:15 nico Exp $ */ #ifndef __LINUX_MTD_MAP_H__ #define __LINUX_MTD_MAP_H__ @@ -263,6 +263,17 @@ static inline map_word map_word_and(struct map_info *map, map_word val1, map_wor return r; } +static inline map_word map_word_clr(struct map_info *map, map_word val1, map_word val2) +{ + map_word r; + int i; + + for (i=0; i et al. * @@ -113,12 +113,12 @@ struct mtd_info { * flash devices. The user data is one time programmable but the * factory data is read only. 
*/ - int (*read_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); - + int (*get_fact_prot_info) (struct mtd_info *mtd, struct otp_info *buf, size_t len); int (*read_fact_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); - - /* This function is not yet implemented */ + int (*get_user_prot_info) (struct mtd_info *mtd, struct otp_info *buf, size_t len); + int (*read_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); int (*write_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); + int (*lock_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len); /* kvec-based read/write methods. We need these especially for NAND flash, with its limited number of write cycles per erase. diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h index a76ab898f44..091eb571e99 100644 --- a/include/mtd/mtd-abi.h +++ b/include/mtd/mtd-abi.h @@ -1,5 +1,5 @@ /* - * $Id: mtd-abi.h,v 1.7 2004/11/23 15:37:32 gleixner Exp $ + * $Id: mtd-abi.h,v 1.8 2005/02/08 17:11:16 nico Exp $ * * Portions of MTD ABI definition which are shared by kernel and user space */ @@ -80,6 +80,12 @@ struct region_info_user { uint32_t regionindex; }; +struct otp_info { + uint32_t start; + uint32_t length; + uint32_t locked; +}; + #define MEMGETINFO _IOR('M', 1, struct mtd_info_user) #define MEMERASE _IOW('M', 2, struct erase_info_user) #define MEMWRITEOOB _IOWR('M', 3, struct mtd_oob_buf) -- cgit v1.2.3-70-g09d2 From 8f15fd55f9bf266139b10850947e19c4e3f4e9b7 Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Wed, 9 Feb 2005 09:17:45 +0000 Subject: [JFFS2] Add support for JFFS2-on-Dataflash devices. For Dataflash, can_mark_obsolete = false and the NAND write buffering code (wbuf.c) is used. 
Since the DataFlash chip will automatically erase pages when writing, the cleanmarkers are not needed - so cleanmarker_oob = false and cleanmarker_size = 0 DataFlash page-sizes are not a power of two (they're multiples of 528 bytes). The SECTOR_ADDR macro (added in the previous core patch) is replaced with a (slower) div/mod version if CONFIG_JFFS2_FS_DATAFLASH is selected. Signed-off-by: Andrew Victor Signed-off-by: Thomas Gleixner --- fs/Kconfig | 7 +++++++ fs/jffs2/Makefile | 3 ++- fs/jffs2/erase.c | 13 ++++++++++--- fs/jffs2/fs.c | 21 ++++++++++++++++++++- fs/jffs2/os-linux.h | 18 ++++++++++++++++-- fs/jffs2/scan.c | 11 +++++++---- fs/jffs2/wbuf.c | 35 ++++++++++++++++++++++++++++++++--- include/linux/jffs2_fs_sb.h | 4 ++-- include/mtd/mtd-abi.h | 3 ++- 9 files changed, 98 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/Kconfig b/fs/Kconfig index 6a4ad4bb7a5..07835d24c78 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1084,6 +1084,13 @@ config JFFS2_FS_NOR_ECC ECC for JFFS2. This type of flash chip is not common, however it is available from ST Microelectronics. +config JFFS2_FS_DATAFLASH + bool "JFFS2 support for DataFlash (EXPERIMENTAL)" + depends on JFFS2_FS && EXPERIMENTAL + default n + help + This enables the experimental support for JFFS2 on DataFlash devices. 
+ config JFFS2_COMPRESSION_OPTIONS bool "Advanced compression options for JFFS2" depends on JFFS2_FS diff --git a/fs/jffs2/Makefile b/fs/jffs2/Makefile index e3c38ccf9c7..6c2ebe176b4 100644 --- a/fs/jffs2/Makefile +++ b/fs/jffs2/Makefile @@ -1,7 +1,7 @@ # # Makefile for the Linux Journalling Flash File System v2 (JFFS2) # -# $Id: Makefile.common,v 1.7 2004/11/03 12:57:38 jwboyer Exp $ +# $Id: Makefile.common,v 1.8 2005/02/09 09:17:40 pavlov Exp $ # obj-$(CONFIG_JFFS2_FS) += jffs2.o @@ -13,6 +13,7 @@ jffs2-y += super.o jffs2-$(CONFIG_JFFS2_FS_NAND) += wbuf.o jffs2-$(CONFIG_JFFS2_FS_NOR_ECC) += wbuf.o +jffs2-$(CONFIG_JFFS2_FS_DATAFLASH) += wbuf.o jffs2-$(CONFIG_JFFS2_RUBIN) += compr_rubin.o jffs2-$(CONFIG_JFFS2_RTIME) += compr_rtime.o jffs2-$(CONFIG_JFFS2_ZLIB) += compr_zlib.o diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index ae858f87887..a3c6cc15049 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -7,7 +7,7 @@ * * For licensing information, see the file 'LICENCE' in this directory. 
* - * $Id: erase.c,v 1.70 2005/02/09 09:09:01 pavlov Exp $ + * $Id: erase.c,v 1.71 2005/02/09 09:17:40 pavlov Exp $ * */ @@ -310,7 +310,7 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb int ret; uint32_t bad_offset; - if (!jffs2_cleanmarker_oob(c)) { + if ((!jffs2_cleanmarker_oob(c)) && (c->cleanmarker_size > 0)) { marker_ref = jffs2_alloc_raw_node_ref(); if (!marker_ref) { printk(KERN_WARNING "Failed to allocate raw node ref for clean marker\n"); @@ -351,7 +351,7 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb bad_offset += i; printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08x\n", datum, bad_offset); bad: - if (!jffs2_cleanmarker_oob(c)) + if ((!jffs2_cleanmarker_oob(c)) && (c->cleanmarker_size > 0)) jffs2_free_raw_node_ref(marker_ref); kfree(ebuf); bad2: @@ -383,6 +383,13 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb jeb->first_node = jeb->last_node = NULL; + jeb->free_size = c->sector_size; + jeb->used_size = 0; + jeb->dirty_size = 0; + jeb->wasted_size = 0; + } else if (c->cleanmarker_size == 0) { + jeb->first_node = jeb->last_node = NULL; + jeb->free_size = c->sector_size; jeb->used_size = 0; jeb->dirty_size = 0; diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 30ab233fe42..5b7c960a047 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -7,7 +7,7 @@ * * For licensing information, see the file 'LICENCE' in this directory. 
* - * $Id: fs.c,v 1.51 2004/11/28 12:19:37 dedekind Exp $ + * $Id: fs.c,v 1.52 2005/02/09 09:17:40 pavlov Exp $ * */ @@ -456,6 +456,12 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) return -EINVAL; } #endif +#ifndef CONFIG_JFFS2_FS_DATAFLASH + if (c->mtd->type == MTD_DATAFLASH) { + printk(KERN_ERR "jffs2: Cannot operate on DataFlash unless jffs2 DataFlash support is compiled in.\n"); + return -EINVAL; + } +#endif c->flash_size = c->mtd->size; @@ -661,6 +667,14 @@ static int jffs2_flash_setup(struct jffs2_sb_info *c) { if (ret) return ret; } + + /* and Dataflash */ + if (jffs2_dataflash(c)) { + ret = jffs2_dataflash_setup(c); + if (ret) + return ret; + } + return ret; } @@ -674,4 +688,9 @@ void jffs2_flash_cleanup(struct jffs2_sb_info *c) { if (jffs2_nor_ecc(c)) { jffs2_nor_ecc_flash_cleanup(c); } + + /* and DataFlash */ + if (jffs2_dataflash(c)) { + jffs2_dataflash_cleanup(c); + } } diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 0412416d1f2..af27b84007a 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -7,7 +7,7 @@ * * For licensing information, see the file 'LICENCE' in this directory. 
* - * $Id: os-linux.h,v 1.52 2005/02/09 09:09:01 pavlov Exp $ + * $Id: os-linux.h,v 1.53 2005/02/09 09:17:41 pavlov Exp $ * */ @@ -97,12 +97,16 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) #endif } +#ifdef CONFIG_JFFS2_FS_DATAFLASH +#define SECTOR_ADDR(x) ( ((unsigned long)(x) / (unsigned long)(c->sector_size)) * c->sector_size ) +#else #define SECTOR_ADDR(x) ( ((unsigned long)(x) & ~(c->sector_size-1)) ) +#endif #define jffs2_is_readonly(c) (OFNI_BS_2SFFJ(c)->s_flags & MS_RDONLY) #define jffs2_is_writebuffered(c) (c->wbuf != NULL) -#if (!defined CONFIG_JFFS2_FS_NAND && !defined CONFIG_JFFS2_FS_NOR_ECC) +#if (!defined CONFIG_JFFS2_FS_NAND && !defined CONFIG_JFFS2_FS_NOR_ECC && !defined CONFIG_JFFS2_FS_DATAFLASH) #define jffs2_can_mark_obsolete(c) (1) #define jffs2_cleanmarker_oob(c) (0) #define jffs2_write_nand_cleanmarker(c,jeb) (-EIO) @@ -119,6 +123,7 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) #define jffs2_wbuf_timeout NULL #define jffs2_wbuf_process NULL #define jffs2_nor_ecc(c) (0) +#define jffs2_dataflash(c) (0) #define jffs2_nor_ecc_flash_setup(c) (0) #define jffs2_nor_ecc_flash_cleanup(c) do {} while (0) @@ -154,6 +159,15 @@ void jffs2_nor_ecc_flash_cleanup(struct jffs2_sb_info *c); #define jffs2_nor_ecc_flash_setup(c) (0) #define jffs2_nor_ecc_flash_cleanup(c) do {} while (0) #endif /* NOR ECC */ +#ifdef CONFIG_JFFS2_FS_DATAFLASH +#define jffs2_dataflash(c) (c->mtd->type == MTD_DATAFLASH) +int jffs2_dataflash_setup(struct jffs2_sb_info *c); +void jffs2_dataflash_cleanup(struct jffs2_sb_info *c); +#else +#define jffs2_dataflash(c) (0) +#define jffs2_dataflash_setup(c) (0) +#define jffs2_dataflash_cleanup(c) do {} while (0) +#endif /* DATAFLASH */ #endif /* NAND */ /* erase.c */ diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 76859ff5343..e8c43746c82 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -7,7 +7,7 @@ * * For licensing information, see the file 'LICENCE' in this directory. 
* - * $Id: scan.c,v 1.116 2005/02/09 09:09:02 pavlov Exp $ + * $Id: scan.c,v 1.117 2005/02/09 09:17:41 pavlov Exp $ * */ #include @@ -68,7 +68,7 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo static inline int min_free(struct jffs2_sb_info *c) { uint32_t min = 2 * sizeof(struct jffs2_raw_inode); -#if defined CONFIG_JFFS2_FS_NAND || defined CONFIG_JFFS2_FS_NOR_ECC +#if defined CONFIG_JFFS2_FS_NAND || defined CONFIG_JFFS2_FS_NOR_ECC || defined CONFIG_JFFS2_FS_DATAFLASH if (!jffs2_can_mark_obsolete(c) && min < c->wbuf_pagesize) return c->wbuf_pagesize; #endif @@ -228,7 +228,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) c->dirty_size -= c->nextblock->dirty_size; c->nextblock->dirty_size = 0; } -#if defined CONFIG_JFFS2_FS_NAND || defined CONFIG_JFFS2_FS_NOR_ECC +#if defined CONFIG_JFFS2_FS_NAND || defined CONFIG_JFFS2_FS_NOR_ECC || defined CONFIG_JFFS2_FS_DATAFLASH if (!jffs2_can_mark_obsolete(c) && c->nextblock && (c->nextblock->free_size & (c->wbuf_pagesize-1))) { /* If we're going to start writing into a block which already contains data, and the end of the data isn't page-aligned, @@ -351,7 +351,10 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo } #endif D1(printk(KERN_DEBUG "Block at 0x%08x is empty (erased)\n", jeb->offset)); - return BLK_STATE_ALLFF; /* OK to erase if all blocks are like this */ + if (c->cleanmarker_size == 0) + return BLK_STATE_CLEANMARKER; /* don't bother with re-erase */ + else + return BLK_STATE_ALLFF; /* OK to erase if all blocks are like this */ } if (ofs) { D1(printk(KERN_DEBUG "Free space at %08x ends at %08x\n", jeb->offset, diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 894dea88678..a35e007e5bf 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -9,7 +9,7 @@ * * For licensing information, see the file 'LICENCE' in this directory. 
* - * $Id: wbuf.c,v 1.87 2005/02/09 09:09:02 pavlov Exp $ + * $Id: wbuf.c,v 1.88 2005/02/09 09:17:41 pavlov Exp $ * */ @@ -435,7 +435,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) if we have a switch to next page, we will not have enough remaining space for this. */ - if (pad) { + if (pad && !jffs2_dataflash(c)) { c->wbuf_len = PAD(c->wbuf_len); /* Pad with JFFS2_DIRTY_BITMASK initially. this helps out ECC'd NOR @@ -486,7 +486,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) spin_lock(&c->erase_completion_lock); /* Adjust free size of the block if we padded. */ - if (pad) { + if (pad && !jffs2_dataflash(c)) { struct jffs2_eraseblock *jeb; jeb = &c->blocks[c->wbuf_ofs / c->sector_size]; @@ -604,8 +604,14 @@ int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c) return ret; } +#ifdef CONFIG_JFFS2_FS_DATAFLASH +#define PAGE_DIV(x) ( ((unsigned long)(x) / (unsigned long)(c->wbuf_pagesize)) * (unsigned long)(c->wbuf_pagesize) ) +#define PAGE_MOD(x) ( (unsigned long)(x) % (unsigned long)(c->wbuf_pagesize) ) +#else #define PAGE_DIV(x) ( (x) & (~(c->wbuf_pagesize - 1)) ) #define PAGE_MOD(x) ( (x) & (c->wbuf_pagesize - 1) ) +#endif + int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, unsigned long count, loff_t to, size_t *retlen, uint32_t ino) { struct kvec outvecs[3]; @@ -1192,6 +1198,29 @@ void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c) kfree(c->wbuf); } +#ifdef CONFIG_JFFS2_FS_DATAFLASH +int jffs2_dataflash_setup(struct jffs2_sb_info *c) { + c->cleanmarker_size = 0; /* No cleanmarkers needed */ + + /* Initialize write buffer */ + init_rwsem(&c->wbuf_sem); + c->wbuf_pagesize = c->sector_size; + c->wbuf_ofs = 0xFFFFFFFF; + + c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL); + if (!c->wbuf) + return -ENOMEM; + + printk(KERN_INFO "JFFS2 write-buffering enabled (%i)\n", c->wbuf_pagesize); + + return 0; +} + +void jffs2_dataflash_cleanup(struct jffs2_sb_info *c) { + kfree(c->wbuf); +} +#endif + #ifdef 
CONFIG_JFFS2_FS_NOR_ECC int jffs2_nor_ecc_flash_setup(struct jffs2_sb_info *c) { /* Cleanmarker is actually larger on the flashes */ diff --git a/include/linux/jffs2_fs_sb.h b/include/linux/jffs2_fs_sb.h index 4afc8d8c2e9..faec29559fe 100644 --- a/include/linux/jffs2_fs_sb.h +++ b/include/linux/jffs2_fs_sb.h @@ -1,4 +1,4 @@ -/* $Id: jffs2_fs_sb.h,v 1.48 2004/11/20 10:41:12 dwmw2 Exp $ */ +/* $Id: jffs2_fs_sb.h,v 1.49 2005/02/09 09:17:41 pavlov Exp $ */ #ifndef _JFFS2_FS_SB #define _JFFS2_FS_SB @@ -94,7 +94,7 @@ struct jffs2_sb_info { to an obsoleted node. I don't like this. Alternatives welcomed. */ struct semaphore erase_free_sem; -#if defined CONFIG_JFFS2_FS_NAND || defined CONFIG_JFFS2_FS_NOR_ECC +#if defined CONFIG_JFFS2_FS_NAND || defined CONFIG_JFFS2_FS_NOR_ECC || defined CONFIG_JFFS2_FS_DATAFLASH /* Write-behind buffer for NAND flash */ unsigned char *wbuf; uint32_t wbuf_ofs; diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h index c984cb2c941..cacb9842b19 100644 --- a/include/mtd/mtd-abi.h +++ b/include/mtd/mtd-abi.h @@ -1,5 +1,5 @@ /* - * $Id: mtd-abi.h,v 1.9 2005/02/08 17:45:52 nico Exp $ + * $Id: mtd-abi.h,v 1.10 2005/02/09 09:17:42 pavlov Exp $ * * Portions of MTD ABI definition which are shared by kernel and user space */ @@ -29,6 +29,7 @@ struct mtd_oob_buf { #define MTD_NORFLASH 3 #define MTD_NANDFLASH 4 #define MTD_PEROM 5 +#define MTD_DATAFLASH 6 #define MTD_OTHER 14 #define MTD_UNKNOWN 15 -- cgit v1.2.3-70-g09d2 From 2f82ce1eb637c06dfc60f095cd1891ae0ba4894c Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Wed, 9 Feb 2005 09:24:26 +0000 Subject: [JFFS2] Use a single config option for write buffer support This patch replaces the current CONFIG_JFFS2_FS_NAND, CONFIG_JFFS2_FS_NOR_ECC and CONFIG_JFFS2_FS_DATAFLASH with a single configuration option - CONFIG_JFFS2_FS_WRITEBUFFER. 
The only functional change of this patch is that the slower div/mod calculations for SECTOR_ADDR(), PAGE_DIV() and PAGE_MOD() are now always used when CONFIG_JFFS2_FS_WRITEBUFFER is enabled. Signed-off-by: Andrew Victor Signed-off-by: Thomas Gleixner --- fs/Kconfig | 33 +++++++++------------------------ fs/jffs2/Makefile | 6 ++---- fs/jffs2/fs.c | 6 ++---- fs/jffs2/nodelist.h | 4 ++-- fs/jffs2/os-linux.h | 28 ++++++++-------------------- fs/jffs2/scan.c | 12 ++++++------ fs/jffs2/super.c | 4 ++-- fs/jffs2/wbuf.c | 8 ++------ include/linux/jffs2_fs_sb.h | 4 ++-- 9 files changed, 35 insertions(+), 70 deletions(-) (limited to 'include/linux') diff --git a/fs/Kconfig b/fs/Kconfig index 07835d24c78..475769c25d6 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1063,33 +1063,18 @@ config JFFS2_FS_DEBUG If reporting bugs, please try to have available a full dump of the messages at debug level 1 while the misbehaviour was occurring. -config JFFS2_FS_NAND - bool "JFFS2 support for NAND flash" +config JFFS2_FS_WRITEBUFFER + bool "JFFS2 write-buffering support" depends on JFFS2_FS - default n + default y help - This enables the support for NAND flash in JFFS2. NAND is a newer - type of flash chip design than the traditional NOR flash, with - higher density but a handful of characteristics which make it more - interesting for the file system to use. + This enables the write-buffering support in JFFS2. - Say 'N' unless you have NAND flash. - -config JFFS2_FS_NOR_ECC - bool "JFFS2 support for ECC'd NOR flash (EXPERIMENTAL)" - depends on JFFS2_FS && EXPERIMENTAL - default n - help - This enables the experimental support for NOR flash with transparent - ECC for JFFS2. This type of flash chip is not common, however it is - available from ST Microelectronics. - -config JFFS2_FS_DATAFLASH - bool "JFFS2 support for DataFlash (EXPERIMENTAL)" - depends on JFFS2_FS && EXPERIMENTAL - default n - help - This enables the experimental support for JFFS2 on DataFlash devices. 
+ This functionality is required to support JFFS2 on the following + types of flash devices: + - NAND flash + - NOR flash with transparent ECC + - DataFlash config JFFS2_COMPRESSION_OPTIONS bool "Advanced compression options for JFFS2" diff --git a/fs/jffs2/Makefile b/fs/jffs2/Makefile index 6c2ebe176b4..f1afe681ecd 100644 --- a/fs/jffs2/Makefile +++ b/fs/jffs2/Makefile @@ -1,7 +1,7 @@ # # Makefile for the Linux Journalling Flash File System v2 (JFFS2) # -# $Id: Makefile.common,v 1.8 2005/02/09 09:17:40 pavlov Exp $ +# $Id: Makefile.common,v 1.9 2005/02/09 09:23:53 pavlov Exp $ # obj-$(CONFIG_JFFS2_FS) += jffs2.o @@ -11,9 +11,7 @@ jffs2-y += read.o nodemgmt.o readinode.o write.o scan.o gc.o jffs2-y += symlink.o build.o erase.o background.o fs.o writev.o jffs2-y += super.o -jffs2-$(CONFIG_JFFS2_FS_NAND) += wbuf.o -jffs2-$(CONFIG_JFFS2_FS_NOR_ECC) += wbuf.o -jffs2-$(CONFIG_JFFS2_FS_DATAFLASH) += wbuf.o +jffs2-$(CONFIG_JFFS2_FS_WRITEBUFFER) += wbuf.o jffs2-$(CONFIG_JFFS2_RUBIN) += compr_rubin.o jffs2-$(CONFIG_JFFS2_RTIME) += compr_rtime.o jffs2-$(CONFIG_JFFS2_ZLIB) += compr_zlib.o diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 5b7c960a047..c91c66e5e86 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -7,7 +7,7 @@ * * For licensing information, see the file 'LICENCE' in this directory. 
* - * $Id: fs.c,v 1.52 2005/02/09 09:17:40 pavlov Exp $ + * $Id: fs.c,v 1.53 2005/02/09 09:23:53 pavlov Exp $ * */ @@ -450,13 +450,11 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) c = JFFS2_SB_INFO(sb); -#ifndef CONFIG_JFFS2_FS_NAND +#ifndef CONFIG_JFFS2_FS_WRITEBUFFER if (c->mtd->type == MTD_NANDFLASH) { printk(KERN_ERR "jffs2: Cannot operate on NAND flash unless jffs2 NAND support is compiled in.\n"); return -EINVAL; } -#endif -#ifndef CONFIG_JFFS2_FS_DATAFLASH if (c->mtd->type == MTD_DATAFLASH) { printk(KERN_ERR "jffs2: Cannot operate on DataFlash unless jffs2 DataFlash support is compiled in.\n"); return -EINVAL; diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index a4864d05ea9..8c122838bf6 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -7,7 +7,7 @@ * * For licensing information, see the file 'LICENCE' in this directory. * - * $Id: nodelist.h,v 1.126 2004/11/19 15:06:29 dedekind Exp $ + * $Id: nodelist.h,v 1.127 2005/02/09 09:23:53 pavlov Exp $ * */ @@ -462,7 +462,7 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c); /* erase.c */ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count); -#ifdef CONFIG_JFFS2_FS_NAND +#ifdef CONFIG_JFFS2_FS_WRITEBUFFER /* wbuf.c */ int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino); int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c); diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index af27b84007a..8989cd685e4 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -7,7 +7,7 @@ * * For licensing information, see the file 'LICENCE' in this directory. 
* - * $Id: os-linux.h,v 1.53 2005/02/09 09:17:41 pavlov Exp $ + * $Id: os-linux.h,v 1.54 2005/02/09 09:23:53 pavlov Exp $ * */ @@ -97,16 +97,12 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) #endif } -#ifdef CONFIG_JFFS2_FS_DATAFLASH -#define SECTOR_ADDR(x) ( ((unsigned long)(x) / (unsigned long)(c->sector_size)) * c->sector_size ) -#else -#define SECTOR_ADDR(x) ( ((unsigned long)(x) & ~(c->sector_size-1)) ) -#endif #define jffs2_is_readonly(c) (OFNI_BS_2SFFJ(c)->s_flags & MS_RDONLY) #define jffs2_is_writebuffered(c) (c->wbuf != NULL) -#if (!defined CONFIG_JFFS2_FS_NAND && !defined CONFIG_JFFS2_FS_NOR_ECC && !defined CONFIG_JFFS2_FS_DATAFLASH) +#ifndef CONFIG_JFFS2_FS_WRITEBUFFER +#define SECTOR_ADDR(x) ( ((unsigned long)(x) & ~(c->sector_size-1)) ) #define jffs2_can_mark_obsolete(c) (1) #define jffs2_cleanmarker_oob(c) (0) #define jffs2_write_nand_cleanmarker(c,jeb) (-EIO) @@ -129,6 +125,7 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) #else /* NAND and/or ECC'd NOR support present */ +#define SECTOR_ADDR(x) ( ((unsigned long)(x) / (unsigned long)(c->sector_size)) * c->sector_size ) #define jffs2_can_mark_obsolete(c) ((c->mtd->type == MTD_NORFLASH && !(c->mtd->flags & MTD_ECC)) || c->mtd->type == MTD_RAM) #define jffs2_cleanmarker_oob(c) (c->mtd->type == MTD_NANDFLASH) @@ -150,25 +147,16 @@ int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino); int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c); int jffs2_nand_flash_setup(struct jffs2_sb_info *c); void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c); -#ifdef CONFIG_JFFS2_FS_NOR_ECC + #define jffs2_nor_ecc(c) (c->mtd->type == MTD_NORFLASH && (c->mtd->flags & MTD_ECC)) int jffs2_nor_ecc_flash_setup(struct jffs2_sb_info *c); void jffs2_nor_ecc_flash_cleanup(struct jffs2_sb_info *c); -#else -#define jffs2_nor_ecc(c) (0) -#define jffs2_nor_ecc_flash_setup(c) (0) -#define jffs2_nor_ecc_flash_cleanup(c) do {} while (0) -#endif /* NOR ECC */ -#ifdef 
CONFIG_JFFS2_FS_DATAFLASH + #define jffs2_dataflash(c) (c->mtd->type == MTD_DATAFLASH) int jffs2_dataflash_setup(struct jffs2_sb_info *c); void jffs2_dataflash_cleanup(struct jffs2_sb_info *c); -#else -#define jffs2_dataflash(c) (0) -#define jffs2_dataflash_setup(c) (0) -#define jffs2_dataflash_cleanup(c) do {} while (0) -#endif /* DATAFLASH */ -#endif /* NAND */ + +#endif /* WRITEBUFFER */ /* erase.c */ static inline void jffs2_erase_pending_trigger(struct jffs2_sb_info *c) diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index e8c43746c82..bc6c9998002 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -7,7 +7,7 @@ * * For licensing information, see the file 'LICENCE' in this directory. * - * $Id: scan.c,v 1.117 2005/02/09 09:17:41 pavlov Exp $ + * $Id: scan.c,v 1.118 2005/02/09 09:23:53 pavlov Exp $ * */ #include @@ -68,7 +68,7 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo static inline int min_free(struct jffs2_sb_info *c) { uint32_t min = 2 * sizeof(struct jffs2_raw_inode); -#if defined CONFIG_JFFS2_FS_NAND || defined CONFIG_JFFS2_FS_NOR_ECC || defined CONFIG_JFFS2_FS_DATAFLASH +#ifdef CONFIG_JFFS2_FS_WRITEBUFFER if (!jffs2_can_mark_obsolete(c) && min < c->wbuf_pagesize) return c->wbuf_pagesize; #endif @@ -228,7 +228,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) c->dirty_size -= c->nextblock->dirty_size; c->nextblock->dirty_size = 0; } -#if defined CONFIG_JFFS2_FS_NAND || defined CONFIG_JFFS2_FS_NOR_ECC || defined CONFIG_JFFS2_FS_DATAFLASH +#ifdef CONFIG_JFFS2_FS_WRITEBUFFER if (!jffs2_can_mark_obsolete(c) && c->nextblock && (c->nextblock->free_size & (c->wbuf_pagesize-1))) { /* If we're going to start writing into a block which already contains data, and the end of the data isn't page-aligned, @@ -294,7 +294,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo uint32_t hdr_crc, buf_ofs, buf_len; int err; int noise = 0; -#ifdef CONFIG_JFFS2_FS_NAND +#ifdef CONFIG_JFFS2_FS_WRITEBUFFER 
int cleanmarkerfound = 0; #endif @@ -303,7 +303,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo D1(printk(KERN_DEBUG "jffs2_scan_eraseblock(): Scanning block at 0x%x\n", ofs)); -#ifdef CONFIG_JFFS2_FS_NAND +#ifdef CONFIG_JFFS2_FS_WRITEBUFFER if (jffs2_cleanmarker_oob(c)) { int ret = jffs2_check_nand_cleanmarker(c, jeb); D2(printk(KERN_NOTICE "jffs_check_nand_cleanmarker returned %d\n",ret)); @@ -338,7 +338,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo ofs += 4; if (ofs == EMPTY_SCAN_SIZE(c->sector_size)) { -#ifdef CONFIG_JFFS2_FS_NAND +#ifdef CONFIG_JFFS2_FS_WRITEBUFFER if (jffs2_cleanmarker_oob(c)) { /* scan oob, take care of cleanmarker */ int ret = jffs2_check_oob_empty(c, jeb, cleanmarkerfound); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 6b2a441d276..3bfc121a467 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -7,7 +7,7 @@ * * For licensing information, see the file 'LICENCE' in this directory. * - * $Id: super.c,v 1.104 2004/11/23 15:37:31 gleixner Exp $ + * $Id: super.c,v 1.105 2005/02/09 09:23:54 pavlov Exp $ * */ @@ -309,7 +309,7 @@ static int __init init_jffs2_fs(void) int ret; printk(KERN_INFO "JFFS2 version 2.2." -#ifdef CONFIG_JFFS2_FS_NAND +#ifdef CONFIG_JFFS2_FS_WRITEBUFFER " (NAND)" #endif " (C) 2001-2003 Red Hat, Inc.\n"); diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index a35e007e5bf..890258505a7 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -9,7 +9,7 @@ * * For licensing information, see the file 'LICENCE' in this directory. 
* - * $Id: wbuf.c,v 1.88 2005/02/09 09:17:41 pavlov Exp $ + * $Id: wbuf.c,v 1.89 2005/02/09 09:23:54 pavlov Exp $ * */ @@ -604,7 +604,7 @@ int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c) return ret; } -#ifdef CONFIG_JFFS2_FS_DATAFLASH +#ifdef CONFIG_JFFS2_FS_WRITEBUFFER #define PAGE_DIV(x) ( ((unsigned long)(x) / (unsigned long)(c->wbuf_pagesize)) * (unsigned long)(c->wbuf_pagesize) ) #define PAGE_MOD(x) ( (unsigned long)(x) % (unsigned long)(c->wbuf_pagesize) ) #else @@ -1198,7 +1198,6 @@ void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c) kfree(c->wbuf); } -#ifdef CONFIG_JFFS2_FS_DATAFLASH int jffs2_dataflash_setup(struct jffs2_sb_info *c) { c->cleanmarker_size = 0; /* No cleanmarkers needed */ @@ -1219,9 +1218,7 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) { void jffs2_dataflash_cleanup(struct jffs2_sb_info *c) { kfree(c->wbuf); } -#endif -#ifdef CONFIG_JFFS2_FS_NOR_ECC int jffs2_nor_ecc_flash_setup(struct jffs2_sb_info *c) { /* Cleanmarker is actually larger on the flashes */ c->cleanmarker_size = 16; @@ -1241,4 +1238,3 @@ int jffs2_nor_ecc_flash_setup(struct jffs2_sb_info *c) { void jffs2_nor_ecc_flash_cleanup(struct jffs2_sb_info *c) { kfree(c->wbuf); } -#endif diff --git a/include/linux/jffs2_fs_sb.h b/include/linux/jffs2_fs_sb.h index faec29559fe..1bd6cdfb7d7 100644 --- a/include/linux/jffs2_fs_sb.h +++ b/include/linux/jffs2_fs_sb.h @@ -1,4 +1,4 @@ -/* $Id: jffs2_fs_sb.h,v 1.49 2005/02/09 09:17:41 pavlov Exp $ */ +/* $Id: jffs2_fs_sb.h,v 1.50 2005/02/09 09:23:55 pavlov Exp $ */ #ifndef _JFFS2_FS_SB #define _JFFS2_FS_SB @@ -94,7 +94,7 @@ struct jffs2_sb_info { to an obsoleted node. I don't like this. Alternatives welcomed. 
*/ struct semaphore erase_free_sem; -#if defined CONFIG_JFFS2_FS_NAND || defined CONFIG_JFFS2_FS_NOR_ECC || defined CONFIG_JFFS2_FS_DATAFLASH +#ifdef CONFIG_JFFS2_FS_WRITEBUFFER /* Write-behind buffer for NAND flash */ unsigned char *wbuf; uint32_t wbuf_ofs; -- cgit v1.2.3-70-g09d2 From 0040bf382c77414739c933e4d2ee35ff817d0b99 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 Feb 2005 12:20:00 +0000 Subject: [MTD] NAND: Skip bad block table scan on request Signed-off-by: Thomas Gleixner --- drivers/mtd/nand/nand_base.c | 6 +++++- include/linux/mtd/nand.h | 5 +++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 7094dd5716d..99abd615a46 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -59,7 +59,7 @@ * The AG-AND chips have nice features for speed improvement, * which are not supported yet. Read / program 4 pages in one go. * - * $Id: nand_base.c,v 1.130 2005/01/24 03:07:43 dmarlin Exp $ + * $Id: nand_base.c,v 1.131 2005/02/09 12:19:56 gleixner Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -2631,6 +2631,10 @@ int nand_scan (struct mtd_info *mtd, int maxchips) memcpy(&mtd->oobinfo, this->autooob, sizeof(mtd->oobinfo)); mtd->owner = THIS_MODULE; + + /* Check, if we should skip the bad block table scan */ + if (this->options & NAND_SKIP_BBTSCAN) + return 0; /* Build bad block table */ return this->scan_bbt (mtd); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index cf52f20c6de..cf25c7cfd0b 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -5,7 +5,7 @@ * Steven J. 
Hill * Thomas Gleixner * - * $Id: nand.h,v 1.70 2005/01/24 03:07:42 dmarlin Exp $ + * $Id: nand.h,v 1.71 2005/02/09 12:12:59 gleixner Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -223,7 +223,8 @@ extern int nand_read_raw (struct mtd_info *mtd, uint8_t *buf, loff_t from, size_ * This can only work if we have the ecc bytes directly behind the * data bytes. Applies for DOC and AG-AND Renesas HW Reed Solomon generators */ #define NAND_HWECC_SYNDROME 0x00020000 - +/* This option skips the bbt scan during initialization. */ +#define NAND_SKIP_BBTSCAN 0x00040000 /* Options set by nand scan */ /* Nand scan has allocated oob_buf */ -- cgit v1.2.3-70-g09d2 From f16407d73effc59e1e9f88e45a3dc53cacbb8264 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 16 Feb 2005 15:55:03 +0000 Subject: [MTD] Quiet unused variable warning Signed-off-by: Nioclas Pitre Signed-off-by: Thomas Gleixner --- include/linux/mtd/map.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 8fc6679aa9b..115b14a634d 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -1,6 +1,6 @@ /* Overhauled routines for dealing with different mmap regions of flash */ -/* $Id: map.h,v 1.47 2005/02/08 17:11:15 nico Exp $ */ +/* $Id: map.h,v 1.48 2005/02/16 15:54:59 nico Exp $ */ #ifndef __LINUX_MTD_MAP_H__ #define __LINUX_MTD_MAP_H__ @@ -418,7 +418,7 @@ extern void simple_map_init(struct map_info *); #define simple_map_init(map) BUG_ON(!map_bankwidth_supported((map)->bankwidth)) -#define map_is_linear(map) (1) +#define map_is_linear(map) ({ (void)(map); 1; }) #endif /* !CONFIG_MTD_COMPLEX_MAPPINGS */ -- cgit v1.2.3-70-g09d2 From 31fbdf7aa5aac8a2a34f180a25deb157297a10c9 Mon Sep 17 00:00:00 2001 From: "Artem B. 
Bityuckiy" Date: Mon, 28 Feb 2005 08:21:09 +0000 Subject: [JFFS2] Fix NOR specific scan BUG Fix fairly sad NOR-specific bug - during FS building ic->scan_dents isn't zero, but jffs2_mark_node_obsolete() might be called, and it tries to find the ic corresponding to ref - this requires ic->scan_dents = 0. Signed-off-by: Artem B. Bityuckiy Signed-off-by: Thomas Gleixner --- fs/jffs2/build.c | 9 ++++++--- fs/jffs2/nodemgmt.c | 11 ++++++----- include/linux/jffs2_fs_sb.h | 5 +++-- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index a01dd5fdbb9..3dd5394921c 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -7,7 +7,7 @@ * * For licensing information, see the file 'LICENCE' in this directory. * - * $Id: build.c,v 1.69 2004/12/16 20:22:18 dmarlin Exp $ + * $Id: build.c,v 1.70 2005/02/28 08:21:05 dedekind Exp $ * */ @@ -97,14 +97,16 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) /* First, scan the medium and build all the inode caches with lists of physical nodes */ - c->flags |= JFFS2_SB_FLAG_MOUNTING; + c->flags |= JFFS2_SB_FLAG_SCANNING; ret = jffs2_scan_medium(c); + c->flags &= ~JFFS2_SB_FLAG_SCANNING; if (ret) goto exit; D1(printk(KERN_DEBUG "Scanned flash completely\n")); D2(jffs2_dump_block_lists(c)); + c->flags |= JFFS2_SB_FLAG_BUILDING; /* Now scan the directory tree, increasing nlink according to every dirent found.
*/ for_each_inode(i, c, ic) { D1(printk(KERN_DEBUG "Pass 1: ino #%u\n", ic->ino)); @@ -116,7 +118,6 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) cond_resched(); } } - c->flags &= ~JFFS2_SB_FLAG_MOUNTING; D1(printk(KERN_DEBUG "Pass 1 complete\n")); @@ -164,6 +165,8 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) ic->scan_dents = NULL; cond_resched(); } + c->flags &= ~JFFS2_SB_FLAG_BUILDING; + D1(printk(KERN_DEBUG "Pass 3 complete\n")); D2(jffs2_dump_block_lists(c)); diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index f9dcac1415a..456adf020f2 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -7,7 +7,7 @@ * * For licensing information, see the file 'LICENCE' in this directory. * - * $Id: nodemgmt.c,v 1.118 2005/02/27 23:01:32 dwmw2 Exp $ + * $Id: nodemgmt.c,v 1.119 2005/02/28 08:21:05 dedekind Exp $ * */ @@ -403,7 +403,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref jeb = &c->blocks[blocknr]; if (jffs2_can_mark_obsolete(c) && !jffs2_is_readonly(c) && - !(c->flags & JFFS2_SB_FLAG_MOUNTING)) { + !(c->flags & (JFFS2_SB_FLAG_SCANNING | JFFS2_SB_FLAG_BUILDING))) { /* Hm. This may confuse static lock analysis. If any of the above three conditions is false, we're going to return from this function without actually obliterating any nodes or freeing @@ -470,8 +470,8 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref D1(ACCT_PARANOIA_CHECK(jeb)); - if (c->flags & JFFS2_SB_FLAG_MOUNTING) { - /* Mount in progress. Don't muck about with the block + if (c->flags & JFFS2_SB_FLAG_SCANNING) { + /* Flash scanning is in progress. Don't muck about with the block lists because they're not ready yet, and don't actually obliterate nodes that look obsolete. 
If they weren't marked obsolete on the flash at the time they _became_ @@ -530,7 +530,8 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref spin_unlock(&c->erase_completion_lock); - if (!jffs2_can_mark_obsolete(c) || jffs2_is_readonly(c)) { + if (!jffs2_can_mark_obsolete(c) || jffs2_is_readonly(c) || + (c->flags & JFFS2_SB_FLAG_BUILDING)) { /* We didn't lock the erase_free_sem */ return; } diff --git a/include/linux/jffs2_fs_sb.h b/include/linux/jffs2_fs_sb.h index 1bd6cdfb7d7..350b82bd652 100644 --- a/include/linux/jffs2_fs_sb.h +++ b/include/linux/jffs2_fs_sb.h @@ -1,4 +1,4 @@ -/* $Id: jffs2_fs_sb.h,v 1.50 2005/02/09 09:23:55 pavlov Exp $ */ +/* $Id: jffs2_fs_sb.h,v 1.51 2005/02/28 08:21:06 dedekind Exp $ */ #ifndef _JFFS2_FS_SB #define _JFFS2_FS_SB @@ -14,7 +14,8 @@ #include #define JFFS2_SB_FLAG_RO 1 -#define JFFS2_SB_FLAG_MOUNTING 2 +#define JFFS2_SB_FLAG_SCANNING 2 /* Flash scanning is in progress */ +#define JFFS2_SB_FLAG_BUILDING 4 /* File system building is in progress */ struct jffs2_inodirty; -- cgit v1.2.3-70-g09d2 From 0514cd938009de1d6b3239d98c3cf2a67b620103 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 14 Mar 2005 18:27:18 +0000 Subject: [MTD] Fixed signed 1bit bitfield Signed-off-by: Ben Dooks Signed-off-by: Thomas Gleixner --- include/linux/mtd/flashchip.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h index e778a1ab23c..675776fa3e2 100644 --- a/include/linux/mtd/flashchip.h +++ b/include/linux/mtd/flashchip.h @@ -6,7 +6,7 @@ * * (C) 2000 Red Hat. GPLd. 
* - * $Id: flashchip.h,v 1.16 2005/02/08 17:11:15 nico Exp $ + * $Id: flashchip.h,v 1.17 2005/03/14 18:27:15 bjd Exp $ * */ @@ -63,8 +63,8 @@ struct flchip { flstate_t state; flstate_t oldstate; - int write_suspended:1; - int erase_suspended:1; + unsigned int write_suspended:1; + unsigned int erase_suspended:1; unsigned long in_progress_block_addr; spinlock_t *mutex; -- cgit v1.2.3-70-g09d2 From c927cd3a226bed5cf063cdf04de13cef51144cef Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Mar 2005 19:03:16 +0000 Subject: [MTD] Add the reverse operation of cfi_build_cmd() This is necessary to fix the broken status check in cfi_cmdset_0001 Signed-off-by: Thomas Gleixner --- include/linux/mtd/cfi.h | 65 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 76255474a27..66e0a32efba 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -1,7 +1,7 @@ /* Common Flash Interface structures * See http://support.intel.com/design/flash/technote/index.htm - * $Id: cfi.h,v 1.52 2005/02/08 17:11:15 nico Exp $ + * $Id: cfi.h,v 1.53 2005/03/15 19:03:13 gleixner Exp $ */ #ifndef __MTD_CFI_H__ @@ -315,6 +315,69 @@ static inline map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cf } #define CMD(x) cfi_build_cmd((x), map, cfi) + +static inline unsigned char cfi_merge_status(map_word val, struct map_info *map, + struct cfi_private *cfi) +{ + int wordwidth, words_per_bus, chip_mode, chips_per_word; + unsigned long onestat, res = 0; + int i; + + /* We do it this way to give the compiler a fighting chance + of optimising away all the crap for 'bankwidth' larger than + an unsigned long, in the common case where that support is + disabled */ + if (map_bankwidth_is_large(map)) { + wordwidth = sizeof(unsigned long); + words_per_bus = (map_bankwidth(map)) / wordwidth; // i.e. 
normally 1 + } else { + wordwidth = map_bankwidth(map); + words_per_bus = 1; + } + + chip_mode = map_bankwidth(map) / cfi_interleave(cfi); + chips_per_word = wordwidth * cfi_interleave(cfi) / map_bankwidth(map); + + onestat = val.x[0]; + /* Or all status words together */ + for (i=1; i < words_per_bus; i++) { + onestat |= val.x[i]; + } + + res = onestat; + switch(chips_per_word) { + default: BUG(); +#if BITS_PER_LONG >= 64 + case 8: + res |= (onestat >> (chip_mode * 32)); +#endif + case 4: + res |= (onestat >> (chip_mode * 16)); + case 2: + res |= (onestat >> (chip_mode * 8)); + case 1: + ; + } + + /* Last, determine what the bit-pattern should be for a single + device, according to chip mode and endianness... */ + switch (chip_mode) { + case 1: + break; + case 2: + res = cfi16_to_cpu(res); + break; + case 4: + res = cfi32_to_cpu(res); + break; + default: BUG(); + } + return res; +} + +#define MERGESTATUS(x) cfi_merge_status((x), map, cfi) + + /* * Sends a CFI command to a bank of flash for the given geometry. * -- cgit v1.2.3-70-g09d2 From 963a6fb0a0d336d0513083b7e4b5c3ff9d6d2061 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 1 Apr 2005 02:59:56 +0100 Subject: [MTD] Add reboot notifier to Intel NOR flash driver to make sure the flash is in array mode whenever we're about to reboot. This is especially useful to allow "soft" reboot to work which consists of branching back into the bootloader. Signed-off-by: Nicolas Pitre Signed-off-by: Thomas Gleixner --- drivers/mtd/chips/cfi_cmdset_0001.c | 45 +++++++++++++++++++++++++++++++++++-- include/linux/mtd/mtd.h | 5 ++++- 2 files changed, 47 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index b482a4e48e4..dc257eb6932 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -4,7 +4,7 @@ * * (C) 2000 Red Hat. 
GPL'd * - * $Id: cfi_cmdset_0001.c,v 1.173 2005/03/30 23:57:30 tpoynor Exp $ + * $Id: cfi_cmdset_0001.c,v 1.174 2005/04/01 01:59:52 nico Exp $ * * * 10/10/2000 Nicolas Pitre @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -66,6 +67,7 @@ static int cfi_intelext_get_user_prot_info (struct mtd_info *, #endif static int cfi_intelext_suspend (struct mtd_info *); static void cfi_intelext_resume (struct mtd_info *); +static int cfi_intelext_reboot (struct notifier_block *, unsigned long, void *); static void cfi_intelext_destroy(struct mtd_info *); @@ -333,7 +335,9 @@ struct mtd_info *cfi_cmdset_0001(struct map_info *map, int primary) mtd->resume = cfi_intelext_resume; mtd->flags = MTD_CAP_NORFLASH; mtd->name = map->name; - + + mtd->reboot_notifier.notifier_call = cfi_intelext_reboot; + if (cfi->cfi_mode == CFI_MODE_CFI) { /* * It's a real CFI chip, not one for which the probe @@ -446,6 +450,7 @@ static struct mtd_info *cfi_intelext_setup(struct mtd_info *mtd) goto setup_err; __module_get(THIS_MODULE); + register_reboot_notifier(&mtd->reboot_notifier); return mtd; setup_err: @@ -2301,10 +2306,46 @@ static void cfi_intelext_resume(struct mtd_info *mtd) } } +static int cfi_intelext_reset(struct mtd_info *mtd) +{ + struct map_info *map = mtd->priv; + struct cfi_private *cfi = map->fldrv_priv; + int i, ret; + + for (i=0; i < cfi->numchips; i++) { + struct flchip *chip = &cfi->chips[i]; + + /* force the completion of any ongoing operation + and switch to array mode so any bootloader in + flash is accessible for soft reboot. 
*/ + spin_lock(chip->mutex); + ret = get_chip(map, chip, chip->start, FL_SYNCING); + if (!ret) { + map_write(map, CMD(0xff), chip->start); + chip->state = FL_READY; + } + spin_unlock(chip->mutex); + } + + return 0; +} + +static int cfi_intelext_reboot(struct notifier_block *nb, unsigned long val, + void *v) +{ + struct mtd_info *mtd; + + mtd = container_of(nb, struct mtd_info, reboot_notifier); + cfi_intelext_reset(mtd); + return NOTIFY_DONE; +} + static void cfi_intelext_destroy(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; + cfi_intelext_reset(mtd); + unregister_reboot_notifier(&mtd->reboot_notifier); kfree(cfi->cmdset_priv); kfree(cfi->cfiq); kfree(cfi->chips[0].priv); diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 3aab1b8729e..f574cd49881 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -1,5 +1,5 @@ /* - * $Id: mtd.h,v 1.57 2005/02/08 17:11:15 nico Exp $ + * $Id: mtd.h,v 1.58 2005/04/01 01:59:54 nico Exp $ * * Copyright (C) 1999-2003 David Woodhouse et al. * @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -147,6 +148,8 @@ struct mtd_info { int (*block_isbad) (struct mtd_info *mtd, loff_t ofs); int (*block_markbad) (struct mtd_info *mtd, loff_t ofs); + struct notifier_block reboot_notifier; /* default mode before reboot */ + void *priv; struct module *owner; -- cgit v1.2.3-70-g09d2 From 65c6e0a657012d104fe42be5f01a7b9b451b687c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Apr 2005 11:19:05 +0100 Subject: [MTD] Fix broken user ABI Move kernel data where it belongs. Previous change broke user abi. 
Signed-off-by: Thomas Gleixner --- include/linux/mtd/mtd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index f574cd49881..c50c3f3927d 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -1,5 +1,5 @@ /* - * $Id: mtd.h,v 1.58 2005/04/01 01:59:54 nico Exp $ + * $Id: mtd.h,v 1.59 2005/04/11 10:19:02 gleixner Exp $ * * Copyright (C) 1999-2003 David Woodhouse et al. * @@ -70,7 +70,6 @@ struct mtd_info { u_int32_t oobblock; // Size of OOB blocks (e.g. 512) u_int32_t oobsize; // Amount of OOB data per block (e.g. 16) - u_int32_t oobavail; // Number of bytes in OOB area available for fs u_int32_t ecctype; u_int32_t eccsize; @@ -81,6 +80,7 @@ struct mtd_info { // oobinfo is a nand_oobinfo structure, which can be set by iotcl (MEMSETOOBINFO) struct nand_oobinfo oobinfo; + u_int32_t oobavail; // Number of bytes in OOB area available for fs /* Data for variable erase regions. If numeraseregions is zero, * it means that the whole device has erasesize as given above. -- cgit v1.2.3-70-g09d2 From fff7afd791f6a685b3ddedb8cfb152aed85f3cf8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 May 2005 17:18:11 +0100 Subject: [JFFS2] Convert thread start semaphore to completion Signed-off-by: Thomas Gleixner --- fs/jffs2/background.c | 8 ++++---- include/linux/jffs2_fs_sb.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index 1be6de27dd8..5548749bacb 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -7,7 +7,7 @@ * * For licensing information, see the file 'LICENCE' in this directory. 
* - * $Id: background.c,v 1.50 2004/11/16 20:36:10 dwmw2 Exp $ + * $Id: background.c,v 1.52 2005/05/19 16:18:08 gleixner Exp $ * */ @@ -37,7 +37,7 @@ int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c) if (c->gc_task) BUG(); - init_MUTEX_LOCKED(&c->gc_thread_start); + init_completion(&c->gc_thread_start); init_completion(&c->gc_thread_exit); pid = kernel_thread(jffs2_garbage_collect_thread, c, CLONE_FS|CLONE_FILES); @@ -48,7 +48,7 @@ int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c) } else { /* Wait for it... */ D1(printk(KERN_DEBUG "JFFS2: Garbage collect thread is pid %d\n", pid)); - down(&c->gc_thread_start); + wait_for_completion(&c->gc_thread_start); } return ret; @@ -75,7 +75,7 @@ static int jffs2_garbage_collect_thread(void *_c) allow_signal(SIGCONT); c->gc_task = current; - up(&c->gc_thread_start); + complete(&c->gc_thread_start); set_user_nice(current, 10); diff --git a/include/linux/jffs2_fs_sb.h b/include/linux/jffs2_fs_sb.h index 350b82bd652..1e21546622d 100644 --- a/include/linux/jffs2_fs_sb.h +++ b/include/linux/jffs2_fs_sb.h @@ -1,4 +1,4 @@ -/* $Id: jffs2_fs_sb.h,v 1.51 2005/02/28 08:21:06 dedekind Exp $ */ +/* $Id: jffs2_fs_sb.h,v 1.52 2005/05/19 16:12:17 gleixner Exp $ */ #ifndef _JFFS2_FS_SB #define _JFFS2_FS_SB @@ -32,7 +32,7 @@ struct jffs2_sb_info { unsigned int flags; struct task_struct *gc_task; /* GC task struct */ - struct semaphore gc_thread_start; /* GC thread start mutex */ + struct completion gc_thread_start; /* GC thread start completion */ struct completion gc_thread_exit; /* GC thread exit completion port */ struct semaphore alloc_sem; /* Used to protect all the following -- cgit v1.2.3-70-g09d2 From 3a3ab48c68de656736f091c6ed768fa8c110a7ab Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 24 May 2005 20:50:18 +0200 Subject: [MTD] Make map_word_ff aware of the flash buswidth map_word_ff() was setting the mapword to ~0UL regardless of the buswidth of the mapped flash chip.
The read_map functions are buswidth aware and therefor the map_word_equal function failed. Signed-off-by: Thomas Gleixner --- include/linux/mtd/map.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 115b14a634d..dd36d9433f0 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -1,6 +1,6 @@ /* Overhauled routines for dealing with different mmap regions of flash */ -/* $Id: map.h,v 1.48 2005/02/16 15:54:59 nico Exp $ */ +/* $Id: map.h,v 1.49 2005/05/24 18:45:15 gleixner Exp $ */ #ifndef __LINUX_MTD_MAP_H__ #define __LINUX_MTD_MAP_H__ @@ -340,13 +340,22 @@ static inline map_word map_word_load_partial(struct map_info *map, map_word orig return orig; } +#if BITS_PER_LONG < 64 +#define MAP_FF_LIMIT 4 +#else +#define MAP_FF_LIMIT 8 +#endif + static inline map_word map_word_ff(struct map_info *map) { map_word r; int i; - - for (i=0; i Date: Wed, 25 May 2005 12:20:29 +0200 Subject: [MTD] map.h Use the correct macro and fix the resulting compiler warning Signed-off-by: Thomas Gleixner --- include/linux/mtd/map.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index dd36d9433f0..dbd7b9b510d 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -1,6 +1,6 @@ /* Overhauled routines for dealing with different mmap regions of flash */ -/* $Id: map.h,v 1.49 2005/05/24 18:45:15 gleixner Exp $ */ +/* $Id: map.h,v 1.51 2005/05/25 10:15:29 gleixner Exp $ */ #ifndef __LINUX_MTD_MAP_H__ #define __LINUX_MTD_MAP_H__ @@ -351,8 +351,9 @@ static inline map_word map_word_ff(struct map_info *map) map_word r; int i; - if (map_bank_width(map) < MAP_FF_LIMIT) { - r.x[0] = (1 << (8*map_bank_width(map))) - 1; + if (map_bankwidth(map) < MAP_FF_LIMIT) { + int bw = 8 * map_bankwidth; + r.x[0] = (1 << bw) - 1; } else { for (i=0; i Date: Wed, 25 May 
2005 12:32:37 +0200 Subject: [MTD] Fix it really tglx declares him self to be the idiot of the day. Signed-off-by: Thomas Gleixner --- include/linux/mtd/map.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index dbd7b9b510d..142963f01d2 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -1,6 +1,6 @@ /* Overhauled routines for dealing with different mmap regions of flash */ -/* $Id: map.h,v 1.51 2005/05/25 10:15:29 gleixner Exp $ */ +/* $Id: map.h,v 1.52 2005/05/25 10:29:41 gleixner Exp $ */ #ifndef __LINUX_MTD_MAP_H__ #define __LINUX_MTD_MAP_H__ @@ -352,7 +352,7 @@ static inline map_word map_word_ff(struct map_info *map) int i; if (map_bankwidth(map) < MAP_FF_LIMIT) { - int bw = 8 * map_bankwidth; + int bw = 8 * map_bankwidth(map); r.x[0] = (1 << bw) - 1; } else { for (i=0; i Date: Tue, 24 May 2005 01:46:34 -0700 Subject: [MTD] NAND: Add Hynix to manufacturer list Signed-off-by: Nicolas S. 
Dade Signed-off-by: Thomas Gleixner --- drivers/mtd/nand/nand_ids.c | 1 + include/linux/mtd/nand.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c index 79945e6ce2b..4b2bfae6f50 100644 --- a/drivers/mtd/nand/nand_ids.c +++ b/drivers/mtd/nand/nand_ids.c @@ -116,6 +116,7 @@ struct nand_manufacturers nand_manuf_ids[] = { {NAND_MFR_NATIONAL, "National"}, {NAND_MFR_RENESAS, "Renesas"}, {NAND_MFR_STMICRO, "ST Micro"}, + {NAND_MFR_HYNIX, "Hynix"}, {0x0, "Unknown"} }; diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index cf25c7cfd0b..bee78969cb2 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -384,6 +384,7 @@ struct nand_chip { #define NAND_MFR_NATIONAL 0x8f #define NAND_MFR_RENESAS 0x07 #define NAND_MFR_STMICRO 0x20 +#define NAND_MFR_HYNIX 0xad /** * struct nand_flash_dev - NAND Flash Device ID Structure -- cgit v1.2.3-70-g09d2 From 0dfc62465ef92c7ddcb1ba223bf062453566fd0f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 May 2005 20:39:20 +0100 Subject: [MTD] NAND: Reorganize chip locking The code was wrong in several aspects. The locking order was inconsistent, the device acquire code did not reset a variable after a wakeup and the wakeup handling was not working for applications where multiple chips are sharing a single hardware controller. When a hardware controller is available the locking is now reduced to the hardware controller lock and the waitqueue is moved to the hardware controller structure in order to avoid a wake_up_all(). The problem was pointed out by Ben Dooks, who also found the missing variable reset as main cause for his deadlock problem.
Signed-off-by: Thomas Gleixner --- drivers/mtd/nand/nand_base.c | 57 ++++++++++++++++++++++---------------------- include/linux/mtd/nand.h | 5 +++- 2 files changed, 33 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index f1db0bf9306..bbe0283433d 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -59,7 +59,7 @@ * The AG-AND chips have nice features for speed improvement, * which are not supported yet. Read / program 4 pages in one go. * - * $Id: nand_base.c,v 1.143 2005/05/19 16:10:22 gleixner Exp $ + * $Id: nand_base.c,v 1.145 2005/05/31 20:32:53 gleixner Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -167,17 +167,21 @@ static void nand_release_device (struct mtd_info *mtd) /* De-select the NAND device */ this->select_chip(mtd, -1); - /* Do we have a hardware controller ? */ + if (this->controller) { + /* Release the controller and the chip */ spin_lock(&this->controller->lock); this->controller->active = NULL; + this->state = FL_READY; + wake_up(&this->controller->wq); spin_unlock(&this->controller->lock); + } else { + /* Release the chip */ + spin_lock(&this->chip_lock); + this->state = FL_READY; + wake_up(&this->wq); + spin_unlock(&this->chip_lock); } - /* Release the chip */ - spin_lock (&this->chip_lock); - this->state = FL_READY; - wake_up (&this->wq); - spin_unlock (&this->chip_lock); } /** @@ -753,37 +757,34 @@ static void nand_command_lp (struct mtd_info *mtd, unsigned command, int column, */ static void nand_get_device (struct nand_chip *this, struct mtd_info *mtd, int new_state) { - struct nand_chip *active = this; - + struct nand_chip *active; + spinlock_t *lock; + wait_queue_head_t *wq; DECLARE_WAITQUEUE (wait, current); - /* - * Grab the lock and see if the device is available - */ + lock = (this->controller) ? 
&this->controller->lock : &this->chip_lock; + wq = (this->controller) ? &this->controller->wq : &this->wq; retry: + active = this; + spin_lock(lock); + /* Hardware controller shared among independend devices */ if (this->controller) { - spin_lock (&this->controller->lock); if (this->controller->active) active = this->controller->active; else this->controller->active = this; - spin_unlock (&this->controller->lock); } - - if (active == this) { - spin_lock (&this->chip_lock); - if (this->state == FL_READY) { - this->state = new_state; - spin_unlock (&this->chip_lock); - return; - } - } - set_current_state (TASK_UNINTERRUPTIBLE); - add_wait_queue (&active->wq, &wait); - spin_unlock (&active->chip_lock); - schedule (); - remove_wait_queue (&active->wq, &wait); + if (active == this && this->state == FL_READY) { + this->state = new_state; + spin_unlock(lock); + return; + } + set_current_state(TASK_UNINTERRUPTIBLE); + add_wait_queue(wq, &wait); + spin_unlock(lock); + schedule(); + remove_wait_queue(wq, &wait); goto retry; } diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index bee78969cb2..9b5b7621758 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -5,7 +5,7 @@ * Steven J. 
Hill * Thomas Gleixner * - * $Id: nand.h,v 1.71 2005/02/09 12:12:59 gleixner Exp $ + * $Id: nand.h,v 1.73 2005/05/31 19:39:17 gleixner Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -253,10 +253,13 @@ struct nand_chip; * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independend devices * @lock: protection lock * @active: the mtd device which holds the controller currently + * @wq: wait queue to sleep on if a NAND operation is in progress + * used instead of the per chip wait queue when a hw controller is available */ struct nand_hw_control { spinlock_t lock; struct nand_chip *active; + wait_queue_head_t wq; }; /** -- cgit v1.2.3-70-g09d2 From 02b15e343aeefb49f8cac949be599d78250a568f Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Tue, 7 Jun 2005 00:04:39 +0100 Subject: [MTD] XIP for AMD CFI flash. Author: Vitaly Wool Signed-off-by: Todd Poynor Signed-off-by: Thomas Gleixner --- drivers/mtd/chips/Kconfig | 4 +- drivers/mtd/chips/cfi_cmdset_0002.c | 402 ++++++++++++++++++++++++++++-------- drivers/mtd/chips/fwh_lock.h | 6 +- drivers/mtd/maps/map_funcs.c | 11 +- include/linux/mtd/cfi.h | 12 +- 5 files changed, 324 insertions(+), 111 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig index f4eda1e40d5..b5dc59389bb 100644 --- a/drivers/mtd/chips/Kconfig +++ b/drivers/mtd/chips/Kconfig @@ -1,5 +1,5 @@ # drivers/mtd/chips/Kconfig -# $Id: Kconfig,v 1.14 2005/02/08 17:11:15 nico Exp $ +# $Id: Kconfig,v 1.15 2005/06/06 23:04:35 tpoynor Exp $ menu "RAM/ROM/Flash chip drivers" depends on MTD!=n @@ -300,7 +300,7 @@ config MTD_JEDEC config MTD_XIP bool "XIP aware MTD support" - depends on !SMP && MTD_CFI_INTELEXT && EXPERIMENTAL + depends on !SMP && (MTD_CFI_INTELEXT || MTD_CFI_AMDSTD) && EXPERIMENTAL default y if XIP_KERNEL help This allows MTD support to work with 
flash memory which is also diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 49cd8120713..e42eefbda0e 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -4,16 +4,20 @@ * * Copyright (C) 2000 Crossnet Co. * Copyright (C) 2004 Arcom Control Systems Ltd + * Copyright (C) 2005 MontaVista Software Inc. * * 2_by_8 routines added by Simon Munton * * 4_by_16 work by Carolyn J. Smith * + * XIP support hooks by Vitaly Wool (based on code for Intel flash + * by Nicolas Pitre) + * * Occasionally maintained by Thayne Harbaugh tharbaugh at lnxi dot com * * This code is GPL * - * $Id: cfi_cmdset_0002.c,v 1.116 2005/05/24 13:29:42 gleixner Exp $ + * $Id: cfi_cmdset_0002.c,v 1.117 2005/06/06 23:04:35 tpoynor Exp $ * */ @@ -34,6 +38,7 @@ #include #include #include +#include #define AMD_BOOTLOC_BUG #define FORCE_WORD_WRITE 0 @@ -393,7 +398,7 @@ static struct mtd_info *cfi_amdstd_setup(struct mtd_info *mtd) * correctly and is therefore not done (particulary with interleaved chips * as each chip must be checked independantly of the others). */ -static int chip_ready(struct map_info *map, unsigned long addr) +static int __xipram chip_ready(struct map_info *map, unsigned long addr) { map_word d, t; @@ -418,7 +423,7 @@ static int chip_ready(struct map_info *map, unsigned long addr) * as each chip must be checked independantly of the others). 
* */ -static int chip_good(struct map_info *map, unsigned long addr, map_word expected) +static int __xipram chip_good(struct map_info *map, unsigned long addr, map_word expected) { map_word oldd, curd; @@ -448,12 +453,12 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr if (time_after(jiffies, timeo)) { printk(KERN_ERR "Waiting for chip to be ready timed out.\n"); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); return -EIO; } - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); cfi_udelay(1); - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); /* Someone else might have been playing with it. */ goto retry; } @@ -501,15 +506,23 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr return -EIO; } - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); cfi_udelay(1); - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); /* Nobody will touch it while it's in state FL_ERASE_SUSPENDING. So we can just loop here. */ } chip->state = FL_READY; return 0; + case FL_XIP_WHILE_ERASING: + if (mode != FL_READY && mode != FL_POINT && + (!cfip || !(cfip->EraseSuspend&2))) + goto sleep; + chip->oldstate = chip->state; + chip->state = FL_READY; + return 0; + case FL_POINT: /* Only if there's no operation suspended... 
*/ if (mode == FL_READY && chip->oldstate == FL_READY) @@ -519,10 +532,10 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr sleep: set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&chip->wq, &wait); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); schedule(); remove_wait_queue(&chip->wq, &wait); - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); goto resettime; } } @@ -540,6 +553,11 @@ static void put_chip(struct map_info *map, struct flchip *chip, unsigned long ad chip->state = FL_ERASING; break; + case FL_XIP_WHILE_ERASING: + chip->state = chip->oldstate; + chip->oldstate = FL_READY; + break; + case FL_READY: case FL_STATUS: /* We should really make set_vpp() count, rather than doing this */ @@ -551,6 +569,198 @@ static void put_chip(struct map_info *map, struct flchip *chip, unsigned long ad wake_up(&chip->wq); } +#ifdef CONFIG_MTD_XIP + +/* + * No interrupt what so ever can be serviced while the flash isn't in array + * mode. This is ensured by the xip_disable() and xip_enable() functions + * enclosing any code path where the flash is known not to be in array mode. + * And within a XIP disabled code path, only functions marked with __xipram + * may be called and nothing else (it's a good thing to inspect generated + * assembly to make sure inline functions were actually inlined and that gcc + * didn't emit calls to its own support functions). Also configuring MTD CFI + * support to a single buswidth and a single interleave is also recommended. 
+ */ +#include +static void xip_disable(struct map_info *map, struct flchip *chip, + unsigned long adr) +{ + /* TODO: chips with no XIP use should ignore and return */ + (void) map_read(map, adr); /* ensure mmu mapping is up to date */ + local_irq_disable(); +} + +static void __xipram xip_enable(struct map_info *map, struct flchip *chip, + unsigned long adr) +{ + struct cfi_private *cfi = map->fldrv_priv; + + if (chip->state != FL_POINT && chip->state != FL_READY) { + map_write(map, CMD(0xf0), adr); + chip->state = FL_READY; + } + (void) map_read(map, adr); + asm volatile (".rep 8; nop; .endr"); /* fill instruction prefetch */ + local_irq_enable(); +} + +/* + * When a delay is required for the flash operation to complete, the + * xip_udelay() function is polling for both the given timeout and pending + * (but still masked) hardware interrupts. Whenever there is an interrupt + * pending then the flash erase operation is suspended, array mode restored + * and interrupts unmasked. Task scheduling might also happen at that + * point. The CPU eventually returns from the interrupt or the call to + * schedule() and the suspended flash operation is resumed for the remaining + * of the delay period. + * + * Warning: this function _will_ fool interrupt latency tracing tools. + */ + +static void __xipram xip_udelay(struct map_info *map, struct flchip *chip, + unsigned long adr, int usec) +{ + struct cfi_private *cfi = map->fldrv_priv; + struct cfi_pri_amdstd *extp = cfi->cmdset_priv; + map_word status, OK = CMD(0x80); + unsigned long suspended, start = xip_currtime(); + flstate_t oldstate; + + do { + cpu_relax(); + if (xip_irqpending() && extp && + ((chip->state == FL_ERASING && (extp->EraseSuspend & 2))) && + (cfi_interleave_is_1(cfi) || chip->oldstate == FL_READY)) { + /* + * Let's suspend the erase operation when supported. 
+ * Note that we currently don't try to suspend + * interleaved chips if there is already another + * operation suspended (imagine what happens + * when one chip was already done with the current + * operation while another chip suspended it, then + * we resume the whole thing at once). Yes, it + * can happen! + */ + map_write(map, CMD(0xb0), adr); + usec -= xip_elapsed_since(start); + suspended = xip_currtime(); + do { + if (xip_elapsed_since(suspended) > 100000) { + /* + * The chip doesn't want to suspend + * after waiting for 100 msecs. + * This is a critical error but there + * is not much we can do here. + */ + return; + } + status = map_read(map, adr); + } while (!map_word_andequal(map, status, OK, OK)); + + /* Suspend succeeded */ + oldstate = chip->state; + if (!map_word_bitsset(map, status, CMD(0x40))) + break; + chip->state = FL_XIP_WHILE_ERASING; + chip->erase_suspended = 1; + map_write(map, CMD(0xf0), adr); + (void) map_read(map, adr); + asm volatile (".rep 8; nop; .endr"); + local_irq_enable(); + spin_unlock(chip->mutex); + asm volatile (".rep 8; nop; .endr"); + cond_resched(); + + /* + * We're back. However someone else might have + * decided to go write to the chip if we are in + * a suspended erase state. If so let's wait + * until it's done. + */ + spin_lock(chip->mutex); + while (chip->state != FL_XIP_WHILE_ERASING) { + DECLARE_WAITQUEUE(wait, current); + set_current_state(TASK_UNINTERRUPTIBLE); + add_wait_queue(&chip->wq, &wait); + spin_unlock(chip->mutex); + schedule(); + remove_wait_queue(&chip->wq, &wait); + spin_lock(chip->mutex); + } + /* Disallow XIP again */ + local_irq_disable(); + + /* Resume the write or erase operation */ + map_write(map, CMD(0x30), adr); + chip->state = oldstate; + start = xip_currtime(); + } else if (usec >= 1000000/HZ) { + /* + * Try to save on CPU power when waiting delay + * is at least a system timer tick period. + * No need to be extremely accurate here. 
+ */ + xip_cpu_idle(); + } + status = map_read(map, adr); + } while (!map_word_andequal(map, status, OK, OK) + && xip_elapsed_since(start) < usec); +} + +#define UDELAY(map, chip, adr, usec) xip_udelay(map, chip, adr, usec) + +/* + * The INVALIDATE_CACHED_RANGE() macro is normally used in parallel while + * the flash is actively programming or erasing since we have to poll for + * the operation to complete anyway. We can't do that in a generic way with + * a XIP setup so do it before the actual flash operation in this case + * and stub it out from INVALIDATE_CACHE_UDELAY. + */ +#define XIP_INVAL_CACHED_RANGE(map, from, size) \ + INVALIDATE_CACHED_RANGE(map, from, size) + +#define INVALIDATE_CACHE_UDELAY(map, chip, adr, len, usec) \ + UDELAY(map, chip, adr, usec) + +/* + * Extra notes: + * + * Activating this XIP support changes the way the code works a bit. For + * example the code to suspend the current process when concurrent access + * happens is never executed because xip_udelay() will always return with the + * same chip state as it was entered with. This is why there is no care for + * the presence of add_wait_queue() or schedule() calls from within a couple + * xip_disable()'d areas of code, like in do_erase_oneblock for example. + * The queueing and scheduling are always happening within xip_udelay(). + * + * Similarly, get_chip() and put_chip() just happen to always be executed + * with chip->state set to FL_READY (or FL_XIP_WHILE_*) where flash state + * is in array mode, therefore never executing many cases therein and not + * causing any problem with XIP. + */ + +#else + +#define xip_disable(map, chip, adr) +#define xip_enable(map, chip, adr) +#define XIP_INVAL_CACHED_RANGE(x...) 
+ +#define UDELAY(map, chip, adr, usec) \ +do { \ + spin_unlock(chip->mutex); \ + cfi_udelay(usec); \ + spin_lock(chip->mutex); \ +} while (0) + +#define INVALIDATE_CACHE_UDELAY(map, chip, adr, len, usec) \ +do { \ + spin_unlock(chip->mutex); \ + INVALIDATE_CACHED_RANGE(map, adr, len); \ + cfi_udelay(usec); \ + spin_lock(chip->mutex); \ +} while (0) + +#endif static inline int do_read_onechip(struct map_info *map, struct flchip *chip, loff_t adr, size_t len, u_char *buf) { @@ -563,10 +773,10 @@ static inline int do_read_onechip(struct map_info *map, struct flchip *chip, lof /* Ensure cmd read/writes are aligned. */ cmd_addr = adr & ~(map_bankwidth(map)-1); - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); ret = get_chip(map, chip, cmd_addr, FL_READY); if (ret) { - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); return ret; } @@ -579,7 +789,7 @@ static inline int do_read_onechip(struct map_info *map, struct flchip *chip, lof put_chip(map, chip, cmd_addr); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); return 0; } @@ -633,7 +843,7 @@ static inline int do_read_secsi_onechip(struct map_info *map, struct flchip *chi struct cfi_private *cfi = map->fldrv_priv; retry: - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); if (chip->state != FL_READY){ #if 0 @@ -642,7 +852,7 @@ static inline int do_read_secsi_onechip(struct map_info *map, struct flchip *chi set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&chip->wq, &wait); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); schedule(); remove_wait_queue(&chip->wq, &wait); @@ -671,7 +881,7 @@ static inline int do_read_secsi_onechip(struct map_info *map, struct flchip *chi cfi_send_gen_cmd(0x00, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); wake_up(&chip->wq); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); return 0; } @@ -720,7 +930,7 @@ static int cfi_amdstd_secsi_read (struct mtd_info *mtd, loff_t from, size_t len, } -static int 
do_write_oneword(struct map_info *map, struct flchip *chip, unsigned long adr, map_word datum) +static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip, unsigned long adr, map_word datum) { struct cfi_private *cfi = map->fldrv_priv; unsigned long timeo = jiffies + HZ; @@ -740,10 +950,10 @@ static int do_write_oneword(struct map_info *map, struct flchip *chip, unsigned adr += chip->start; - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); ret = get_chip(map, chip, adr, FL_WRITING); if (ret) { - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); return ret; } @@ -763,7 +973,9 @@ static int do_write_oneword(struct map_info *map, struct flchip *chip, unsigned goto op_done; } + XIP_INVAL_CACHED_RANGE(map, adr, map_bankwidth(map)); ENABLE_VPP(map); + xip_disable(map, chip, adr); retry: cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL); @@ -771,9 +983,9 @@ static int do_write_oneword(struct map_info *map, struct flchip *chip, unsigned map_write(map, datum, adr); chip->state = FL_WRITING; - cfi_spin_unlock(chip->mutex); - cfi_udelay(chip->word_write_time); - cfi_spin_lock(chip->mutex); + INVALIDATE_CACHE_UDELAY(map, chip, + adr, map_bankwidth(map), + chip->word_write_time); /* See comment above for timeout value. 
*/ timeo = jiffies + uWriteTimeout; @@ -784,11 +996,11 @@ static int do_write_oneword(struct map_info *map, struct flchip *chip, unsigned set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&chip->wq, &wait); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); schedule(); remove_wait_queue(&chip->wq, &wait); timeo = jiffies + (HZ / 2); /* FIXME */ - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); continue; } @@ -796,14 +1008,14 @@ static int do_write_oneword(struct map_info *map, struct flchip *chip, unsigned break; if (time_after(jiffies, timeo)) { + xip_enable(map, chip, adr); printk(KERN_WARNING "MTD %s(): software timeout\n", __func__); + xip_disable(map, chip, adr); break; } /* Latency issues. Drop the lock, wait a while and retry */ - cfi_spin_unlock(chip->mutex); - cfi_udelay(1); - cfi_spin_lock(chip->mutex); + UDELAY(map, chip, adr, 1); } /* Did we succeed? */ if (!chip_good(map, adr, datum)) { @@ -816,10 +1028,11 @@ static int do_write_oneword(struct map_info *map, struct flchip *chip, unsigned ret = -EIO; } + xip_enable(map, chip, adr); op_done: chip->state = FL_READY; put_chip(map, chip, adr); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); return ret; } @@ -851,7 +1064,7 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len, map_word tmp_buf; retry: - cfi_spin_lock(cfi->chips[chipnum].mutex); + spin_lock(cfi->chips[chipnum].mutex); if (cfi->chips[chipnum].state != FL_READY) { #if 0 @@ -860,7 +1073,7 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len, set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&cfi->chips[chipnum].wq, &wait); - cfi_spin_unlock(cfi->chips[chipnum].mutex); + spin_unlock(cfi->chips[chipnum].mutex); schedule(); remove_wait_queue(&cfi->chips[chipnum].wq, &wait); @@ -874,7 +1087,7 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len, /* Load 'tmp_buf' with old contents of flash */ tmp_buf = map_read(map, 
bus_ofs+chipstart); - cfi_spin_unlock(cfi->chips[chipnum].mutex); + spin_unlock(cfi->chips[chipnum].mutex); /* Number of bytes to copy from buffer */ n = min_t(int, len, map_bankwidth(map)-i); @@ -929,7 +1142,7 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len, map_word tmp_buf; retry1: - cfi_spin_lock(cfi->chips[chipnum].mutex); + spin_lock(cfi->chips[chipnum].mutex); if (cfi->chips[chipnum].state != FL_READY) { #if 0 @@ -938,7 +1151,7 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len, set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&cfi->chips[chipnum].wq, &wait); - cfi_spin_unlock(cfi->chips[chipnum].mutex); + spin_unlock(cfi->chips[chipnum].mutex); schedule(); remove_wait_queue(&cfi->chips[chipnum].wq, &wait); @@ -951,7 +1164,7 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len, tmp_buf = map_read(map, ofs + chipstart); - cfi_spin_unlock(cfi->chips[chipnum].mutex); + spin_unlock(cfi->chips[chipnum].mutex); tmp_buf = map_word_load_partial(map, tmp_buf, buf, 0, len); @@ -970,8 +1183,9 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len, /* * FIXME: interleaved mode not tested, and probably not supported! 
*/ -static inline int do_write_buffer(struct map_info *map, struct flchip *chip, - unsigned long adr, const u_char *buf, int len) +static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip, + unsigned long adr, const u_char *buf, + int len) { struct cfi_private *cfi = map->fldrv_priv; unsigned long timeo = jiffies + HZ; @@ -985,10 +1199,10 @@ static inline int do_write_buffer(struct map_info *map, struct flchip *chip, adr += chip->start; cmd_adr = adr; - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); ret = get_chip(map, chip, adr, FL_WRITING); if (ret) { - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); return ret; } @@ -997,7 +1211,10 @@ static inline int do_write_buffer(struct map_info *map, struct flchip *chip, DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): WRITE 0x%.8lx(0x%.8lx)\n", __func__, adr, datum.x[0] ); + XIP_INVAL_CACHED_RANGE(map, adr, len); ENABLE_VPP(map); + xip_disable(map, chip, cmd_adr); + cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL); //cfi_send_gen_cmd(0xA0, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); @@ -1027,9 +1244,9 @@ static inline int do_write_buffer(struct map_info *map, struct flchip *chip, map_write(map, CMD(0x29), cmd_adr); chip->state = FL_WRITING; - cfi_spin_unlock(chip->mutex); - cfi_udelay(chip->buffer_write_time); - cfi_spin_lock(chip->mutex); + INVALIDATE_CACHE_UDELAY(map, chip, + adr, map_bankwidth(map), + chip->word_write_time); timeo = jiffies + uWriteTimeout; @@ -1040,38 +1257,39 @@ static inline int do_write_buffer(struct map_info *map, struct flchip *chip, set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&chip->wq, &wait); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); schedule(); remove_wait_queue(&chip->wq, &wait); timeo = jiffies + (HZ / 2); /* FIXME */ - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); continue; } - 
if (chip_ready(map, adr)) + if (chip_ready(map, adr)) { + xip_enable(map, chip, adr); goto op_done; + } if( time_after(jiffies, timeo)) break; /* Latency issues. Drop the lock, wait a while and retry */ - cfi_spin_unlock(chip->mutex); - cfi_udelay(1); - cfi_spin_lock(chip->mutex); + UDELAY(map, chip, adr, 1); } - printk(KERN_WARNING "MTD %s(): software timeout\n", - __func__ ); - /* reset on all failures. */ map_write( map, CMD(0xF0), chip->start ); + xip_enable(map, chip, adr); /* FIXME - should have reset delay before continuing */ + printk(KERN_WARNING "MTD %s(): software timeout\n", + __func__ ); + ret = -EIO; op_done: chip->state = FL_READY; put_chip(map, chip, adr); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); return ret; } @@ -1161,7 +1379,7 @@ static int cfi_amdstd_write_buffers(struct mtd_info *mtd, loff_t to, size_t len, * Handle devices with one erase region, that only implement * the chip erase command. */ -static inline int do_erase_chip(struct map_info *map, struct flchip *chip) +static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip) { struct cfi_private *cfi = map->fldrv_priv; unsigned long timeo = jiffies + HZ; @@ -1171,17 +1389,20 @@ static inline int do_erase_chip(struct map_info *map, struct flchip *chip) adr = cfi->addr_unlock1; - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); ret = get_chip(map, chip, adr, FL_WRITING); if (ret) { - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); return ret; } DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): ERASE 0x%.8lx\n", __func__, chip->start ); + XIP_INVAL_CACHED_RANGE(map, adr, map->size); ENABLE_VPP(map); + xip_disable(map, chip, adr); + cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); @@ -1193,9 +1414,9 @@ static inline int do_erase_chip(struct 
map_info *map, struct flchip *chip) chip->erase_suspended = 0; chip->in_progress_block_addr = adr; - cfi_spin_unlock(chip->mutex); - msleep(chip->erase_time/2); - cfi_spin_lock(chip->mutex); + INVALIDATE_CACHE_UDELAY(map, chip, + adr, map->size, + chip->erase_time*500); timeo = jiffies + (HZ*20); @@ -1204,10 +1425,10 @@ static inline int do_erase_chip(struct map_info *map, struct flchip *chip) /* Someone's suspended the erase. Sleep */ set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&chip->wq, &wait); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); schedule(); remove_wait_queue(&chip->wq, &wait); - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); continue; } if (chip->erase_suspended) { @@ -1227,10 +1448,7 @@ static inline int do_erase_chip(struct map_info *map, struct flchip *chip) } /* Latency issues. Drop the lock, wait a while and retry */ - cfi_spin_unlock(chip->mutex); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); - cfi_spin_lock(chip->mutex); + UDELAY(map, chip, adr, 1000000/HZ); } /* Did we succeed? 
*/ if (!chip_good(map, adr, map_word_ff(map))) { @@ -1242,14 +1460,15 @@ static inline int do_erase_chip(struct map_info *map, struct flchip *chip) } chip->state = FL_READY; + xip_enable(map, chip, adr); put_chip(map, chip, adr); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); return ret; } -static inline int do_erase_oneblock(struct map_info *map, struct flchip *chip, unsigned long adr, int len, void *thunk) +static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, unsigned long adr, int len, void *thunk) { struct cfi_private *cfi = map->fldrv_priv; unsigned long timeo = jiffies + HZ; @@ -1258,17 +1477,20 @@ static inline int do_erase_oneblock(struct map_info *map, struct flchip *chip, u adr += chip->start; - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); ret = get_chip(map, chip, adr, FL_ERASING); if (ret) { - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); return ret; } DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): ERASE 0x%.8lx\n", __func__, adr ); + XIP_INVAL_CACHED_RANGE(map, adr, len); ENABLE_VPP(map); + xip_disable(map, chip, adr); + cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); @@ -1279,10 +1501,10 @@ static inline int do_erase_oneblock(struct map_info *map, struct flchip *chip, u chip->state = FL_ERASING; chip->erase_suspended = 0; chip->in_progress_block_addr = adr; - - cfi_spin_unlock(chip->mutex); - msleep(chip->erase_time/2); - cfi_spin_lock(chip->mutex); + + INVALIDATE_CACHE_UDELAY(map, chip, + adr, len, + chip->erase_time*500); timeo = jiffies + (HZ*20); @@ -1291,10 +1513,10 @@ static inline int do_erase_oneblock(struct map_info *map, struct flchip *chip, u /* Someone's suspended the erase. 
Sleep */ set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&chip->wq, &wait); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); schedule(); remove_wait_queue(&chip->wq, &wait); - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); continue; } if (chip->erase_suspended) { @@ -1304,20 +1526,20 @@ static inline int do_erase_oneblock(struct map_info *map, struct flchip *chip, u chip->erase_suspended = 0; } - if (chip_ready(map, adr)) + if (chip_ready(map, adr)) { + xip_enable(map, chip, adr); break; + } if (time_after(jiffies, timeo)) { + xip_enable(map, chip, adr); printk(KERN_WARNING "MTD %s(): software timeout\n", __func__ ); break; } /* Latency issues. Drop the lock, wait a while and retry */ - cfi_spin_unlock(chip->mutex); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); - cfi_spin_lock(chip->mutex); + UDELAY(map, chip, adr, 1000000/HZ); } /* Did we succeed? */ if (!chip_good(map, adr, map_word_ff(map))) { @@ -1330,7 +1552,7 @@ static inline int do_erase_oneblock(struct map_info *map, struct flchip *chip, u chip->state = FL_READY; put_chip(map, chip, adr); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); return ret; } @@ -1390,7 +1612,7 @@ static void cfi_amdstd_sync (struct mtd_info *mtd) chip = &cfi->chips[i]; retry: - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); switch(chip->state) { case FL_READY: @@ -1404,14 +1626,14 @@ static void cfi_amdstd_sync (struct mtd_info *mtd) * with the chip now anyway. 
*/ case FL_SYNCING: - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); break; default: /* Not an idle state */ add_wait_queue(&chip->wq, &wait); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); schedule(); @@ -1426,13 +1648,13 @@ static void cfi_amdstd_sync (struct mtd_info *mtd) for (i--; i >=0; i--) { chip = &cfi->chips[i]; - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); if (chip->state == FL_SYNCING) { chip->state = chip->oldstate; wake_up(&chip->wq); } - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); } } @@ -1448,7 +1670,7 @@ static int cfi_amdstd_suspend(struct mtd_info *mtd) for (i=0; !ret && inumchips; i++) { chip = &cfi->chips[i]; - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); switch(chip->state) { case FL_READY: @@ -1468,7 +1690,7 @@ static int cfi_amdstd_suspend(struct mtd_info *mtd) ret = -EAGAIN; break; } - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); } /* Unlock the chips again */ @@ -1477,13 +1699,13 @@ static int cfi_amdstd_suspend(struct mtd_info *mtd) for (i--; i >=0; i--) { chip = &cfi->chips[i]; - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); if (chip->state == FL_PM_SUSPENDED) { chip->state = chip->oldstate; wake_up(&chip->wq); } - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); } } @@ -1502,7 +1724,7 @@ static void cfi_amdstd_resume(struct mtd_info *mtd) chip = &cfi->chips[i]; - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); if (chip->state == FL_PM_SUSPENDED) { chip->state = FL_READY; @@ -1512,7 +1734,7 @@ static void cfi_amdstd_resume(struct mtd_info *mtd) else printk(KERN_ERR "Argh. 
Chip not in PM_SUSPENDED state upon resume()\n"); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); } } diff --git a/drivers/mtd/chips/fwh_lock.h b/drivers/mtd/chips/fwh_lock.h index fbf44708a86..e1a5b76596c 100644 --- a/drivers/mtd/chips/fwh_lock.h +++ b/drivers/mtd/chips/fwh_lock.h @@ -58,10 +58,10 @@ static int fwh_xxlock_oneblock(struct map_info *map, struct flchip *chip, * to flash memory - that means that we don't have to check status * and timeout. */ - cfi_spin_lock(chip->mutex); + spin_lock(chip->mutex); ret = get_chip(map, chip, adr, FL_LOCKING); if (ret) { - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); return ret; } @@ -71,7 +71,7 @@ static int fwh_xxlock_oneblock(struct map_info *map, struct flchip *chip, /* Done and happy. */ chip->state = FL_READY; put_chip(map, chip, adr); - cfi_spin_unlock(chip->mutex); + spin_unlock(chip->mutex); return 0; } diff --git a/drivers/mtd/maps/map_funcs.c b/drivers/mtd/maps/map_funcs.c index 38f6a7af53f..9105e6ca0aa 100644 --- a/drivers/mtd/maps/map_funcs.c +++ b/drivers/mtd/maps/map_funcs.c @@ -1,5 +1,5 @@ /* - * $Id: map_funcs.c,v 1.9 2004/07/13 22:33:15 dwmw2 Exp $ + * $Id: map_funcs.c,v 1.10 2005/06/06 23:04:36 tpoynor Exp $ * * Out-of-line map I/O functions for simple maps when CONFIG_COMPLEX_MAPPINGS * is enabled. 
@@ -9,23 +9,24 @@ #include #include +#include -static map_word simple_map_read(struct map_info *map, unsigned long ofs) +static map_word __xipram simple_map_read(struct map_info *map, unsigned long ofs) { return inline_map_read(map, ofs); } -static void simple_map_write(struct map_info *map, const map_word datum, unsigned long ofs) +static void __xipram simple_map_write(struct map_info *map, const map_word datum, unsigned long ofs) { inline_map_write(map, datum, ofs); } -static void simple_map_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len) +static void __xipram simple_map_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len) { inline_map_copy_from(map, to, from, len); } -static void simple_map_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len) +static void __xipram simple_map_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len) { inline_map_copy_to(map, to, from, len); } diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 66e0a32efba..e6b6a1c66bd 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -1,7 +1,7 @@ /* Common Flash Interface structures * See http://support.intel.com/design/flash/technote/index.htm - * $Id: cfi.h,v 1.53 2005/03/15 19:03:13 gleixner Exp $ + * $Id: cfi.h,v 1.54 2005/06/06 23:04:36 tpoynor Exp $ */ #ifndef __MTD_CFI_H__ @@ -428,16 +428,6 @@ static inline void cfi_udelay(int us) } } -static inline void cfi_spin_lock(spinlock_t *mutex) -{ - spin_lock_bh(mutex); -} - -static inline void cfi_spin_unlock(spinlock_t *mutex) -{ - spin_unlock_bh(mutex); -} - struct cfi_extquery *cfi_read_pri(struct map_info *map, uint16_t adr, uint16_t size, const char* name); struct cfi_fixup { -- cgit v1.2.3-70-g09d2 From bfabb98688e7089381baa0974f7ff6786ce2a2d0 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Mon, 13 Jun 2005 14:08:48 +0100 Subject: [MTD] Use correct major number for INFTL inftl was assigned new major number 96, 
94 is in use by dasd. See: http://www.ussg.iu.edu/hypermail/linux/kernel/0409.2/1220.html Signed-off-by: Sean Young Signed-off-by: Thomas Gleixner --- include/linux/mtd/inftl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/inftl.h b/include/linux/mtd/inftl.h index b52c8cbd235..0268125a627 100644 --- a/include/linux/mtd/inftl.h +++ b/include/linux/mtd/inftl.h @@ -3,7 +3,7 @@ * * (C) Copyright 2002, Greg Ungerer (gerg@snapgear.com) * - * $Id: inftl.h,v 1.6 2004/06/30 14:49:00 dbrown Exp $ + * $Id: inftl.h,v 1.7 2005/06/13 13:08:45 sean Exp $ */ #ifndef __MTD_INFTL_H__ @@ -20,7 +20,7 @@ #include #ifndef INFTL_MAJOR -#define INFTL_MAJOR 94 +#define INFTL_MAJOR 96 #endif #define INFTL_PARTN_BITS 4 -- cgit v1.2.3-70-g09d2 From 0edb586049e57c56e625536476931117a57671e9 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Wed, 22 Jun 2005 16:59:51 +0200 Subject: [PATCH] driver core: add bus_find_device & driver_find_device functions Add bus_find_device() and driver_find_device() which allow searching for a device in the bus's resp. the driver's klist and obtain a reference on it. Signed-off-by: Cornelia Huck Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 34 ++++++++++++++++++++++++++++++++++ drivers/base/driver.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/device.h | 5 +++++ 3 files changed, 74 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/bus.c b/drivers/base/bus.c index c3fac7fd555..2c64b792d07 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -177,6 +177,39 @@ int bus_for_each_dev(struct bus_type * bus, struct device * start, return error; } +/** + * bus_find_device - device iterator for locating a particular device. 
+ * @bus: bus type + * @start: Device to begin with + * @data: Data to pass to match function + * @match: Callback function to check device + * + * This is similar to the bus_for_each_dev() function above, but it + * returns a reference to a device that is 'found' for later use, as + * determined by the @match callback. + * + * The callback should return 0 if the device doesn't match and non-zero + * if it does. If the callback returns non-zero, this function will + * return to the caller and not iterate over any more devices. + */ +struct device * bus_find_device(struct bus_type *bus, + struct device *start, void *data, + int (*match)(struct device *, void *)) +{ + struct klist_iter i; + struct device *dev; + + if (!bus) + return NULL; + + klist_iter_init_node(&bus->klist_devices, &i, + (start ? &start->knode_bus : NULL)); + while ((dev = next_device(&i))) + if (match(dev, data) && get_device(dev)) + break; + klist_iter_exit(&i); + return dev; +} static struct device_driver * next_driver(struct klist_iter * i) @@ -557,6 +590,7 @@ int __init buses_init(void) EXPORT_SYMBOL_GPL(bus_for_each_dev); +EXPORT_SYMBOL_GPL(bus_find_device); EXPORT_SYMBOL_GPL(bus_for_each_drv); EXPORT_SYMBOL_GPL(bus_add_device); diff --git a/drivers/base/driver.c b/drivers/base/driver.c index 1b645886e9e..291c5954a3a 100644 --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -55,6 +55,41 @@ int driver_for_each_device(struct device_driver * drv, struct device * start, EXPORT_SYMBOL_GPL(driver_for_each_device); +/** + * driver_find_device - device iterator for locating a particular device. + * @driver: The device's driver + * @start: Device to begin with + * @data: Data to pass to match function + * @match: Callback function to check device + * + * This is similar to the driver_for_each_device() function above, but + * it returns a reference to a device that is 'found' for later use, as + * determined by the @match callback. 
+ * + * The callback should return 0 if the device doesn't match and non-zero + * if it does. If the callback returns non-zero, this function will + * return to the caller and not iterate over any more devices. + */ +struct device * driver_find_device(struct device_driver *drv, + struct device * start, void * data, + int (*match)(struct device *, void *)) +{ + struct klist_iter i; + struct device *dev; + + if (!drv) + return NULL; + + klist_iter_init_node(&drv->klist_devices, &i, + (start ? &start->knode_driver : NULL)); + while ((dev = next_device(&i))) + if (match(dev, data) && get_device(dev)) + break; + klist_iter_exit(&i); + return dev; +} +EXPORT_SYMBOL_GPL(driver_find_device); + /** * driver_create_file - create sysfs file for driver. * @drv: driver. diff --git a/include/linux/device.h b/include/linux/device.h index 7b781a72b29..07222c531d3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -80,6 +80,8 @@ extern struct bus_type * find_bus(char * name); int bus_for_each_dev(struct bus_type * bus, struct device * start, void * data, int (*fn)(struct device *, void *)); +struct device * bus_find_device(struct bus_type *bus, struct device *start, + void *data, int (*match)(struct device *, void *)); int bus_for_each_drv(struct bus_type * bus, struct device_driver * start, void * data, int (*fn)(struct device_driver *, void *)); @@ -142,6 +144,9 @@ extern void driver_remove_file(struct device_driver *, struct driver_attribute * extern int driver_for_each_device(struct device_driver * drv, struct device * start, void * data, int (*fn)(struct device *, void *)); +struct device * driver_find_device(struct device_driver *drv, + struct device *start, void *data, + int (*match)(struct device *, void *)); /* -- cgit v1.2.3-70-g09d2 From 23d3d602cb96addd3c1158424fb01a49ea5e81b1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 22 Jun 2005 16:09:05 -0700 Subject: [PATCH] driver core: change bus_rescan_devices to return void No one was looking 
at the return value of bus_rescan_devices, and it really wasn't anything that anyone in the kernel would ever care about. So change it which enabled some counting code to be removed also. Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 27 +++++++++------------------ include/linux/device.h | 2 +- 2 files changed, 10 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 7e17488271a..96fe2f95675 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -483,31 +483,22 @@ void bus_remove_driver(struct device_driver * drv) /* Helper for bus_rescan_devices's iter */ static int bus_rescan_devices_helper(struct device *dev, void *data) { - int *count = data; - - if (!dev->driver && (device_attach(dev) > 0)) - (*count)++; - + if (!dev->driver) + device_attach(dev); return 0; } - /** - * bus_rescan_devices - rescan devices on the bus for possible drivers - * @bus: the bus to scan. + * bus_rescan_devices - rescan devices on the bus for possible drivers + * @bus: the bus to scan. * - * This function will look for devices on the bus with no driver - * attached and rescan it against existing drivers to see if it - * matches any. Calls device_attach(). Returns the number of devices - * that were sucessfully bound to a driver. + * This function will look for devices on the bus with no driver + * attached and rescan it against existing drivers to see if it matches + * any by calling device_attach() for the unbound devices. 
*/ -int bus_rescan_devices(struct bus_type * bus) +void bus_rescan_devices(struct bus_type * bus) { - int count = 0; - - bus_for_each_dev(bus, NULL, &count, bus_rescan_devices_helper); - - return count; + bus_for_each_dev(bus, NULL, NULL, bus_rescan_devices_helper); } diff --git a/include/linux/device.h b/include/linux/device.h index 07222c531d3..f378c846e6d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -69,7 +69,7 @@ struct bus_type { extern int bus_register(struct bus_type * bus); extern void bus_unregister(struct bus_type * bus); -extern int bus_rescan_devices(struct bus_type * bus); +extern void bus_rescan_devices(struct bus_type * bus); extern struct bus_type * get_bus(struct bus_type * bus); extern void put_bus(struct bus_type * bus); -- cgit v1.2.3-70-g09d2 From a03fa955576af50df80bec9127b46ef57e0877c0 Mon Sep 17 00:00:00 2001 From: "rajesh.shah@intel.com" Date: Thu, 2 Jun 2005 15:41:48 -0700 Subject: [PATCH] PCI: Increase the number of PCI bus resources This patch increases the number of resource pointers in the pci_bus structure. This is needed to store >4 resource ranges for host bridges and transparent PCI bridges. With this change, all PCI buses will have more resource pointers, but most PCI buses will only use the first 3 or 4, the remaining being NULL. The PCI core already deals with this correctly. 
Signed-off-by: Rajesh Shah Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 66798b46f30..a46cabfd08c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -586,7 +586,7 @@ struct pci_dev { #define PCI_NUM_RESOURCES 11 #ifndef PCI_BUS_NUM_RESOURCES -#define PCI_BUS_NUM_RESOURCES 4 +#define PCI_BUS_NUM_RESOURCES 8 #endif #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ -- cgit v1.2.3-70-g09d2 From 75865858971add95809c5c9cd35dc4cfba08e33b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 30 Jun 2005 02:18:12 -0700 Subject: [PATCH] PCI: clean up dynamic pci id logic The dynamic pci id logic has been bothering me for a while, and now that I started to look into how to move some of this to the driver core, I thought it was time to clean it all up. It ends up making the code smaller, and easier to follow, and fixes a few bugs at the same time (dynamic ids were not being matched everywhere, and so could be missed on some call paths for new devices, semaphore not needed to be grabbed when adding a new id and calling the driver core, etc.) I also renamed the function pci_match_device() to pci_match_id() as that's what it really does. 
Signed-off-by: Greg Kroah-Hartman --- arch/i386/kernel/cpu/cpufreq/gx-suspmod.c | 2 +- drivers/char/hw_random.c | 2 +- drivers/char/watchdog/i8xx_tco.c | 2 +- drivers/ide/setup-pci.c | 2 +- drivers/parport/parport_pc.c | 2 +- drivers/pci/pci-driver.c | 196 +++++++++++------------------- include/linux/pci-dynids.h | 18 --- include/linux/pci.h | 3 +- sound/pci/bt87x.c | 2 +- 9 files changed, 79 insertions(+), 150 deletions(-) delete mode 100644 include/linux/pci-dynids.h (limited to 'include/linux') diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c index 1a49adb1f4a..e86ea486c31 100644 --- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c @@ -190,7 +190,7 @@ static __init struct pci_dev *gx_detect_chipset(void) /* detect which companion chip is used */ while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { - if ((pci_match_device (gx_chipset_tbl, gx_pci)) != NULL) { + if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) { return gx_pci; } } diff --git a/drivers/char/hw_random.c b/drivers/char/hw_random.c index 7e6ac14c245..3480535a09c 100644 --- a/drivers/char/hw_random.c +++ b/drivers/char/hw_random.c @@ -579,7 +579,7 @@ static int __init rng_init (void) /* Probe for Intel, AMD RNGs */ for_each_pci_dev(pdev) { - ent = pci_match_device (rng_pci_tbl, pdev); + ent = pci_match_id(rng_pci_tbl, pdev); if (ent) { rng_ops = &rng_vendor_ops[ent->driver_data]; goto match; diff --git a/drivers/char/watchdog/i8xx_tco.c b/drivers/char/watchdog/i8xx_tco.c index b14d642439e..5d07ee59679 100644 --- a/drivers/char/watchdog/i8xx_tco.c +++ b/drivers/char/watchdog/i8xx_tco.c @@ -401,7 +401,7 @@ static unsigned char __init i8xx_tco_getdevice (void) */ while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - if (pci_match_device(i8xx_tco_pci_tbl, dev)) { + if (pci_match_id(i8xx_tco_pci_tbl, dev)) { i8xx_tco_pci = dev; break; } diff --git a/drivers/ide/setup-pci.c 
b/drivers/ide/setup-pci.c index e501675ad72..77da827b289 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -847,7 +847,7 @@ static int __init ide_scan_pcidev(struct pci_dev *dev) d = list_entry(l, struct pci_driver, node); if(d->id_table) { - const struct pci_device_id *id = pci_match_device(d->id_table, dev); + const struct pci_device_id *id = pci_match_id(d->id_table, dev); if(id != NULL) { if(d->probe(dev, id) >= 0) diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 80edfa3abd2..4598c6a9212 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -3008,7 +3008,7 @@ static int __init parport_pc_init_superio (int autoirq, int autodma) int ret = 0; while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev)) != NULL) { - id = pci_match_device (parport_pc_pci_tbl, pdev); + id = pci_match_id(parport_pc_pci_tbl, pdev); if (id == NULL || id->driver_data >= last_sio) continue; diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index e65bf2b395a..aac6de9568e 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -7,7 +7,6 @@ #include #include #include -#include #include "pci.h" /* @@ -19,35 +18,11 @@ */ #ifdef CONFIG_HOTPLUG -/** - * pci_device_probe_dynamic() - * - * Walk the dynamic ID list looking for a match. - * returns 0 and sets pci_dev->driver when drv claims pci_dev, else error. 
- */ -static int -pci_device_probe_dynamic(struct pci_driver *drv, struct pci_dev *pci_dev) -{ - int error = -ENODEV; - struct list_head *pos; - struct dynid *dynid; - spin_lock(&drv->dynids.lock); - list_for_each(pos, &drv->dynids.list) { - dynid = list_entry(pos, struct dynid, node); - if (pci_match_one_device(&dynid->id, pci_dev)) { - spin_unlock(&drv->dynids.lock); - error = drv->probe(pci_dev, &dynid->id); - if (error >= 0) { - pci_dev->driver = drv; - return 0; - } - return error; - } - } - spin_unlock(&drv->dynids.lock); - return error; -} +struct pci_dynid { + struct list_head node; + struct pci_device_id id; +}; /** * store_new_id @@ -58,8 +33,7 @@ pci_device_probe_dynamic(struct pci_driver *drv, struct pci_dev *pci_dev) static inline ssize_t store_new_id(struct device_driver *driver, const char *buf, size_t count) { - struct dynid *dynid; - struct bus_type * bus; + struct pci_dynid *dynid; struct pci_driver *pdrv = to_pci_driver(driver); __u32 vendor=PCI_ANY_ID, device=PCI_ANY_ID, subvendor=PCI_ANY_ID, subdevice=PCI_ANY_ID, class=0, class_mask=0; @@ -91,37 +65,22 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count) list_add_tail(&pdrv->dynids.list, &dynid->node); spin_unlock(&pdrv->dynids.lock); - bus = get_bus(pdrv->driver.bus); - if (bus) { - if (get_driver(&pdrv->driver)) { - down_write(&bus->subsys.rwsem); - driver_attach(&pdrv->driver); - up_write(&bus->subsys.rwsem); - put_driver(&pdrv->driver); - } - put_bus(bus); + if (get_driver(&pdrv->driver)) { + driver_attach(&pdrv->driver); + put_driver(&pdrv->driver); } return count; } - static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id); -static inline void -pci_init_dynids(struct pci_dynids *dynids) -{ - spin_lock_init(&dynids->lock); - INIT_LIST_HEAD(&dynids->list); -} static void pci_free_dynids(struct pci_driver *drv) { - struct list_head *pos, *n; - struct dynid *dynid; + struct pci_dynid *dynid, *n; spin_lock(&drv->dynids.lock); - list_for_each_safe(pos, n, 
&drv->dynids.list) { - dynid = list_entry(pos, struct dynid, node); + list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { list_del(&dynid->node); kfree(dynid); } @@ -138,83 +97,70 @@ pci_create_newid_file(struct pci_driver *drv) return error; } -static int -pci_bus_match_dynids(const struct pci_dev *pci_dev, struct pci_driver *pci_drv) -{ - struct list_head *pos; - struct dynid *dynid; - - spin_lock(&pci_drv->dynids.lock); - list_for_each(pos, &pci_drv->dynids.list) { - dynid = list_entry(pos, struct dynid, node); - if (pci_match_one_device(&dynid->id, pci_dev)) { - spin_unlock(&pci_drv->dynids.lock); - return 1; - } - } - spin_unlock(&pci_drv->dynids.lock); - return 0; -} - #else /* !CONFIG_HOTPLUG */ -static inline int pci_device_probe_dynamic(struct pci_driver *drv, struct pci_dev *pci_dev) -{ - return -ENODEV; -} -static inline void pci_init_dynids(struct pci_dynids *dynids) {} static inline void pci_free_dynids(struct pci_driver *drv) {} static inline int pci_create_newid_file(struct pci_driver *drv) { return 0; } -static inline int pci_bus_match_dynids(const struct pci_dev *pci_dev, struct pci_driver *pci_drv) -{ - return 0; -} #endif /** - * pci_match_device - Tell if a PCI device structure has a matching - * PCI device id structure + * pci_match_id - See if a pci device matches a given pci_id table * @ids: array of PCI device id structures to search in - * @dev: the PCI device structure to match against - * + * @dev: the PCI device structure to match against. + * * Used by a driver to check whether a PCI device present in the - * system is in its list of supported devices.Returns the matching + * system is in its list of supported devices. Returns the matching * pci_device_id structure or %NULL if there is no match. + * + * Depreciated, don't use this as it will not catch any dynamic ids + * that a driver might want to check for. 
*/ -const struct pci_device_id * -pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev) +const struct pci_device_id *pci_match_id(const struct pci_device_id *ids, + struct pci_dev *dev) { - while (ids->vendor || ids->subvendor || ids->class_mask) { - if (pci_match_one_device(ids, dev)) - return ids; - ids++; + if (ids) { + while (ids->vendor || ids->subvendor || ids->class_mask) { + if (pci_match_one_device(ids, dev)) + return ids; + ids++; + } } return NULL; } /** - * pci_device_probe_static() - * - * returns 0 and sets pci_dev->driver when drv claims pci_dev, else error. + * pci_match_device - Tell if a PCI device structure has a matching + * PCI device id structure + * @ids: array of PCI device id structures to search in + * @dev: the PCI device structure to match against + * @drv: the PCI driver to match against + * + * Used by a driver to check whether a PCI device present in the + * system is in its list of supported devices. Returns the matching + * pci_device_id structure or %NULL if there is no match. 
*/ -static int -pci_device_probe_static(struct pci_driver *drv, struct pci_dev *pci_dev) -{ - int error = -ENODEV; +const struct pci_device_id *pci_match_device(struct pci_driver *drv, + struct pci_dev *dev) +{ const struct pci_device_id *id; + struct pci_dynid *dynid; - if (!drv->id_table) - return error; - id = pci_match_device(drv->id_table, pci_dev); + id = pci_match_id(drv->id_table, dev); if (id) - error = drv->probe(pci_dev, id); - if (error >= 0) { - pci_dev->driver = drv; - error = 0; + return id; + + /* static ids didn't match, lets look at the dynamic ones */ + spin_lock(&drv->dynids.lock); + list_for_each_entry(dynid, &drv->dynids.list, node) { + if (pci_match_one_device(&dynid->id, dev)) { + spin_unlock(&drv->dynids.lock); + return &dynid->id; + } } - return error; + spin_unlock(&drv->dynids.lock); + return NULL; } /** @@ -225,13 +171,20 @@ pci_device_probe_static(struct pci_driver *drv, struct pci_dev *pci_dev) */ static int __pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev) -{ +{ + const struct pci_device_id *id; int error = 0; if (!pci_dev->driver && drv->probe) { - error = pci_device_probe_static(drv, pci_dev); - if (error == -ENODEV) - error = pci_device_probe_dynamic(drv, pci_dev); + error = -ENODEV; + + id = pci_match_device(drv, pci_dev); + if (id) + error = drv->probe(pci_dev, id); + if (error >= 0) { + pci_dev->driver = drv; + error = 0; + } } return error; } @@ -371,12 +324,6 @@ static struct kobj_type pci_driver_kobj_type = { .sysfs_ops = &pci_driver_sysfs_ops, }; -static int -pci_populate_driver_dir(struct pci_driver *drv) -{ - return pci_create_newid_file(drv); -} - /** * pci_register_driver - register a new pci driver * @drv: the driver structure to register @@ -401,13 +348,15 @@ int pci_register_driver(struct pci_driver *drv) drv->driver.shutdown = pci_device_shutdown; drv->driver.owner = drv->owner; drv->driver.kobj.ktype = &pci_driver_kobj_type; - pci_init_dynids(&drv->dynids); + + spin_lock_init(&drv->dynids.lock); + 
INIT_LIST_HEAD(&drv->dynids.list); /* register with core */ error = driver_register(&drv->driver); if (!error) - pci_populate_driver_dir(drv); + error = pci_create_newid_file(drv); return error; } @@ -463,21 +412,17 @@ pci_dev_driver(const struct pci_dev *dev) * system is in its list of supported devices.Returns the matching * pci_device_id structure or %NULL if there is no match. */ -static int pci_bus_match(struct device * dev, struct device_driver * drv) +static int pci_bus_match(struct device *dev, struct device_driver *drv) { - const struct pci_dev * pci_dev = to_pci_dev(dev); - struct pci_driver * pci_drv = to_pci_driver(drv); - const struct pci_device_id * ids = pci_drv->id_table; + struct pci_dev *pci_dev = to_pci_dev(dev); + struct pci_driver *pci_drv = to_pci_driver(drv); const struct pci_device_id *found_id; - if (!ids) - return 0; - - found_id = pci_match_device(ids, pci_dev); + found_id = pci_match_device(pci_drv, pci_dev); if (found_id) return 1; - return pci_bus_match_dynids(pci_dev, pci_drv); + return 0; } /** @@ -536,6 +481,7 @@ static int __init pci_driver_init(void) postcore_initcall(pci_driver_init); +EXPORT_SYMBOL(pci_match_id); EXPORT_SYMBOL(pci_match_device); EXPORT_SYMBOL(pci_register_driver); EXPORT_SYMBOL(pci_unregister_driver); diff --git a/include/linux/pci-dynids.h b/include/linux/pci-dynids.h deleted file mode 100644 index 183b6b0de81..00000000000 --- a/include/linux/pci-dynids.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * PCI defines and function prototypes - * Copyright 2003 Dell Inc. 
- * by Matt Domsch - */ - -#ifndef LINUX_PCI_DYNIDS_H -#define LINUX_PCI_DYNIDS_H - -#include -#include - -struct dynid { - struct list_head node; - struct pci_device_id id; -}; - -#endif diff --git a/include/linux/pci.h b/include/linux/pci.h index a46cabfd08c..7ac14961ba2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -860,7 +860,8 @@ int pci_register_driver(struct pci_driver *); void pci_unregister_driver(struct pci_driver *); void pci_remove_behind_bridge(struct pci_dev *); struct pci_driver *pci_dev_driver(const struct pci_dev *); -const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev); +const struct pci_device_id *pci_match_device(struct pci_driver *drv, struct pci_dev *dev); +const struct pci_device_id *pci_match_id(const struct pci_device_id *ids, struct pci_dev *dev); int pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass); /* kmem_cache style wrapper around pci_alloc_consistent() */ diff --git a/sound/pci/bt87x.c b/sound/pci/bt87x.c index defdc5a459f..909fef8903c 100644 --- a/sound/pci/bt87x.c +++ b/sound/pci/bt87x.c @@ -804,7 +804,7 @@ static int __devinit snd_bt87x_detect_card(struct pci_dev *pci) int i; const struct pci_device_id *supported; - supported = pci_match_device(snd_bt87x_ids, pci); + supported = pci_match_device(driver, pci); if (supported) return supported->driver_data; -- cgit v1.2.3-70-g09d2 From 21e2c01dc3e38d466eda5871645878d2c3a33261 Mon Sep 17 00:00:00 2001 From: Rob Punkunus Date: Sun, 3 Jul 2005 17:37:18 +0200 Subject: [PATCH] amd74xx: support MCP55 device IDs From: Rob Punkunus Rob Punkunus recently submitted a patch to enable support for MCP51/MCP55 in the amd74xx driver. This patch was whitespace-corrupted and didn't apply to 2.6.12 since MCP51 support was merged in the 2.6.12-rc series. 
Gentoo would like to support this hardware for our upcoming release media, so I fixed the patch, and here it is :) Signed-off-by: Daniel Drake Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/pci/amd74xx.c | 3 +++ include/linux/pci_ids.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c index 65eab9b63a7..844a6c9fb94 100644 --- a/drivers/ide/pci/amd74xx.c +++ b/drivers/ide/pci/amd74xx.c @@ -73,6 +73,7 @@ static struct amd_ide_chip { { PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE, 0x50, AMD_UDMA_133 }, { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE, 0x50, AMD_UDMA_133 }, { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE, 0x50, AMD_UDMA_133 }, + { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, 0x50, AMD_UDMA_133 }, { 0 } }; @@ -489,6 +490,7 @@ static ide_pci_device_t amd74xx_chipsets[] __devinitdata = { /* 13 */ DECLARE_NV_DEV("NFORCE-CK804"), /* 14 */ DECLARE_NV_DEV("NFORCE-MCP04"), /* 15 */ DECLARE_NV_DEV("NFORCE-MCP51"), + /* 16 */ DECLARE_NV_DEV("NFORCE-MCP55"), }; static int __devinit amd74xx_probe(struct pci_dev *dev, const struct pci_device_id *id) @@ -524,6 +526,7 @@ static struct pci_device_id amd74xx_pci_tbl[] = { { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 13 }, { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 14 }, { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 15 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 16 }, { 0, }, }; MODULE_DEVICE_TABLE(pci, amd74xx_pci_tbl); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c3ee1ae4545..27348c22dac 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1238,6 +1238,7 @@ #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE 0x0265 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA 0x0266 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2 0x0267 
+#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE 0x036E #define PCI_DEVICE_ID_NVIDIA_NVENET_12 0x0268 #define PCI_DEVICE_ID_NVIDIA_NVENET_13 0x0269 #define PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO 0x026B -- cgit v1.2.3-70-g09d2 From e7270dec080002d8aa18256c756af6c32331ef48 Mon Sep 17 00:00:00 2001 From: Raphael Assenat Date: Mon, 4 Jul 2005 13:23:45 -0700 Subject: [SPARC64/COMPAT]: Add some compat ioctl for ppdev The following patch adds some ioctls to include/linux/compat_ioctl.h to allow using ppdev from the 32 bit user space on sparc64. This patch also adds the PPDEV option in the sparc64 menu, near Parallel printer support in the 'General machine setup' submenu. All those ioctls seem to be compatible, since (correct me if I'm wrong) they don't use the 'long' type. See include/linux/ppdev.h. The application I used to test the new ioctls only used the following: PPEXCL PPCLAIM PPNEGOT PPGETMODES PPRCONTROL PPWCONTROL PPDATADIR PPWDATA PPRDATA But I believe that the other ioctls will work fine. Signed-off-by: David S. Miller --- arch/sparc64/Kconfig | 18 ++++++++++++++++++ include/linux/compat_ioctl.h | 19 ++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index e2b050eb3b9..d78bc13ebbb 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -444,6 +444,24 @@ config PRINTER If you have more than 8 printers, you need to increase the LP_NO macro in lp.c and the PARPORT_MAX macro in parport.h. +config PPDEV + tristate "Support for user-space parallel port device drivers" + depends on PARPORT + ---help--- + Saying Y to this adds support for /dev/parport device nodes. This + is needed for programs that want portable access to the parallel + port, for instance deviceid (which displays Plug-and-Play device + IDs). + + This is the parallel port equivalent of SCSI generic support (sg). 
+ It is safe to say N to this -- it is not needed for normal printing + or parallel port CD-ROM/disk support. + + To compile this driver as a module, choose M here: the + module will be called ppdev. + + If unsure, say N. + config ENVCTRL tristate "SUNW, envctrl support" depends on PCI diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index 70a4ebb5d96..ecb0d39c079 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -346,10 +346,27 @@ COMPATIBLE_IOCTL(PPPOEIOCDFWD) /* LP */ COMPATIBLE_IOCTL(LPGETSTATUS) /* ppdev */ +COMPATIBLE_IOCTL(PPSETMODE) +COMPATIBLE_IOCTL(PPRSTATUS) +COMPATIBLE_IOCTL(PPRCONTROL) +COMPATIBLE_IOCTL(PPWCONTROL) +COMPATIBLE_IOCTL(PPFCONTROL) +COMPATIBLE_IOCTL(PPRDATA) +COMPATIBLE_IOCTL(PPWDATA) COMPATIBLE_IOCTL(PPCLAIM) COMPATIBLE_IOCTL(PPRELEASE) -COMPATIBLE_IOCTL(PPEXCL) COMPATIBLE_IOCTL(PPYIELD) +COMPATIBLE_IOCTL(PPEXCL) +COMPATIBLE_IOCTL(PPDATADIR) +COMPATIBLE_IOCTL(PPNEGOT) +COMPATIBLE_IOCTL(PPWCTLONIRQ) +COMPATIBLE_IOCTL(PPCLRIRQ) +COMPATIBLE_IOCTL(PPSETPHASE) +COMPATIBLE_IOCTL(PPGETMODES) +COMPATIBLE_IOCTL(PPGETMODE) +COMPATIBLE_IOCTL(PPGETPHASE) +COMPATIBLE_IOCTL(PPGETFLAGS) +COMPATIBLE_IOCTL(PPSETFLAGS) /* CDROM stuff */ COMPATIBLE_IOCTL(CDROMPAUSE) COMPATIBLE_IOCTL(CDROMRESUME) -- cgit v1.2.3-70-g09d2 From 55820ee2f8c767a2833b21bd365e5753f50bd8ce Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 5 Jul 2005 14:08:10 -0700 Subject: [NET]: Fix signedness issues in net/core/filter.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the code to load packet data into a register: k = fentry->k; if (k < 0) { ... 
} else { u32 _tmp, *p; p = skb_header_pointer(skb, k, 4, &_tmp); if (p != NULL) { A = ntohl(*p); continue; } } skb_header_pointer checks if the requested data is within the linear area: int hlen = skb_headlen(skb); if (offset + len <= hlen) return skb->data + offset; When offset is within [INT_MAX-len+1..INT_MAX] the addition will result in a negative number which is <= hlen. I couldn't trigger a crash on my AMD64 with 2GB of memory, but a coworker tried on his x86 machine and it crashed immediately. This patch fixes the check in skb_header_pointer to handle large positive offsets similar to skb_copy_bits. Invalid data can still be accessed using negative offsets (also similar to skb_copy_bits), anyone using negative offsets needs to verify them himself. Thanks to Thomas Vögtle for verifying the problem by crashing his machine and providing me with an Oops. Signed-off-by: Patrick McHardy Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 416a2e4024b..fbcb1865197 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1211,7 +1211,7 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, { int hlen = skb_headlen(skb); - if (offset + len <= hlen) + if (hlen - offset >= len) return skb->data + offset; if (skb_copy_bits(skb, offset, buffer, len) < 0) -- cgit v1.2.3-70-g09d2 From e176fe8954a5239c24afe79b1001ba3c29511963 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 5 Jul 2005 14:12:44 -0700 Subject: [NET]: Remove unused security member in sk_buff Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 4 +--- include/linux/tc_ematch/tc_em_meta.h | 2 +- net/core/skbuff.c | 2 -- net/ipv4/ip_output.c | 1 - net/ipv6/ip6_output.c | 1 - net/sched/em_meta.c | 6 ------ 6 files changed, 2 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fbcb1865197..1e6290f4f81 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -183,7 +183,6 @@ struct skb_shared_info { * @priority: Packet queueing priority * @users: User count - see {datagram,tcp}.c * @protocol: Packet protocol from driver - * @security: Security level of packet * @truesize: Buffer size * @head: Head of buffer * @data: Data head pointer @@ -255,8 +254,7 @@ struct sk_buff { pkt_type, ip_summed; __u32 priority; - unsigned short protocol, - security; + unsigned short protocol; void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index a6b2cc530af..bcb762d9312 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -45,7 +45,7 @@ enum TCF_META_ID_REALDEV, TCF_META_ID_PRIORITY, TCF_META_ID_PROTOCOL, - TCF_META_ID_SECURITY, + TCF_META_ID_SECURITY, /* obsolete */ TCF_META_ID_PKTTYPE, TCF_META_ID_PKTLEN, TCF_META_ID_DATALEN, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bb73b2190ec..733deee24b9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -357,7 +357,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) C(ip_summed); C(priority); C(protocol); - C(security); n->destructor = NULL; #ifdef CONFIG_NETFILTER C(nfmark); @@ -422,7 +421,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->pkt_type = old->pkt_type; new->stamp = old->stamp; new->destructor = NULL; - new->security = old->security; #ifdef CONFIG_NETFILTER new->nfmark = old->nfmark; new->nfcache = old->nfcache; diff --git 
a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6ce5c3292f9..1bfa49eda96 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -389,7 +389,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->pkt_type = from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; - to->security = from->security; dst_release(to->dst); to->dst = dst_clone(from->dst); to->dev = from->dev; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 06e7cdaeedc..1f2c2f9e353 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -465,7 +465,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->pkt_type = from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; - to->security = from->security; dst_release(to->dst); to->dst = dst_clone(from->dst); to->dev = from->dev; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 48bb23c2a35..53d98f8d3d8 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -205,11 +205,6 @@ META_COLLECTOR(int_protocol) dst->value = skb->protocol; } -META_COLLECTOR(int_security) -{ - dst->value = skb->security; -} - META_COLLECTOR(int_pkttype) { dst->value = skb->pkt_type; @@ -524,7 +519,6 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { [META_ID(REALDEV)] = META_FUNC(int_realdev), [META_ID(PRIORITY)] = META_FUNC(int_priority), [META_ID(PROTOCOL)] = META_FUNC(int_protocol), - [META_ID(SECURITY)] = META_FUNC(int_security), [META_ID(PKTTYPE)] = META_FUNC(int_pkttype), [META_ID(PKTLEN)] = META_FUNC(int_pktlen), [META_ID(DATALEN)] = META_FUNC(int_datalen), -- cgit v1.2.3-70-g09d2 From 1cbb3380ef683f742876f48e3739b3df4ea9e168 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 5 Jul 2005 14:13:41 -0700 Subject: [NET]: Reduce size of sk_buff by 4 bytes Reduce local_df to a bit field and ip_summed to a 2 bits field thus saving 13 bits. 
Move bit fields, packet type, and protocol into the spare area between the priority and the destructor. Saves 4 bytes on both 32-bit and 64-bit architectures. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/skbuff.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1e6290f4f81..14b95041349 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -248,17 +248,18 @@ struct sk_buff { data_len, mac_len, csum; - unsigned char local_df, - cloned:1, - nohdr:1, - pkt_type, - ip_summed; __u32 priority; - unsigned short protocol; + __u8 local_df:1, + cloned:1, + ip_summed:2, + nohdr:1; + /* 3 bits spare */ + __u8 pkt_type; + __u16 protocol; void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER - unsigned long nfmark; + unsigned long nfmark; __u32 nfcache; __u32 nfctinfo; struct nf_conntrack *nfct; -- cgit v1.2.3-70-g09d2 From bc971dee6ece1fd0d431948924becd9c50e7b778 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Jul 2005 15:03:46 -0700 Subject: [SHAPER]: Switch to spinlocks. Dave, you were right and the sleeping locks in shaper were broken. Markus Kanet noticed this and also tested the patch below that switches locking to spinlocks. Signed-off-by: Christoph Hellwig Signed-off-by: David S.
Miller --- drivers/net/shaper.c | 42 ++++++++++++++++-------------------------- include/linux/if_shaper.h | 2 +- 2 files changed, 17 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/shaper.c b/drivers/net/shaper.c index 20edeb34579..3ad0b6751f6 100644 --- a/drivers/net/shaper.c +++ b/drivers/net/shaper.c @@ -135,10 +135,8 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct shaper *shaper = dev->priv; struct sk_buff *ptr; - - if (down_trylock(&shaper->sem)) - return -1; - + + spin_lock(&shaper->lock); ptr=shaper->sendq.prev; /* @@ -232,7 +230,7 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev) shaper->stats.collisions++; } shaper_kick(shaper); - up(&shaper->sem); + spin_unlock(&shaper->lock); return 0; } @@ -271,11 +269,9 @@ static void shaper_timer(unsigned long data) { struct shaper *shaper = (struct shaper *)data; - if (!down_trylock(&shaper->sem)) { - shaper_kick(shaper); - up(&shaper->sem); - } else - mod_timer(&shaper->timer, jiffies); + spin_lock(&shaper->lock); + shaper_kick(shaper); + spin_unlock(&shaper->lock); } /* @@ -331,21 +327,6 @@ static void shaper_kick(struct shaper *shaper) } -/* - * Flush the shaper queues on a closedown - */ - -static void shaper_flush(struct shaper *shaper) -{ - struct sk_buff *skb; - - down(&shaper->sem); - while((skb=skb_dequeue(&shaper->sendq))!=NULL) - dev_kfree_skb(skb); - shaper_kick(shaper); - up(&shaper->sem); -} - /* * Bring the interface up. We just disallow this until a * bind. 
@@ -375,7 +356,15 @@ static int shaper_open(struct net_device *dev) static int shaper_close(struct net_device *dev) { struct shaper *shaper=dev->priv; - shaper_flush(shaper); + struct sk_buff *skb; + + while ((skb = skb_dequeue(&shaper->sendq)) != NULL) + dev_kfree_skb(skb); + + spin_lock_bh(&shaper->lock); + shaper_kick(shaper); + spin_unlock_bh(&shaper->lock); + del_timer_sync(&shaper->timer); return 0; } @@ -576,6 +565,7 @@ static void shaper_init_priv(struct net_device *dev) init_timer(&sh->timer); sh->timer.function=shaper_timer; sh->timer.data=(unsigned long)sh; + spin_lock_init(&sh->lock); } /* diff --git a/include/linux/if_shaper.h b/include/linux/if_shaper.h index 004e6f09a6e..68c896a36a3 100644 --- a/include/linux/if_shaper.h +++ b/include/linux/if_shaper.h @@ -23,7 +23,7 @@ struct shaper __u32 shapeclock; unsigned long recovery; /* Time we can next clock a packet out on an empty queue */ - struct semaphore sem; + spinlock_t lock; struct net_device_stats stats; struct net_device *dev; int (*hard_start_xmit) (struct sk_buff *skb, -- cgit v1.2.3-70-g09d2 From c1b4a7e69576d65efc31a8cea0714173c2841244 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 5 Jul 2005 15:24:38 -0700 Subject: [TCP]: Move to new TSO segmenting scheme. Make TSO segment transmit size decisions at send time not earlier. The basic scheme is that we try to build as large a TSO frame as possible when pulling in the user data, but the size of the TSO frame output to the card is determined at transmit time. This is guided by tp->xmit_size_goal. It is always set to a multiple of MSS and tells sendmsg/sendpage how large an SKB to try and build. Later, tcp_write_xmit() and tcp_push_one() chop up the packet if necessary and conditions warrant. These routines can also decide to "defer" in order to wait for more ACKs to arrive and thus allow larger TSO frames to be emitted. 
A general observation is that TSO elongates the pipe, thus requiring a larger congestion window and larger buffering especially at the sender side. Therefore, it is important that applications 1) get a large enough socket send buffer (this is accomplished by our dynamic send buffer expansion code) 2) do large enough writes. Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- include/net/tcp.h | 4 +- net/ipv4/tcp.c | 26 ++- net/ipv4/tcp_input.c | 10 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_output.c | 578 +++++++++++++++++++++++++++++++------------------- net/ipv6/tcp_ipv6.c | 2 +- 7 files changed, 384 insertions(+), 240 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index dfd93d03f5d..e4fd82e4210 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -286,7 +286,7 @@ struct tcp_sock { __u32 max_window; /* Maximal window ever seen from peer */ __u32 pmtu_cookie; /* Last pmtu seen by socket */ __u32 mss_cache; /* Cached effective mss, not including SACKS */ - __u16 mss_cache_std; /* Like mss_cache, but without TSO */ + __u16 xmit_size_goal; /* Goal for segmenting output packets */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ __u8 ca_state; /* State of fast-retransmit machine */ __u8 retransmits; /* Number of unrecovered RTO timeouts. 
*/ diff --git a/include/net/tcp.h b/include/net/tcp.h index b19238027da..a166918ca56 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -862,7 +862,7 @@ extern int tcp_write_wakeup(struct sock *); extern void tcp_send_fin(struct sock *sk); extern void tcp_send_active_reset(struct sock *sk, int priority); extern int tcp_send_synack(struct sock *); -extern void tcp_push_one(struct sock *, unsigned mss_now); +extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); extern void tcp_send_delayed_ack(struct sock *sk); @@ -968,7 +968,7 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long static inline void tcp_initialize_rcv_mss(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - unsigned int hint = min(tp->advmss, tp->mss_cache_std); + unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); hint = min(hint, tp->rcv_wnd/2); hint = min(hint, TCP_MIN_RCVMSS); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2ba73bf3a8f..29894c74916 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -615,7 +615,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse size_t psize, int flags) { struct tcp_sock *tp = tcp_sk(sk); - int mss_now; + int mss_now, size_goal; int err; ssize_t copied; long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); @@ -628,6 +628,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); + size_goal = tp->xmit_size_goal; copied = 0; err = -EPIPE; @@ -641,7 +642,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse int offset = poffset % PAGE_SIZE; int size = min_t(size_t, psize, PAGE_SIZE - offset); - if (!sk->sk_send_head || (copy = mss_now - skb->len) <= 0) { + if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) { new_segment: if (!sk_stream_memory_free(sk)) 
goto wait_for_sndbuf; @@ -652,7 +653,7 @@ new_segment: goto wait_for_memory; skb_entail(sk, tp, skb); - copy = mss_now; + copy = size_goal; } if (copy > size) @@ -693,7 +694,7 @@ new_segment: if (!(psize -= copy)) goto out; - if (skb->len != mss_now || (flags & MSG_OOB)) + if (skb->len < mss_now || (flags & MSG_OOB)) continue; if (forced_push(tp)) { @@ -713,6 +714,7 @@ wait_for_memory: goto do_error; mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); + size_goal = tp->xmit_size_goal; } out: @@ -754,7 +756,7 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, static inline int select_size(struct sock *sk, struct tcp_sock *tp) { - int tmp = tp->mss_cache_std; + int tmp = tp->mss_cache; if (sk->sk_route_caps & NETIF_F_SG) { if (sk->sk_route_caps & NETIF_F_TSO) @@ -778,7 +780,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int iovlen, flags; - int mss_now; + int mss_now, size_goal; int err, copied; long timeo; @@ -797,6 +799,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); + size_goal = tp->xmit_size_goal; /* Ok commence sending. */ iovlen = msg->msg_iovlen; @@ -819,7 +822,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, skb = sk->sk_write_queue.prev; if (!sk->sk_send_head || - (copy = mss_now - skb->len) <= 0) { + (copy = size_goal - skb->len) <= 0) { new_segment: /* Allocate new segment. If the interface is SG, @@ -842,7 +845,7 @@ new_segment: skb->ip_summed = CHECKSUM_HW; skb_entail(sk, tp, skb); - copy = mss_now; + copy = size_goal; } /* Try to append data to the end of skb. 
*/ @@ -937,7 +940,7 @@ new_segment: if ((seglen -= copy) == 0 && iovlen == 0) goto out; - if (skb->len != mss_now || (flags & MSG_OOB)) + if (skb->len < mss_now || (flags & MSG_OOB)) continue; if (forced_push(tp)) { @@ -957,6 +960,7 @@ wait_for_memory: goto do_error; mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); + size_goal = tp->xmit_size_goal; } } @@ -2128,7 +2132,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rto = jiffies_to_usecs(tp->rto); info->tcpi_ato = jiffies_to_usecs(tp->ack.ato); - info->tcpi_snd_mss = tp->mss_cache_std; + info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = tp->ack.rcv_mss; info->tcpi_unacked = tp->packets_out; @@ -2178,7 +2182,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, switch (optname) { case TCP_MAXSEG: - val = tp->mss_cache_std; + val = tp->mss_cache; if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) val = tp->rx_opt.user_mss; break; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2ef2f355b8b..8de2f1071c2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -740,10 +740,10 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); if (!cwnd) { - if (tp->mss_cache_std > 1460) + if (tp->mss_cache > 1460) cwnd = 2; else - cwnd = (tp->mss_cache_std > 1095) ? 3 : 4; + cwnd = (tp->mss_cache > 1095) ? 
3 : 4; } return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } @@ -914,7 +914,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (sk->sk_route_caps & NETIF_F_TSO) { sk->sk_route_caps &= ~NETIF_F_TSO; sock_set_flag(sk, SOCK_NO_LARGESEND); - tp->mss_cache = tp->mss_cache_std; + tp->mss_cache = tp->mss_cache; } if (!tp->sacked_out) @@ -1077,7 +1077,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ (IsFack(tp) || !before(lost_retrans, TCP_SKB_CB(skb)->ack_seq + tp->reordering * - tp->mss_cache_std))) { + tp->mss_cache))) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); @@ -3334,7 +3334,7 @@ static void tcp_new_space(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tcp_should_expand_sndbuf(sk, tp)) { - int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache_std) + + int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), demanded = max_t(unsigned int, tp->snd_cwnd, tp->reordering + 1); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ebf112347a9..62f62bb05c2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2045,7 +2045,7 @@ static int tcp_v4_init_sock(struct sock *sk) */ tp->snd_ssthresh = 0x7fffffff; /* Infinity */ tp->snd_cwnd_clamp = ~0; - tp->mss_cache_std = tp->mss_cache = 536; + tp->mss_cache = 536; tp->reordering = sysctl_tcp_reordering; tp->ca_ops = &tcp_init_congestion_ops; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0a4cd24b657..fd3ce38184a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -49,7 +49,7 @@ int sysctl_tcp_retrans_collapse = 1; * will allow a single TSO frame to consume. Building TSO frames * which are too large can cause TCP streams to be bursty. 
*/ -int sysctl_tcp_tso_win_divisor = 8; +int sysctl_tcp_tso_win_divisor = 3; static inline void update_send_head(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) @@ -403,21 +403,11 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) sk->sk_send_head = skb; } -static inline void tcp_tso_set_push(struct sk_buff *skb) -{ - /* Force push to be on for any TSO frames to workaround - * problems with busted implementations like Mac OS-X that - * hold off socket receive wakeups until push is seen. - */ - if (tcp_skb_pcount(skb) > 1) - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; -} - static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - if (skb->len <= tp->mss_cache_std || + if (skb->len <= tp->mss_cache || !(sk->sk_route_caps & NETIF_F_TSO)) { /* Avoid the costly divide in the normal * non-TSO case. @@ -427,164 +417,10 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) } else { unsigned int factor; - factor = skb->len + (tp->mss_cache_std - 1); - factor /= tp->mss_cache_std; + factor = skb->len + (tp->mss_cache - 1); + factor /= tp->mss_cache; skb_shinfo(skb)->tso_segs = factor; - skb_shinfo(skb)->tso_size = tp->mss_cache_std; - } -} - -/* Does SKB fit into the send window? */ -static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss) -{ - u32 end_seq = TCP_SKB_CB(skb)->end_seq; - - return !after(end_seq, tp->snd_una + tp->snd_wnd); -} - -/* Can at least one segment of SKB be sent right now, according to the - * congestion window rules? If so, return how many segments are allowed. - */ -static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb) -{ - u32 in_flight, cwnd; - - /* Don't be strict about the congestion window for the final FIN. 
*/ - if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) - return 1; - - in_flight = tcp_packets_in_flight(tp); - cwnd = tp->snd_cwnd; - if (in_flight < cwnd) - return (cwnd - in_flight); - - return 0; -} - -static inline int tcp_minshall_check(const struct tcp_sock *tp) -{ - return after(tp->snd_sml,tp->snd_una) && - !after(tp->snd_sml, tp->snd_nxt); -} - -/* Return 0, if packet can be sent now without violation Nagle's rules: - * 1. It is full sized. - * 2. Or it contains FIN. (already checked by caller) - * 3. Or TCP_NODELAY was set. - * 4. Or TCP_CORK is not set, and all sent packets are ACKed. - * With Minshall's modification: all sent small packets are ACKed. - */ - -static inline int tcp_nagle_check(const struct tcp_sock *tp, - const struct sk_buff *skb, - unsigned mss_now, int nonagle) -{ - return (skb->len < mss_now && - ((nonagle&TCP_NAGLE_CORK) || - (!nonagle && - tp->packets_out && - tcp_minshall_check(tp)))); -} - -/* Return non-zero if the Nagle test allows this packet to be - * sent now. - */ -static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, - unsigned int cur_mss, int nonagle) -{ - /* Nagle rule does not apply to frames, which sit in the middle of the - * write_queue (they have no chances to get new data). - * - * This is implemented in the callers, where they modify the 'nonagle' - * argument based upon the location of SKB in the send queue. - */ - if (nonagle & TCP_NAGLE_PUSH) - return 1; - - /* Don't use the nagle rule for urgent data (or for the final FIN). */ - if (tp->urg_mode || - (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) - return 1; - - if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) - return 1; - - return 0; -} - -/* This must be invoked the first time we consider transmitting - * SKB onto the wire. 
- */ -static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb) -{ - int tso_segs = tcp_skb_pcount(skb); - - if (!tso_segs) { - tcp_set_skb_tso_segs(sk, skb); - tso_segs = tcp_skb_pcount(skb); - } - return tso_segs; -} - -/* This checks if the data bearing packet SKB (usually sk->sk_send_head) - * should be put on the wire right now. If so, it returns the number of - * packets allowed by the congestion window. - */ -static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, - unsigned int cur_mss, int nonagle) -{ - struct tcp_sock *tp = tcp_sk(sk); - unsigned int cwnd_quota; - - tcp_init_tso_segs(sk, skb); - - if (!tcp_nagle_test(tp, skb, cur_mss, nonagle)) - return 0; - - cwnd_quota = tcp_cwnd_test(tp, skb); - if (cwnd_quota && - !tcp_snd_wnd_test(tp, skb, cur_mss)) - cwnd_quota = 0; - - return cwnd_quota; -} - -static inline int tcp_skb_is_last(const struct sock *sk, - const struct sk_buff *skb) -{ - return skb->next == (struct sk_buff *)&sk->sk_write_queue; -} - -int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp) -{ - struct sk_buff *skb = sk->sk_send_head; - - return (skb && - tcp_snd_test(sk, skb, tcp_current_mss(sk, 1), - (tcp_skb_is_last(sk, skb) ? - TCP_NAGLE_PUSH : - tp->nonagle))); -} - - -/* Send _single_ skb sitting at the send head. This function requires - * true push pending frames to setup probe timer etc. - */ -void tcp_push_one(struct sock *sk, unsigned cur_mss) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = sk->sk_send_head; - - if (tcp_snd_test(sk, skb, cur_mss, TCP_NAGLE_PUSH)) { - /* Send it out now. 
*/ - TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_tso_set_push(skb); - if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) { - sk->sk_send_head = NULL; - tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - tcp_packets_out_inc(sk, tp, skb); - return; - } + skb_shinfo(skb)->tso_size = tp->mss_cache; } } @@ -791,7 +627,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) /* And store cached results */ tp->pmtu_cookie = pmtu; - tp->mss_cache = tp->mss_cache_std = mss_now; + tp->mss_cache = mss_now; return mss_now; } @@ -803,56 +639,47 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) * cannot be large. However, taking into account rare use of URG, this * is not a big flaw. */ - -unsigned int tcp_current_mss(struct sock *sk, int large) +unsigned int tcp_current_mss(struct sock *sk, int large_allowed) { struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); - unsigned int do_large, mss_now; + u32 mss_now; + u16 xmit_size_goal; + int doing_tso = 0; + + mss_now = tp->mss_cache; + + if (large_allowed && + (sk->sk_route_caps & NETIF_F_TSO) && + !tp->urg_mode) + doing_tso = 1; - mss_now = tp->mss_cache_std; if (dst) { u32 mtu = dst_mtu(dst); if (mtu != tp->pmtu_cookie) mss_now = tcp_sync_mss(sk, mtu); } - do_large = (large && - (sk->sk_route_caps & NETIF_F_TSO) && - !tp->urg_mode); + if (tp->rx_opt.eff_sacks) + mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + + (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); - if (do_large) { - unsigned int large_mss, factor, limit; + xmit_size_goal = mss_now; - large_mss = 65535 - tp->af_specific->net_header_len - + if (doing_tso) { + xmit_size_goal = 65535 - + tp->af_specific->net_header_len - tp->ext_header_len - tp->tcp_header_len; - if (tp->max_window && large_mss > (tp->max_window>>1)) - large_mss = max((tp->max_window>>1), - 68U - tp->tcp_header_len); - - factor = large_mss / mss_now; + if (tp->max_window && + (xmit_size_goal > (tp->max_window >> 1))) + xmit_size_goal = max((tp->max_window >> 1), + 68U - 
tp->tcp_header_len); - /* Always keep large mss multiple of real mss, but - * do not exceed 1/tso_win_divisor of the congestion window - * so we can keep the ACK clock ticking and minimize - * bursting. - */ - limit = tp->snd_cwnd; - if (sysctl_tcp_tso_win_divisor) - limit /= sysctl_tcp_tso_win_divisor; - limit = max(1U, limit); - if (factor > limit) - factor = limit; - - tp->mss_cache = mss_now * factor; - - mss_now = tp->mss_cache; + xmit_size_goal -= (xmit_size_goal % mss_now); } + tp->xmit_size_goal = xmit_size_goal; - if (tp->rx_opt.eff_sacks) - mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + - (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); return mss_now; } @@ -876,6 +703,251 @@ static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) } } +static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd) +{ + u32 window, cwnd_len; + + window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq); + cwnd_len = mss_now * cwnd; + return min(window, cwnd_len); +} + +/* Can at least one segment of SKB be sent right now, according to the + * congestion window rules? If so, return how many segments are allowed. + */ +static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb) +{ + u32 in_flight, cwnd; + + /* Don't be strict about the congestion window for the final FIN. */ + if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) + return 1; + + in_flight = tcp_packets_in_flight(tp); + cwnd = tp->snd_cwnd; + if (in_flight < cwnd) + return (cwnd - in_flight); + + return 0; +} + +/* This must be invoked the first time we consider transmitting + * SKB onto the wire. 
+ */ +static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb) +{ + int tso_segs = tcp_skb_pcount(skb); + + if (!tso_segs) { + tcp_set_skb_tso_segs(sk, skb); + tso_segs = tcp_skb_pcount(skb); + } + return tso_segs; +} + +static inline int tcp_minshall_check(const struct tcp_sock *tp) +{ + return after(tp->snd_sml,tp->snd_una) && + !after(tp->snd_sml, tp->snd_nxt); +} + +/* Return 0, if packet can be sent now without violation Nagle's rules: + * 1. It is full sized. + * 2. Or it contains FIN. (already checked by caller) + * 3. Or TCP_NODELAY was set. + * 4. Or TCP_CORK is not set, and all sent packets are ACKed. + * With Minshall's modification: all sent small packets are ACKed. + */ + +static inline int tcp_nagle_check(const struct tcp_sock *tp, + const struct sk_buff *skb, + unsigned mss_now, int nonagle) +{ + return (skb->len < mss_now && + ((nonagle&TCP_NAGLE_CORK) || + (!nonagle && + tp->packets_out && + tcp_minshall_check(tp)))); +} + +/* Return non-zero if the Nagle test allows this packet to be + * sent now. + */ +static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, + unsigned int cur_mss, int nonagle) +{ + /* Nagle rule does not apply to frames, which sit in the middle of the + * write_queue (they have no chances to get new data). + * + * This is implemented in the callers, where they modify the 'nonagle' + * argument based upon the location of SKB in the send queue. + */ + if (nonagle & TCP_NAGLE_PUSH) + return 1; + + /* Don't use the nagle rule for urgent data (or for the final FIN). */ + if (tp->urg_mode || + (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) + return 1; + + if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) + return 1; + + return 0; +} + +/* Does at least the first segment of SKB fit into the send window? 
*/ +static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss) +{ + u32 end_seq = TCP_SKB_CB(skb)->end_seq; + + if (skb->len > cur_mss) + end_seq = TCP_SKB_CB(skb)->seq + cur_mss; + + return !after(end_seq, tp->snd_una + tp->snd_wnd); +} + +/* This checks if the data bearing packet SKB (usually sk->sk_send_head) + * should be put on the wire right now. If so, it returns the number of + * packets allowed by the congestion window. + */ +static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, + unsigned int cur_mss, int nonagle) +{ + struct tcp_sock *tp = tcp_sk(sk); + unsigned int cwnd_quota; + + tcp_init_tso_segs(sk, skb); + + if (!tcp_nagle_test(tp, skb, cur_mss, nonagle)) + return 0; + + cwnd_quota = tcp_cwnd_test(tp, skb); + if (cwnd_quota && + !tcp_snd_wnd_test(tp, skb, cur_mss)) + cwnd_quota = 0; + + return cwnd_quota; +} + +static inline int tcp_skb_is_last(const struct sock *sk, + const struct sk_buff *skb) +{ + return skb->next == (struct sk_buff *)&sk->sk_write_queue; +} + +int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp) +{ + struct sk_buff *skb = sk->sk_send_head; + + return (skb && + tcp_snd_test(sk, skb, tcp_current_mss(sk, 1), + (tcp_skb_is_last(sk, skb) ? + TCP_NAGLE_PUSH : + tp->nonagle))); +} + +/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet + * which is put after SKB on the list. It is very much like + * tcp_fragment() except that it may make several kinds of assumptions + * in order to speed up the splitting operation. In particular, we + * know that all the data is in scatter-gather pages, and that the + * packet has never been sent out before (and thus is not cloned). + */ +static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len) +{ + struct sk_buff *buff; + int nlen = skb->len - len; + u16 flags; + + /* All of a TSO frame must be composed of paged data. 
*/ + BUG_ON(skb->len != skb->data_len); + + buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC); + if (unlikely(buff == NULL)) + return -ENOMEM; + + buff->truesize = nlen; + skb->truesize -= nlen; + + /* Correct the sequence numbers. */ + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len; + TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; + + /* PSH and FIN should only be set in the second packet. */ + flags = TCP_SKB_CB(skb)->flags; + TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); + TCP_SKB_CB(buff)->flags = flags; + + /* This packet was never sent out yet, so no SACK bits. */ + TCP_SKB_CB(buff)->sacked = 0; + + buff->ip_summed = skb->ip_summed = CHECKSUM_HW; + skb_split(skb, buff, len); + + /* Fix up tso_factor for both original and new SKB. */ + tcp_set_skb_tso_segs(sk, skb); + tcp_set_skb_tso_segs(sk, buff); + + /* Link BUFF into the send queue. */ + skb_header_release(buff); + __skb_append(skb, buff); + + return 0; +} + +/* Try to defer sending, if possible, in order to minimize the amount + * of TSO splitting we do. View it as a kind of TSO Nagle test. + * + * This algorithm is from John Heffner. + */ +static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) +{ + u32 send_win, cong_win, limit, in_flight; + + if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) + return 0; + + in_flight = tcp_packets_in_flight(tp); + + BUG_ON(tcp_skb_pcount(skb) <= 1 || + (tp->snd_cwnd <= in_flight)); + + send_win = (tp->snd_una + tp->snd_wnd) - TCP_SKB_CB(skb)->seq; + + /* From in_flight test above, we know that cwnd > in_flight. */ + cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache; + + limit = min(send_win, cong_win); + + /* If sk_send_head can be sent fully now, just do it. 
*/ + if (skb->len <= limit) + return 0; + + if (sysctl_tcp_tso_win_divisor) { + u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); + + /* If at least some fraction of a window is available, + * just use it. + */ + chunk /= sysctl_tcp_tso_win_divisor; + if (limit >= chunk) + return 0; + } else { + /* Different approach, try not to defer past a single + * ACK. Receiver should ACK every other full sized + * frame, so if we have space for more than 3 frames + * then send now. + */ + if (limit > tcp_max_burst(tp) * tp->mss_cache) + return 0; + } + + /* Ok, it looks like it is advisable to defer. */ + return 1; +} + /* This routine writes packets to the network. It advances the * send_head. This happens as incoming acks open up the remote * window for us. @@ -887,8 +959,8 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - unsigned int tso_segs, cwnd_quota; - int sent_pkts; + unsigned int tso_segs, sent_pkts; + int cwnd_quota; /* If we are closed, the bytes will have to remain here. * In time closedown will finish, we empty the write queue and all @@ -903,24 +975,44 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) tso_segs = tcp_init_tso_segs(sk, skb); cwnd_quota = tcp_cwnd_test(tp, skb); + if (unlikely(!cwnd_quota)) + goto out; + sent_pkts = 0; + while (likely(tcp_snd_wnd_test(tp, skb, mss_now))) { + BUG_ON(!tso_segs); - while (cwnd_quota >= tso_segs) { - if (unlikely(!tcp_nagle_test(tp, skb, mss_now, - (tcp_skb_is_last(sk, skb) ? - nonagle : TCP_NAGLE_PUSH)))) - break; + if (tso_segs == 1) { + if (unlikely(!tcp_nagle_test(tp, skb, mss_now, + (tcp_skb_is_last(sk, skb) ? 
+ nonagle : TCP_NAGLE_PUSH)))) + break; + } else { + if (tcp_tso_should_defer(sk, tp, skb)) + break; + } - if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) - break; + if (tso_segs > 1) { + u32 limit = tcp_window_allows(tp, skb, + mss_now, cwnd_quota); + + if (skb->len < limit) { + unsigned int trim = skb->len % mss_now; - if (unlikely(skb->len > mss_now)) { + if (trim) + limit = skb->len - trim; + } + if (skb->len > limit) { + if (tso_fragment(sk, skb, limit)) + break; + } + } else if (unlikely(skb->len > mss_now)) { if (unlikely(tcp_fragment(sk, skb, mss_now))) break; } TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_tso_set_push(skb); + if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))) break; @@ -936,6 +1028,11 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) * the packet above, tso_segs will no longer be valid. */ cwnd_quota -= tcp_skb_pcount(skb); + + BUG_ON(cwnd_quota < 0); + if (!cwnd_quota) + break; + skb = sk->sk_send_head; if (!skb) break; @@ -946,7 +1043,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) tcp_cwnd_validate(sk, tp); return 0; } - +out: return !tp->packets_out && sk->sk_send_head; } @@ -965,6 +1062,53 @@ void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, } } +/* Send _single_ skb sitting at the send head. This function requires + * true push pending frames to setup probe timer etc. 
+ */ +void tcp_push_one(struct sock *sk, unsigned int mss_now) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb = sk->sk_send_head; + unsigned int tso_segs, cwnd_quota; + + BUG_ON(!skb || skb->len < mss_now); + + tso_segs = tcp_init_tso_segs(sk, skb); + cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); + + if (likely(cwnd_quota)) { + BUG_ON(!tso_segs); + + if (tso_segs > 1) { + u32 limit = tcp_window_allows(tp, skb, + mss_now, cwnd_quota); + + if (skb->len < limit) { + unsigned int trim = skb->len % mss_now; + + if (trim) + limit = skb->len - trim; + } + if (skb->len > limit) { + if (unlikely(tso_fragment(sk, skb, limit))) + return; + } + } else if (unlikely(skb->len > mss_now)) { + if (unlikely(tcp_fragment(sk, skb, mss_now))) + return; + } + + /* Send it out now. */ + TCP_SKB_CB(skb)->when = tcp_time_stamp; + + if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) { + update_send_head(sk, tp, skb); + tcp_cwnd_validate(sk, tp); + return; + } + } +} + /* This function returns the amount that we can raise the * usable window based on the following constraints * @@ -1222,7 +1366,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (sk->sk_route_caps & NETIF_F_TSO) { sk->sk_route_caps &= ~NETIF_F_TSO; sock_set_flag(sk, SOCK_NO_LARGESEND); - tp->mss_cache = tp->mss_cache_std; } if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) @@ -1284,7 +1427,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * is still in somebody's hands, else make a clone. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_tso_set_push(skb); err = tcp_transmit_skb(sk, (skb_cloned(skb) ? 
pskb_copy(skb, GFP_ATOMIC): @@ -1853,14 +1995,12 @@ int tcp_write_wakeup(struct sock *sk) if (sk->sk_route_caps & NETIF_F_TSO) { sock_set_flag(sk, SOCK_NO_LARGESEND); sk->sk_route_caps &= ~NETIF_F_TSO; - tp->mss_cache = tp->mss_cache_std; } } else if (!tcp_skb_pcount(skb)) tcp_set_skb_tso_segs(sk, skb); TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_tso_set_push(skb); err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); if (!err) { update_send_head(sk, tp, skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9dac7fdf472..f6e288dc116 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2018,7 +2018,7 @@ static int tcp_v6_init_sock(struct sock *sk) */ tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_clamp = ~0; - tp->mss_cache_std = tp->mss_cache = 536; + tp->mss_cache = 536; tp->reordering = sysctl_tcp_reordering; -- cgit v1.2.3-70-g09d2 From 6772926bef3c9f0ec761b39e5702535471fff70b Mon Sep 17 00:00:00 2001 From: Rusty Lynch Date: Tue, 5 Jul 2005 18:54:50 -0700 Subject: [PATCH] kprobes: fix namespace problem and sparc64 build The following renames arch_init, a kprobes function for performing any architecture specific initialization, to arch_init_kprobes in order to cleanup the namespace. Also, this patch adds arch_init_kprobes to sparc64 to fix the sparc64 kprobes build from the last return probe patch. 
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 2 +- arch/ia64/kernel/kprobes.c | 2 +- arch/ppc64/kernel/kprobes.c | 2 +- arch/sparc64/kernel/kprobes.c | 5 +++++ arch/x86_64/kernel/kprobes.c | 2 +- include/linux/kprobes.h | 2 +- kernel/kprobes.c | 2 +- 7 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index fc8b1752176..a6d8c45961d 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -537,7 +537,7 @@ static struct kprobe trampoline_p = { .pre_handler = trampoline_probe_handler }; -int __init arch_init(void) +int __init arch_init_kprobes(void) { return register_kprobe(&trampoline_p); } diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 3aa3167edbe..884f5cd27d8 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -713,7 +713,7 @@ static struct kprobe trampoline_p = { .pre_handler = trampoline_probe_handler }; -int __init arch_init(void) +int __init arch_init_kprobes(void) { trampoline_p.addr = (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip; diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c index 1d2ff6d6b0b..a3d519518fb 100644 --- a/arch/ppc64/kernel/kprobes.c +++ b/arch/ppc64/kernel/kprobes.c @@ -444,7 +444,7 @@ static struct kprobe trampoline_p = { .pre_handler = trampoline_probe_handler }; -int __init arch_init(void) +int __init arch_init_kprobes(void) { return register_kprobe(&trampoline_p); } diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c index bdac631cf01..bbf11f85dab 100644 --- a/arch/sparc64/kernel/kprobes.c +++ b/arch/sparc64/kernel/kprobes.c @@ -433,3 +433,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } +/* architecture specific initialization */ +int arch_init_kprobes(void) +{ + return 0; +} diff --git a/arch/x86_64/kernel/kprobes.c 
b/arch/x86_64/kernel/kprobes.c index acd2a778ebe..5c6dc705148 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -682,7 +682,7 @@ static struct kprobe trampoline_p = { .pre_handler = trampoline_probe_handler }; -int __init arch_init(void) +int __init arch_init_kprobes(void) { return register_kprobe(&trampoline_p); } diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index b7a194c4362..e050fc2d4c2 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -155,7 +155,7 @@ extern void arch_copy_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); extern void arch_remove_kprobe(struct kprobe *p); -extern int arch_init(void); +extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); extern kprobe_opcode_t *get_insn_slot(void); extern void free_insn_slot(kprobe_opcode_t *slot); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 90c0e82b650..b0237122b24 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -574,7 +574,7 @@ static int __init init_kprobes(void) INIT_HLIST_HEAD(&kretprobe_inst_table[i]); } - err = arch_init(); + err = arch_init_kprobes(); if (!err) err = register_die_notifier(&kprobe_exceptions_nb); -- cgit v1.2.3-70-g09d2 From 5e6557722e69840506eb8bc5a1edcdb4e447a917 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 6 Jul 2005 15:44:41 -0400 Subject: [PATCH] openfirmware: generate device table for userspace This converts the usage of struct of_match to struct of_device_id, similar to pci_device_id. This allows a device table to be generated, which can be parsed by depmod(8) to generate a map file for module loading. In order for hotplug to work with macio devices, patches to module-init-tools and hotplug must be applied. 
Those patches are available at: ftp://ftp.suse.com/pub/people/jeffm/linux/macio-hotplug/ Signed-off-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- arch/ppc/syslib/of_device.c | 15 ++++++++------- arch/ppc64/kernel/of_device.c | 15 ++++++++------- drivers/i2c/busses/i2c-keywest.c | 7 +++---- drivers/ide/ppc/pmac.c | 12 ++---------- drivers/macintosh/macio_asic.c | 4 ++-- drivers/macintosh/mediabay.c | 7 ++----- drivers/macintosh/therm_pm72.c | 9 ++++----- drivers/macintosh/therm_windtunnel.c | 6 +++--- drivers/net/bmac.c | 7 ++----- drivers/net/mace.c | 6 ++---- drivers/net/wireless/airport.c | 8 ++++---- drivers/scsi/mac53c94.c | 7 +++---- drivers/scsi/mesh.c | 8 +++----- drivers/serial/pmac_zilog.c | 9 +++------ drivers/video/platinumfb.c | 6 ++---- include/asm-ppc/macio.h | 5 +++-- include/asm-ppc/of_device.h | 20 ++++---------------- include/linux/mod_devicetable.h | 11 +++++++++++ scripts/mod/file2alias.c | 22 ++++++++++++++++++++++ 19 files changed, 91 insertions(+), 93 deletions(-) (limited to 'include/linux') diff --git a/arch/ppc/syslib/of_device.c b/arch/ppc/syslib/of_device.c index 49c0e34e2d6..1eb4f726ca9 100644 --- a/arch/ppc/syslib/of_device.c +++ b/arch/ppc/syslib/of_device.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -15,20 +16,20 @@ * Used by a driver to check whether an of_device present in the * system is in its list of supported devices. 
*/ -const struct of_match * of_match_device(const struct of_match *matches, +const struct of_device_id * of_match_device(const struct of_device_id *matches, const struct of_device *dev) { if (!dev->node) return NULL; - while (matches->name || matches->type || matches->compatible) { + while (matches->name[0] || matches->type[0] || matches->compatible[0]) { int match = 1; - if (matches->name && matches->name != OF_ANY_MATCH) + if (matches->name[0]) match &= dev->node->name && !strcmp(matches->name, dev->node->name); - if (matches->type && matches->type != OF_ANY_MATCH) + if (matches->type[0]) match &= dev->node->type && !strcmp(matches->type, dev->node->type); - if (matches->compatible && matches->compatible != OF_ANY_MATCH) + if (matches->compatible[0]) match &= device_is_compatible(dev->node, matches->compatible); if (match) @@ -42,7 +43,7 @@ static int of_platform_bus_match(struct device *dev, struct device_driver *drv) { struct of_device * of_dev = to_of_device(dev); struct of_platform_driver * of_drv = to_of_platform_driver(drv); - const struct of_match * matches = of_drv->match_table; + const struct of_device_id * matches = of_drv->match_table; if (!matches) return 0; @@ -75,7 +76,7 @@ static int of_device_probe(struct device *dev) int error = -ENODEV; struct of_platform_driver *drv; struct of_device *of_dev; - const struct of_match *match; + const struct of_device_id *match; drv = to_of_platform_driver(dev->driver); of_dev = to_of_device(dev); diff --git a/arch/ppc64/kernel/of_device.c b/arch/ppc64/kernel/of_device.c index 66bd5ab7c25..b80e81984ba 100644 --- a/arch/ppc64/kernel/of_device.c +++ b/arch/ppc64/kernel/of_device.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -15,20 +16,20 @@ * Used by a driver to check whether an of_device present in the * system is in its list of supported devices. 
*/ -const struct of_match * of_match_device(const struct of_match *matches, +const struct of_device_id *of_match_device(const struct of_device_id *matches, const struct of_device *dev) { if (!dev->node) return NULL; - while (matches->name || matches->type || matches->compatible) { + while (matches->name[0] || matches->type[0] || matches->compatible[0]) { int match = 1; - if (matches->name && matches->name != OF_ANY_MATCH) + if (matches->name[0]) match &= dev->node->name && !strcmp(matches->name, dev->node->name); - if (matches->type && matches->type != OF_ANY_MATCH) + if (matches->type[0]) match &= dev->node->type && !strcmp(matches->type, dev->node->type); - if (matches->compatible && matches->compatible != OF_ANY_MATCH) + if (matches->compatible[0]) match &= device_is_compatible(dev->node, matches->compatible); if (match) @@ -42,7 +43,7 @@ static int of_platform_bus_match(struct device *dev, struct device_driver *drv) { struct of_device * of_dev = to_of_device(dev); struct of_platform_driver * of_drv = to_of_platform_driver(drv); - const struct of_match * matches = of_drv->match_table; + const struct of_device_id * matches = of_drv->match_table; if (!matches) return 0; @@ -75,7 +76,7 @@ static int of_device_probe(struct device *dev) int error = -ENODEV; struct of_platform_driver *drv; struct of_device *of_dev; - const struct of_match *match; + const struct of_device_id *match; drv = to_of_platform_driver(dev->driver); of_dev = to_of_device(dev); diff --git a/drivers/i2c/busses/i2c-keywest.c b/drivers/i2c/busses/i2c-keywest.c index 363e545fc01..94ae808314f 100644 --- a/drivers/i2c/busses/i2c-keywest.c +++ b/drivers/i2c/busses/i2c-keywest.c @@ -698,7 +698,7 @@ dispose_iface(struct device *dev) } static int -create_iface_macio(struct macio_dev* dev, const struct of_match *match) +create_iface_macio(struct macio_dev* dev, const struct of_device_id *match) { return create_iface(dev->ofdev.node, &dev->ofdev.dev); } @@ -710,7 +710,7 @@ dispose_iface_macio(struct 
macio_dev* dev) } static int -create_iface_of_platform(struct of_device* dev, const struct of_match *match) +create_iface_of_platform(struct of_device* dev, const struct of_device_id *match) { return create_iface(dev->node, &dev->dev); } @@ -721,10 +721,9 @@ dispose_iface_of_platform(struct of_device* dev) return dispose_iface(&dev->dev); } -static struct of_match i2c_keywest_match[] = +static struct of_device_id i2c_keywest_match[] = { { - .name = OF_ANY_MATCH, .type = "i2c", .compatible = "keywest" }, diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index 818380b5fd2..be0fcc8f4b1 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1419,7 +1419,7 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif) * Attach to a macio probed interface */ static int __devinit -pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_match *match) +pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match) { void __iomem *base; unsigned long regbase; @@ -1637,27 +1637,19 @@ pmac_ide_pci_resume(struct pci_dev *pdev) return rc; } -static struct of_match pmac_ide_macio_match[] = +static struct of_device_id pmac_ide_macio_match[] = { { .name = "IDE", - .type = OF_ANY_MATCH, - .compatible = OF_ANY_MATCH }, { .name = "ATA", - .type = OF_ANY_MATCH, - .compatible = OF_ANY_MATCH }, { - .name = OF_ANY_MATCH, .type = "ide", - .compatible = OF_ANY_MATCH }, { - .name = OF_ANY_MATCH, .type = "ata", - .compatible = OF_ANY_MATCH }, {}, }; diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index d0bda7e3e6a..37b18ee08a2 100644 --- a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -33,7 +33,7 @@ static int macio_bus_match(struct device *dev, struct device_driver *drv) { struct macio_dev * macio_dev = to_macio_device(dev); struct macio_driver * macio_drv = to_macio_driver(drv); - const struct of_match * matches = macio_drv->match_table; + const struct of_device_id * matches = 
macio_drv->match_table; if (!matches) return 0; @@ -66,7 +66,7 @@ static int macio_device_probe(struct device *dev) int error = -ENODEV; struct macio_driver *drv; struct macio_dev *macio_dev; - const struct of_match *match; + const struct of_device_id *match; drv = to_macio_driver(dev->driver); macio_dev = to_macio_device(dev); diff --git a/drivers/macintosh/mediabay.c b/drivers/macintosh/mediabay.c index 4be709e13ee..7c16c25fc5d 100644 --- a/drivers/macintosh/mediabay.c +++ b/drivers/macintosh/mediabay.c @@ -642,7 +642,7 @@ static int __pmac media_bay_task(void *x) } } -static int __devinit media_bay_attach(struct macio_dev *mdev, const struct of_match *match) +static int __devinit media_bay_attach(struct macio_dev *mdev, const struct of_device_id *match) { struct media_bay_info* bay; u32 __iomem *regbase; @@ -797,23 +797,20 @@ static struct mb_ops keylargo_mb_ops __pmacdata = { * Therefore we do it all by polling the media bay once each tick. */ -static struct of_match media_bay_match[] = +static struct of_device_id media_bay_match[] = { { .name = "media-bay", - .type = OF_ANY_MATCH, .compatible = "keylargo-media-bay", .data = &keylargo_mb_ops, }, { .name = "media-bay", - .type = OF_ANY_MATCH, .compatible = "heathrow-media-bay", .data = &heathrow_mb_ops, }, { .name = "media-bay", - .type = OF_ANY_MATCH, .compatible = "ohare-media-bay", .data = &ohare_mb_ops, }, diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c index feb4e241385..703e3197331 100644 --- a/drivers/macintosh/therm_pm72.c +++ b/drivers/macintosh/therm_pm72.c @@ -120,6 +120,7 @@ #include #include #include +#include #include "therm_pm72.h" @@ -1986,7 +1987,7 @@ static void fcu_lookup_fans(struct device_node *fcu_node) } } -static int fcu_of_probe(struct of_device* dev, const struct of_match *match) +static int fcu_of_probe(struct of_device* dev, const struct of_device_id *match) { int rc; @@ -2009,12 +2010,10 @@ static int fcu_of_remove(struct of_device* dev) return 0; } 
-static struct of_match fcu_of_match[] = +static struct of_device_id fcu_match[] = { { - .name = OF_ANY_MATCH, .type = "fcu", - .compatible = OF_ANY_MATCH }, {}, }; @@ -2022,7 +2021,7 @@ static struct of_match fcu_of_match[] = static struct of_platform_driver fcu_of_platform_driver = { .name = "temperature", - .match_table = fcu_of_match, + .match_table = fcu_match, .probe = fcu_of_probe, .remove = fcu_of_remove }; diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index 61400f04015..cbb72eb0426 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -43,6 +43,7 @@ #include #include #include +#include #define LOG_TEMP 0 /* continously log temperature */ @@ -450,7 +451,7 @@ do_probe( struct i2c_adapter *adapter, int addr, int kind ) /************************************************************************/ static int -therm_of_probe( struct of_device *dev, const struct of_match *match ) +therm_of_probe( struct of_device *dev, const struct of_device_id *match ) { return i2c_add_driver( &g4fan_driver ); } @@ -461,9 +462,8 @@ therm_of_remove( struct of_device *dev ) return i2c_del_driver( &g4fan_driver ); } -static struct of_match therm_of_match[] = {{ +static struct of_device_id therm_of_match[] = {{ .name = "fan", - .type = OF_ANY_MATCH, .compatible = "adm1030" }, {} }; diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c index 00e5257b176..8dc657fc8af 100644 --- a/drivers/net/bmac.c +++ b/drivers/net/bmac.c @@ -1261,7 +1261,7 @@ static void bmac_reset_and_enable(struct net_device *dev) spin_unlock_irqrestore(&bp->lock, flags); } -static int __devinit bmac_probe(struct macio_dev *mdev, const struct of_match *match) +static int __devinit bmac_probe(struct macio_dev *mdev, const struct of_device_id *match) { int j, rev, ret; struct bmac_data *bp; @@ -1645,16 +1645,13 @@ static int __devexit bmac_remove(struct macio_dev *mdev) return 0; } -static struct of_match bmac_match[] = +static struct 
of_device_id bmac_match[] = { { .name = "bmac", - .type = OF_ANY_MATCH, - .compatible = OF_ANY_MATCH, .data = (void *)0, }, { - .name = OF_ANY_MATCH, .type = "network", .compatible = "bmac+", .data = (void *)1, diff --git a/drivers/net/mace.c b/drivers/net/mace.c index 6ed2d7dbd44..81d0a26e4f4 100644 --- a/drivers/net/mace.c +++ b/drivers/net/mace.c @@ -109,7 +109,7 @@ bitrev(int b) } -static int __devinit mace_probe(struct macio_dev *mdev, const struct of_match *match) +static int __devinit mace_probe(struct macio_dev *mdev, const struct of_device_id *match) { struct device_node *mace = macio_get_of_node(mdev); struct net_device *dev; @@ -1009,12 +1009,10 @@ static irqreturn_t mace_rxdma_intr(int irq, void *dev_id, struct pt_regs *regs) return IRQ_HANDLED; } -static struct of_match mace_match[] = +static struct of_device_id mace_match[] = { { .name = "mace", - .type = OF_ANY_MATCH, - .compatible = OF_ANY_MATCH }, {}, }; diff --git a/drivers/net/wireless/airport.c b/drivers/net/wireless/airport.c index b4f4bd7956a..9d496703c46 100644 --- a/drivers/net/wireless/airport.c +++ b/drivers/net/wireless/airport.c @@ -184,7 +184,7 @@ static int airport_hard_reset(struct orinoco_private *priv) } static int -airport_attach(struct macio_dev *mdev, const struct of_match *match) +airport_attach(struct macio_dev *mdev, const struct of_device_id *match) { struct orinoco_private *priv; struct net_device *dev; @@ -266,16 +266,16 @@ MODULE_AUTHOR("Benjamin Herrenschmidt "); MODULE_DESCRIPTION("Driver for the Apple Airport wireless card."); MODULE_LICENSE("Dual MPL/GPL"); -static struct of_match airport_match[] = +static struct of_device_id airport_match[] = { { .name = "radio", - .type = OF_ANY_MATCH, - .compatible = OF_ANY_MATCH }, {}, }; +MODULE_DEVICE_TABLE (of, airport_match); + static struct macio_driver airport_driver = { .name = DRIVER_NAME, diff --git a/drivers/scsi/mac53c94.c b/drivers/scsi/mac53c94.c index edd47d1f0b1..932dcf0366e 100644 --- a/drivers/scsi/mac53c94.c +++ 
b/drivers/scsi/mac53c94.c @@ -424,7 +424,7 @@ static struct scsi_host_template mac53c94_template = { .use_clustering = DISABLE_CLUSTERING, }; -static int mac53c94_probe(struct macio_dev *mdev, const struct of_match *match) +static int mac53c94_probe(struct macio_dev *mdev, const struct of_device_id *match) { struct device_node *node = macio_get_of_node(mdev); struct pci_dev *pdev = macio_get_pci_dev(mdev); @@ -544,15 +544,14 @@ static int mac53c94_remove(struct macio_dev *mdev) } -static struct of_match mac53c94_match[] = +static struct of_device_id mac53c94_match[] = { { .name = "53c94", - .type = OF_ANY_MATCH, - .compatible = OF_ANY_MATCH }, {}, }; +MODULE_DEVICE_TABLE (of, mac53c94_match); static struct macio_driver mac53c94_driver = { diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c index b05737ae5ef..ff1933298da 100644 --- a/drivers/scsi/mesh.c +++ b/drivers/scsi/mesh.c @@ -1847,7 +1847,7 @@ static struct scsi_host_template mesh_template = { .use_clustering = DISABLE_CLUSTERING, }; -static int mesh_probe(struct macio_dev *mdev, const struct of_match *match) +static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match) { struct device_node *mesh = macio_get_of_node(mdev); struct pci_dev* pdev = macio_get_pci_dev(mdev); @@ -2012,20 +2012,18 @@ static int mesh_remove(struct macio_dev *mdev) } -static struct of_match mesh_match[] = +static struct of_device_id mesh_match[] = { { .name = "mesh", - .type = OF_ANY_MATCH, - .compatible = OF_ANY_MATCH }, { - .name = OF_ANY_MATCH, .type = "scsi", .compatible = "chrp,mesh0" }, {}, }; +MODULE_DEVICE_TABLE (of, mesh_match); static struct macio_driver mesh_driver = { diff --git a/drivers/serial/pmac_zilog.c b/drivers/serial/pmac_zilog.c index 1c9f7161712..7db2f37532c 100644 --- a/drivers/serial/pmac_zilog.c +++ b/drivers/serial/pmac_zilog.c @@ -1545,7 +1545,7 @@ static void pmz_dispose_port(struct uart_pmac_port *uap) /* * Called upon match with an escc node in the devive-tree. 
*/ -static int pmz_attach(struct macio_dev *mdev, const struct of_match *match) +static int pmz_attach(struct macio_dev *mdev, const struct of_device_id *match) { int i; @@ -1850,20 +1850,17 @@ err_out: return rc; } -static struct of_match pmz_match[] = +static struct of_device_id pmz_match[] = { { .name = "ch-a", - .type = OF_ANY_MATCH, - .compatible = OF_ANY_MATCH }, { .name = "ch-b", - .type = OF_ANY_MATCH, - .compatible = OF_ANY_MATCH }, {}, }; +MODULE_DEVICE_TABLE (of, pmz_match); static struct macio_driver pmz_driver = { diff --git a/drivers/video/platinumfb.c b/drivers/video/platinumfb.c index 3dd1de1539d..b00887e9851 100644 --- a/drivers/video/platinumfb.c +++ b/drivers/video/platinumfb.c @@ -523,7 +523,7 @@ int __init platinumfb_setup(char *options) #define invalidate_cache(addr) #endif -static int __devinit platinumfb_probe(struct of_device* odev, const struct of_match *match) +static int __devinit platinumfb_probe(struct of_device* odev, const struct of_device_id *match) { struct device_node *dp = odev->node; struct fb_info *info; @@ -647,12 +647,10 @@ static int __devexit platinumfb_remove(struct of_device* odev) return 0; } -static struct of_match platinumfb_match[] = +static struct of_device_id platinumfb_match[] = { { .name = "platinum", - .type = OF_ANY_MATCH, - .compatible = OF_ANY_MATCH, }, {}, }; diff --git a/include/asm-ppc/macio.h b/include/asm-ppc/macio.h index 2cafc997860..a481b772d15 100644 --- a/include/asm-ppc/macio.h +++ b/include/asm-ppc/macio.h @@ -1,6 +1,7 @@ #ifndef __MACIO_ASIC_H__ #define __MACIO_ASIC_H__ +#include #include extern struct bus_type macio_bus_type; @@ -120,10 +121,10 @@ static inline struct pci_dev *macio_get_pci_dev(struct macio_dev *mdev) struct macio_driver { char *name; - struct of_match *match_table; + struct of_device_id *match_table; struct module *owner; - int (*probe)(struct macio_dev* dev, const struct of_match *match); + int (*probe)(struct macio_dev* dev, const struct of_device_id *match); int 
(*remove)(struct macio_dev* dev); int (*suspend)(struct macio_dev* dev, pm_message_t state); diff --git a/include/asm-ppc/of_device.h b/include/asm-ppc/of_device.h index 7229735a7c1..4b264cfd399 100644 --- a/include/asm-ppc/of_device.h +++ b/include/asm-ppc/of_device.h @@ -24,20 +24,8 @@ struct of_device }; #define to_of_device(d) container_of(d, struct of_device, dev) -/* - * Struct used for matching a device - */ -struct of_match -{ - char *name; - char *type; - char *compatible; - void *data; -}; -#define OF_ANY_MATCH ((char *)-1L) - -extern const struct of_match *of_match_device( - const struct of_match *matches, const struct of_device *dev); +extern const struct of_device_id *of_match_device( + const struct of_device_id *matches, const struct of_device *dev); extern struct of_device *of_dev_get(struct of_device *dev); extern void of_dev_put(struct of_device *dev); @@ -49,10 +37,10 @@ extern void of_dev_put(struct of_device *dev); struct of_platform_driver { char *name; - struct of_match *match_table; + struct of_device_id *match_table; struct module *owner; - int (*probe)(struct of_device* dev, const struct of_match *match); + int (*probe)(struct of_device* dev, const struct of_device_id *match); int (*remove)(struct of_device* dev); int (*suspend)(struct of_device* dev, pm_message_t state); diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 9b6d05172ed..dce53ac1625 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -174,6 +174,17 @@ struct serio_device_id { __u8 proto; }; +/* + * Struct used for matching a device + */ +struct of_device_id +{ + char name[32]; + char type[32]; + char compatible[128]; + void *data; +}; + /* PCMCIA */ diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 908bff6d1ee..5180405c1a8 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -25,6 +25,8 @@ typedef Elf64_Addr kernel_ulong_t; #include #endif +#include + typedef uint32_t 
__u32; typedef uint16_t __u16; typedef unsigned char __u8; @@ -323,6 +325,22 @@ static int do_pcmcia_entry(const char *filename, +static int do_of_entry (const char *filename, struct of_device_id *of, char *alias) +{ + char *tmp; + sprintf (alias, "of:N%sT%sC%s", + of->name[0] ? of->name : "*", + of->type[0] ? of->type : "*", + of->compatible[0] ? of->compatible : "*"); + + /* Replace all whitespace with underscores */ + for (tmp = alias; tmp && *tmp; tmp++) + if (isspace (*tmp)) + *tmp = '_'; + + return 1; +} + /* Ignore any prefix, eg. v850 prepends _ */ static inline int sym_is(const char *symbol, const char *name) { @@ -401,6 +419,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, else if (sym_is(symname, "__mod_pcmcia_device_table")) do_table(symval, sym->st_size, sizeof(struct pcmcia_device_id), do_pcmcia_entry, mod); + else if (sym_is(symname, "__mod_of_device_table")) + do_table(symval, sym->st_size, sizeof(struct of_device_id), + do_of_entry, mod); + } /* Now add out buffered information to the generated C source */ -- cgit v1.2.3-70-g09d2 From 40725181b74be6b0e3bdc8c05bd1e0b9873ec5cc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:51:52 -0700 Subject: [CRYPTO] Add support for low-level multi-block operations This patch adds hooks for cipher algorithms to implement multi-block ECB/CBC operations directly. This is expected to provide significant performance boots to the VIA Padlock. It could also be used for improving software implementations such as AES where operating on multiple blocks at a time may enable certain optimisations. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- crypto/cipher.c | 38 ++++++++++++++++++-------------------- crypto/internal.h | 5 ----- include/linux/crypto.h | 28 +++++++++++++++++++++++++++- 3 files changed, 45 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/crypto/cipher.c b/crypto/cipher.c index c4243345b15..54c4a560070 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -23,14 +23,6 @@ #include "internal.h" #include "scatterwalk.h" -struct cipher_desc { - struct crypto_tfm *tfm; - void (*crfn)(void *ctx, u8 *dst, const u8 *src); - unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst, - const u8 *src, unsigned int nbytes); - void *info; -}; - static inline void xor_64(u8 *a, const u8 *b) { ((u32 *)a)[0] ^= ((u32 *)b)[0]; @@ -224,10 +216,11 @@ static int ecb_encrypt(struct crypto_tfm *tfm, struct scatterlist *src, unsigned int nbytes) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt; - desc.prfn = ecb_process; + desc.crfn = cipher->cia_encrypt; + desc.prfn = cipher->cia_encrypt_ecb ?: ecb_process; return crypt(&desc, dst, src, nbytes); } @@ -238,10 +231,11 @@ static int ecb_decrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt; - desc.prfn = ecb_process; + desc.crfn = cipher->cia_decrypt; + desc.prfn = cipher->cia_decrypt_ecb ?: ecb_process; return crypt(&desc, dst, src, nbytes); } @@ -252,10 +246,11 @@ static int cbc_encrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt; - desc.prfn = cbc_process_encrypt; + desc.crfn = cipher->cia_encrypt; + desc.prfn = cipher->cia_encrypt_cbc ?: cbc_process_encrypt; desc.info = tfm->crt_cipher.cit_iv; return crypt(&desc, 
dst, src, nbytes); @@ -267,10 +262,11 @@ static int cbc_encrypt_iv(struct crypto_tfm *tfm, unsigned int nbytes, u8 *iv) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt; - desc.prfn = cbc_process_encrypt; + desc.crfn = cipher->cia_encrypt; + desc.prfn = cipher->cia_encrypt_cbc ?: cbc_process_encrypt; desc.info = iv; return crypt(&desc, dst, src, nbytes); @@ -282,10 +278,11 @@ static int cbc_decrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt; - desc.prfn = cbc_process_decrypt; + desc.crfn = cipher->cia_decrypt; + desc.prfn = cipher->cia_decrypt_cbc ?: cbc_process_decrypt; desc.info = tfm->crt_cipher.cit_iv; return crypt(&desc, dst, src, nbytes); @@ -297,10 +294,11 @@ static int cbc_decrypt_iv(struct crypto_tfm *tfm, unsigned int nbytes, u8 *iv) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt; - desc.prfn = cbc_process_decrypt; + desc.crfn = cipher->cia_decrypt; + desc.prfn = cipher->cia_decrypt_cbc ?: cbc_process_decrypt; desc.info = iv; return crypt(&desc, dst, src, nbytes); diff --git a/crypto/internal.h b/crypto/internal.h index 964b9a60ca2..5ed383f7dce 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -42,11 +42,6 @@ static inline void crypto_yield(struct crypto_tfm *tfm) cond_resched(); } -static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) -{ - return (void *)&tfm[1]; -} - struct crypto_alg *crypto_alg_lookup(const char *name); /* A far more intelligent version of this is planned. 
For now, just diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 387da6a3e58..26ce01c2574 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -61,6 +61,15 @@ #define CRYPTO_DIR_DECRYPT 0 struct scatterlist; +struct crypto_tfm; + +struct cipher_desc { + struct crypto_tfm *tfm; + void (*crfn)(void *ctx, u8 *dst, const u8 *src); + unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst, + const u8 *src, unsigned int nbytes); + void *info; +}; /* * Algorithms: modular crypto algorithm implementations, managed @@ -73,6 +82,19 @@ struct cipher_alg { unsigned int keylen, u32 *flags); void (*cia_encrypt)(void *ctx, u8 *dst, const u8 *src); void (*cia_decrypt)(void *ctx, u8 *dst, const u8 *src); + + unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_decrypt_ecb)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_encrypt_cbc)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_decrypt_cbc)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); }; struct digest_alg { @@ -136,7 +158,6 @@ static inline int crypto_alg_available(const char *name, u32 flags) * and core processing logic. Managed via crypto_alloc_tfm() and * crypto_free_tfm(), as well as the various helpers below. */ -struct crypto_tfm; struct cipher_tfm { void *cit_iv; @@ -266,6 +287,11 @@ static inline unsigned int crypto_tfm_alg_digestsize(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_digest.dia_digestsize; } +static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) +{ + return (void *)&tfm[1]; +} + /* * API wrappers. 
*/ -- cgit v1.2.3-70-g09d2 From 95477377995aefa2ec1654a9a3777bd57ea99146 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:52:09 -0700 Subject: [CRYPTO] Add alignmask for low-level cipher implementations The VIA Padlock device requires the input and output buffers to be aligned on 16-byte boundaries. This patch adds the alignmask attribute for low-level cipher implementations to indicate their alignment requirements. The mid-level crypt() function will copy the input/output buffers if they are not aligned correctly before they are passed to the low-level implementation. Strictly speaking, some of the software implementations require the buffers to be aligned on 4-byte boundaries as they do 32-bit loads. However, it is not clear whether it is better to copy the buffers or pay the penalty for unaligned loads/stores. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/api.c | 6 ++++++ crypto/cipher.c | 43 ++++++++++++++++++++++++++++++++++++------- crypto/scatterwalk.h | 6 ++++++ include/linux/crypto.h | 1 + 4 files changed, 49 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/crypto/api.c b/crypto/api.c index 394169a8577..f55856b2199 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -168,6 +168,12 @@ int crypto_register_alg(struct crypto_alg *alg) { int ret = 0; struct crypto_alg *q; + + if (alg->cra_alignmask & (alg->cra_alignmask + 1)) + return -EINVAL; + + if (alg->cra_alignmask > PAGE_SIZE) + return -EINVAL; down_write(&crypto_alg_sem); diff --git a/crypto/cipher.c b/crypto/cipher.c index 54c4a560070..85eb12f8e56 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -41,8 +41,10 @@ static unsigned int crypt_slow(const struct cipher_desc *desc, struct scatter_walk *in, struct scatter_walk *out, unsigned int bsize) { - u8 src[bsize]; - u8 dst[bsize]; + unsigned int alignmask = desc->tfm->__crt_alg->cra_alignmask; + u8 buffer[bsize * 2 + alignmask]; + u8 *src = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); + 
u8 *dst = src + bsize; unsigned int n; n = scatterwalk_copychunks(src, in, bsize, 0); @@ -59,15 +61,24 @@ static unsigned int crypt_slow(const struct cipher_desc *desc, static inline unsigned int crypt_fast(const struct cipher_desc *desc, struct scatter_walk *in, struct scatter_walk *out, - unsigned int nbytes) + unsigned int nbytes, u8 *tmp) { u8 *src, *dst; src = in->data; dst = scatterwalk_samebuf(in, out) ? src : out->data; + if (tmp) { + memcpy(tmp, in->data, nbytes); + src = tmp; + dst = tmp; + } + nbytes = desc->prfn(desc, dst, src, nbytes); + if (tmp) + memcpy(out->data, tmp, nbytes); + scatterwalk_advance(in, nbytes); scatterwalk_advance(out, nbytes); @@ -87,6 +98,8 @@ static int crypt(const struct cipher_desc *desc, struct scatter_walk walk_in, walk_out; struct crypto_tfm *tfm = desc->tfm; const unsigned int bsize = crypto_tfm_alg_blocksize(tfm); + unsigned int alignmask = tfm->__crt_alg->cra_alignmask; + unsigned long buffer = 0; if (!nbytes) return 0; @@ -100,16 +113,27 @@ static int crypt(const struct cipher_desc *desc, scatterwalk_start(&walk_out, dst); for(;;) { - unsigned int n; + unsigned int n = nbytes; + u8 *tmp = NULL; + + if (!scatterwalk_aligned(&walk_in, alignmask) || + !scatterwalk_aligned(&walk_out, alignmask)) { + if (!buffer) { + buffer = __get_free_page(GFP_ATOMIC); + if (!buffer) + n = 0; + } + tmp = (u8 *)buffer; + } scatterwalk_map(&walk_in, 0); scatterwalk_map(&walk_out, 1); - n = scatterwalk_clamp(&walk_in, nbytes); + n = scatterwalk_clamp(&walk_in, n); n = scatterwalk_clamp(&walk_out, n); if (likely(n >= bsize)) - n = crypt_fast(desc, &walk_in, &walk_out, n); + n = crypt_fast(desc, &walk_in, &walk_out, n, tmp); else n = crypt_slow(desc, &walk_in, &walk_out, bsize); @@ -119,10 +143,15 @@ static int crypt(const struct cipher_desc *desc, scatterwalk_done(&walk_out, 1, nbytes); if (!nbytes) - return 0; + break; crypto_yield(tfm); } + + if (buffer) + free_page(buffer); + + return 0; } static unsigned int cbc_process_encrypt(const struct 
cipher_desc *desc, diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h index 5495bb97081..e79925c474a 100644 --- a/crypto/scatterwalk.h +++ b/crypto/scatterwalk.h @@ -55,6 +55,12 @@ static inline void scatterwalk_advance(struct scatter_walk *walk, walk->len_this_segment -= nbytes; } +static inline unsigned int scatterwalk_aligned(struct scatter_walk *walk, + unsigned int alignmask) +{ + return !(walk->offset & alignmask); +} + void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg); int scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out); void scatterwalk_map(struct scatter_walk *walk, int out); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 26ce01c2574..ac9d49beecd 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -124,6 +124,7 @@ struct crypto_alg { u32 cra_flags; unsigned int cra_blocksize; unsigned int cra_ctxsize; + unsigned int cra_alignmask; const char cra_name[CRYPTO_MAX_ALG_NAME]; union { -- cgit v1.2.3-70-g09d2 From fbdae9f3e7fb57c07cb0d973f113eb25da2e8ff2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:53:29 -0700 Subject: [CRYPTO] Ensure cit_iv is aligned correctly This patch ensures that cit_iv is aligned according to cra_alignmask by allocating it as part of the tfm structure. As a side effect the crypto layer will also guarantee that the tfm ctx area has enough space to be aligned by cra_alignmask. This allows us to remove the extra space reservation from the Padlock driver. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- crypto/api.c | 32 +++++++++++++++++++++++++++++--- crypto/cipher.c | 15 +++++++++------ crypto/internal.h | 28 ++++++++++++++++++++++++++++ drivers/crypto/padlock-aes.c | 3 +-- include/linux/crypto.h | 5 +++++ 5 files changed, 72 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/crypto/api.c b/crypto/api.c index 0b583d24f7f..2d8d828c0ca 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -125,20 +125,46 @@ static void crypto_exit_ops(struct crypto_tfm *tfm) } } +static unsigned int crypto_ctxsize(struct crypto_alg *alg, int flags) +{ + unsigned int len; + + switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { + default: + BUG(); + + case CRYPTO_ALG_TYPE_CIPHER: + len = crypto_cipher_ctxsize(alg, flags); + break; + + case CRYPTO_ALG_TYPE_DIGEST: + len = crypto_digest_ctxsize(alg, flags); + break; + + case CRYPTO_ALG_TYPE_COMPRESS: + len = crypto_compress_ctxsize(alg, flags); + break; + } + + return len + alg->cra_alignmask; +} + struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags) { struct crypto_tfm *tfm = NULL; struct crypto_alg *alg; + unsigned int tfm_size; alg = crypto_alg_mod_lookup(name); if (alg == NULL) goto out; - - tfm = kmalloc(sizeof(*tfm) + alg->cra_ctxsize, GFP_KERNEL); + + tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, flags); + tfm = kmalloc(tfm_size, GFP_KERNEL); if (tfm == NULL) goto out_put; - memset(tfm, 0, sizeof(*tfm) + alg->cra_ctxsize); + memset(tfm, 0, tfm_size); tfm->__crt_alg = alg; diff --git a/crypto/cipher.c b/crypto/cipher.c index 85eb12f8e56..d3295ce14a5 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -41,7 +41,7 @@ static unsigned int crypt_slow(const struct cipher_desc *desc, struct scatter_walk *in, struct scatter_walk *out, unsigned int bsize) { - unsigned int alignmask = desc->tfm->__crt_alg->cra_alignmask; + unsigned int alignmask = crypto_tfm_alg_alignmask(desc->tfm); u8 buffer[bsize * 2 + alignmask]; u8 *src = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); u8 *dst = src + bsize; 
@@ -98,7 +98,7 @@ static int crypt(const struct cipher_desc *desc, struct scatter_walk walk_in, walk_out; struct crypto_tfm *tfm = desc->tfm; const unsigned int bsize = crypto_tfm_alg_blocksize(tfm); - unsigned int alignmask = tfm->__crt_alg->cra_alignmask; + unsigned int alignmask = crypto_tfm_alg_alignmask(tfm); unsigned long buffer = 0; if (!nbytes) @@ -399,6 +399,8 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) } if (ops->cit_mode == CRYPTO_TFM_MODE_CBC) { + unsigned int align; + unsigned long addr; switch (crypto_tfm_alg_blocksize(tfm)) { case 8: @@ -418,9 +420,11 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) } ops->cit_ivsize = crypto_tfm_alg_blocksize(tfm); - ops->cit_iv = kmalloc(ops->cit_ivsize, GFP_KERNEL); - if (ops->cit_iv == NULL) - ret = -ENOMEM; + align = crypto_tfm_alg_alignmask(tfm) + 1; + addr = (unsigned long)crypto_tfm_ctx(tfm); + addr = ALIGN(addr, align); + addr += ALIGN(tfm->__crt_alg->cra_ctxsize, align); + ops->cit_iv = (void *)addr; } out: @@ -429,5 +433,4 @@ out: void crypto_exit_cipher_ops(struct crypto_tfm *tfm) { - kfree(tfm->crt_cipher.cit_iv); } diff --git a/crypto/internal.h b/crypto/internal.h index 83b1b6d6d92..68612874b5f 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -16,6 +16,7 @@ #include #include #include +#include #include extern enum km_type crypto_km_types[]; @@ -61,6 +62,33 @@ static inline void crypto_init_proc(void) { } #endif +static inline unsigned int crypto_digest_ctxsize(struct crypto_alg *alg, + int flags) +{ + return alg->cra_ctxsize; +} + +static inline unsigned int crypto_cipher_ctxsize(struct crypto_alg *alg, + int flags) +{ + unsigned int len = alg->cra_ctxsize; + + switch (flags & CRYPTO_TFM_MODE_MASK) { + case CRYPTO_TFM_MODE_CBC: + len = ALIGN(len, alg->cra_alignmask + 1); + len += alg->cra_blocksize; + break; + } + + return len; +} + +static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg, + int flags) +{ + return alg->cra_ctxsize; +} + int 
crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags); int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags); int crypto_init_compress_flags(struct crypto_tfm *tfm, u32 flags); diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index d2745ff4699..c5b58fae95f 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -465,8 +465,7 @@ static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aes_ctx) + - PADLOCK_ALIGNMENT, + .cra_ctxsize = sizeof(struct aes_ctx), .cra_alignmask = PADLOCK_ALIGNMENT - 1, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), diff --git a/include/linux/crypto.h b/include/linux/crypto.h index ac9d49beecd..5e2bcc636a0 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -288,6 +288,11 @@ static inline unsigned int crypto_tfm_alg_digestsize(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_digest.dia_digestsize; } +static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_alignmask; +} + static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) { return (void *)&tfm[1]; -- cgit v1.2.3-70-g09d2 From 97f927a4d7dbccde0a854a62c3ea54d90bae8679 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 7 Jul 2005 16:50:16 +0200 Subject: [MTD] XIP cleanup Move the architecture dependent code into include/asm/mtd-xip.h Signed-off-by: Thomas Gleixner --- drivers/mtd/chips/cfi_cmdset_0001.c | 2 +- drivers/mtd/chips/cfi_cmdset_0002.c | 2 +- include/asm-arm/arch-pxa/mtd-xip.h | 37 +++++++++++++++++++++++++++++++++++ include/asm-arm/arch-sa1100/mtd-xip.h | 26 ++++++++++++++++++++++++ include/asm-arm/mtd-xip.h | 26 ++++++++++++++++++++++++ include/linux/mtd/xip.h | 31 ++++++++++++----------------- 6 files changed, 104 insertions(+), 20 deletions(-) create mode 100644 include/asm-arm/arch-pxa/mtd-xip.h create mode
100644 include/asm-arm/arch-sa1100/mtd-xip.h create mode 100644 include/asm-arm/mtd-xip.h (limited to 'include/linux') diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index 8b1304531d8..0cfcd88468e 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -845,7 +845,7 @@ static void __xipram xip_enable(struct map_info *map, struct flchip *chip, chip->state = FL_READY; } (void) map_read(map, adr); - asm volatile (".rep 8; nop; .endr"); /* fill instruction prefetch */ + xip_iprefetch(); local_irq_enable(); } diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index c76c30de48f..8505f118f2d 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -600,7 +600,7 @@ static void __xipram xip_enable(struct map_info *map, struct flchip *chip, chip->state = FL_READY; } (void) map_read(map, adr); - asm volatile (".rep 8; nop; .endr"); /* fill instruction prefetch */ + xip_iprefetch(); local_irq_enable(); } diff --git a/include/asm-arm/arch-pxa/mtd-xip.h b/include/asm-arm/arch-pxa/mtd-xip.h new file mode 100644 index 00000000000..8704dbceb43 --- /dev/null +++ b/include/asm-arm/arch-pxa/mtd-xip.h @@ -0,0 +1,37 @@ +/* + * MTD primitives for XIP support. Architecture specific functions + * + * Do not include this file directly. It's included from linux/mtd/xip.h + * + * Author: Nicolas Pitre + * Created: Nov 2, 2004 + * Copyright: (C) 2004 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * $Id: xip.h,v 1.2 2004/12/01 15:49:10 nico Exp $ + */ + +#ifndef __ARCH_PXA_MTD_XIP_H__ +#define __ARCH_PXA_MTD_XIP_H__ + +#include + +#define xip_irqpending() (ICIP & ICMR) + +/* we sample OSCR and convert desired delta to usec (1/4 ~= 1000000/3686400) */ +#define xip_currtime() (OSCR) +#define xip_elapsed_since(x) (signed)((OSCR - (x)) / 4) + +/* + * xip_cpu_idle() is used when waiting for a delay equal or larger than + * the system timer tick period. This should put the CPU into idle mode + * to save power and to be woken up only when some interrupts are pending. + * As above, this should not rely upon standard kernel code. + */ + +#define xip_cpu_idle() asm volatile ("mcr p14, 0, %0, c7, c0, 0" :: "r" (1)) + +#endif /* __ARCH_PXA_MTD_XIP_H__ */ diff --git a/include/asm-arm/arch-sa1100/mtd-xip.h b/include/asm-arm/arch-sa1100/mtd-xip.h new file mode 100644 index 00000000000..80cfdac2b94 --- /dev/null +++ b/include/asm-arm/arch-sa1100/mtd-xip.h @@ -0,0 +1,26 @@ +/* + * MTD primitives for XIP support. Architecture specific functions + * + * Do not include this file directly. It's included from linux/mtd/xip.h + * + * Author: Nicolas Pitre + * Created: Nov 2, 2004 + * Copyright: (C) 2004 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * $Id: xip.h,v 1.2 2004/12/01 15:49:10 nico Exp $ + */ + +#ifndef __ARCH_SA1100_MTD_XIP_H__ +#define __ARCH_SA1100_MTD_XIP_H__ + +#define xip_irqpending() (ICIP & ICMR) + +/* we sample OSCR and convert desired delta to usec (1/4 ~= 1000000/3686400) */ +#define xip_currtime() (OSCR) +#define xip_elapsed_since(x) (signed)((OSCR - (x)) / 4) + +#endif /* __ARCH_SA1100_MTD_XIP_H__ */ diff --git a/include/asm-arm/mtd-xip.h b/include/asm-arm/mtd-xip.h new file mode 100644 index 00000000000..9eb127cc7db --- /dev/null +++ b/include/asm-arm/mtd-xip.h @@ -0,0 +1,26 @@ +/* + * MTD primitives for XIP support. Architecture specific functions + * + * Do not include this file directly. It's included from linux/mtd/xip.h + * + * Author: Nicolas Pitre + * Created: Nov 2, 2004 + * Copyright: (C) 2004 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * $Id: xip.h,v 1.2 2004/12/01 15:49:10 nico Exp $ + */ + +#ifndef __ARM_MTD_XIP_H__ +#define __ARM_MTD_XIP_H__ + +#include +#include + +/* fill instruction prefetch */ +#define xip_iprefetch() do { asm volatile (".rep 8; nop; .endr"); } while (0) + +#endif /* __ARM_MTD_XIP_H__ */ diff --git a/include/linux/mtd/xip.h b/include/linux/mtd/xip.h index fc071125cbc..7b7deef6b18 100644 --- a/include/linux/mtd/xip.h +++ b/include/linux/mtd/xip.h @@ -58,22 +58,16 @@ * returned value is <= the real elapsed time. * note 2: this should be able to cope with a few seconds without * overflowing. + * + * xip_iprefetch() + * + * Macro to fill instruction prefetch + * e.g. 
a series of nops: asm volatile (".rep 8; nop; .endr"); */ -#if defined(CONFIG_ARCH_SA1100) || defined(CONFIG_ARCH_PXA) - -#include -#ifdef CONFIG_ARCH_PXA -#include -#endif - -#define xip_irqpending() (ICIP & ICMR) - -/* we sample OSCR and convert desired delta to usec (1/4 ~= 1000000/3686400) */ -#define xip_currtime() (OSCR) -#define xip_elapsed_since(x) (signed)((OSCR - (x)) / 4) +#include -#else +#ifndef xip_irqpending #warning "missing IRQ and timer primitives for XIP MTD support" #warning "some of the XIP MTD support code will be disabled" @@ -85,16 +79,17 @@ #endif +#ifndef xip_iprefetch +#define xip_iprefetch() do { } while (0) +#endif + /* * xip_cpu_idle() is used when waiting for a delay equal or larger than * the system timer tick period. This should put the CPU into idle mode * to save power and to be woken up only when some interrupts are pending. - * As above, this should not rely upon standard kernel code. + * This should not rely upon standard kernel code. */ - -#if defined(CONFIG_CPU_XSCALE) -#define xip_cpu_idle() asm volatile ("mcr p14, 0, %0, c7, c0, 0" :: "r" (1)) -#else +#ifndef xip_cpu_idle #define xip_cpu_idle() do { } while (0) #endif -- cgit v1.2.3-70-g09d2 From cb2c0233755429037462e16ea0d5497a0092738c Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 7 Jul 2005 17:56:03 -0700 Subject: [PATCH] export generic_drop_inode() to modules OCFS2 wants to mark an inode which has been orphaned by another node so that during final iput it takes the correct path through the VFS and can pass through the OCFS2 delete_inode callback. Since i_nlink can get out of date with other nodes, the best way I see to accomplish this is by clearing i_nlink on those inodes at drop_inode time. Other than this small amount of work, nothing different needs to happen, so I think it would be cleanest to be able to just call generic_drop_inode at the end of the OCFS2 drop_inode callback. 
Signed-off-by: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 4 +++- include/linux/fs.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index 1f9a3a2b89b..6d695037a0a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1052,7 +1052,7 @@ static void generic_forget_inode(struct inode *inode) * inode when the usage count drops to zero, and * i_nlink is zero. */ -static void generic_drop_inode(struct inode *inode) +void generic_drop_inode(struct inode *inode) { if (!inode->i_nlink) generic_delete_inode(inode); @@ -1060,6 +1060,8 @@ static void generic_drop_inode(struct inode *inode) generic_forget_inode(inode); } +EXPORT_SYMBOL_GPL(generic_drop_inode); + /* * Called when we're dropping the last reference * to an inode. diff --git a/include/linux/fs.h b/include/linux/fs.h index 047bde30836..302ec20838c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1435,6 +1435,7 @@ extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); extern void generic_delete_inode(struct inode *inode); +extern void generic_drop_inode(struct inode *inode); extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data); -- cgit v1.2.3-70-g09d2 From 79b9ce311e192e9a31fd9f3cf1ee4a4edf9e2650 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 7 Jul 2005 17:56:04 -0700 Subject: [PATCH] print order information when OOM killing Dump the current allocation order when OOM killing. 
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/sysrq.c | 2 +- include/linux/swap.h | 2 +- mm/oom_kill.c | 4 ++-- mm/page_alloc.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index af79805b557..12d563c648f 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -228,7 +228,7 @@ static struct sysrq_key_op sysrq_term_op = { static void moom_callback(void *ignored) { - out_of_memory(GFP_KERNEL); + out_of_memory(GFP_KERNEL, 0); } static DECLARE_WORK(moom_work, moom_callback, NULL); diff --git a/include/linux/swap.h b/include/linux/swap.h index 2343f999e6e..c75954f2d86 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -148,7 +148,7 @@ struct swap_list_t { #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages) /* linux/mm/oom_kill.c */ -extern void out_of_memory(unsigned int __nocast gfp_mask); +extern void out_of_memory(unsigned int __nocast gfp_mask, int order); /* linux/mm/memory.c */ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 59666d905f1..e20d559edba 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -253,12 +253,12 @@ static struct mm_struct *oom_kill_process(struct task_struct *p) * OR try to be smart about which process to kill. Note that we * don't have to be perfect here, we just have to be good. 
*/ -void out_of_memory(unsigned int __nocast gfp_mask) +void out_of_memory(unsigned int __nocast gfp_mask, int order) { struct mm_struct *mm = NULL; task_t * p; - printk("oom-killer: gfp_mask=0x%x\n", gfp_mask); + printk("oom-killer: gfp_mask=0x%x, order=%d\n", gfp_mask, order); /* print memory stats */ show_mem(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3c9f7f88112..7fbd3ea8765 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -936,7 +936,7 @@ rebalance: goto got_pg; } - out_of_memory(gfp_mask); + out_of_memory(gfp_mask, order); goto restart; } -- cgit v1.2.3-70-g09d2 From cf36680887d6d942d2119c1ff1dfb2428b0f21f4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Jul 2005 17:56:13 -0700 Subject: [PATCH] move ioprio syscalls into syscalls.h - Make ioprio syscalls return long, like set/getpriority syscalls. - Move function prototypes into syscalls.h so we can pick them up in the 32/64bit compat code. Signed-off-by: Anton Blanchard Acked-by: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ioprio.c | 4 ++-- include/linux/ioprio.h | 3 --- include/linux/syscalls.h | 3 +++ 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/ioprio.c b/fs/ioprio.c index 663e420636d..97e1f088ba0 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -43,7 +43,7 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) return 0; } -asmlinkage int sys_ioprio_set(int which, int who, int ioprio) +asmlinkage long sys_ioprio_set(int which, int who, int ioprio) { int class = IOPRIO_PRIO_CLASS(ioprio); int data = IOPRIO_PRIO_DATA(ioprio); @@ -115,7 +115,7 @@ asmlinkage int sys_ioprio_set(int which, int who, int ioprio) return ret; } -asmlinkage int sys_ioprio_get(int which, int who) +asmlinkage long sys_ioprio_get(int which, int who) { struct task_struct *g, *p; struct user_struct *user; diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 8a453a0b5e4..88d5961f7a3 100644 --- 
a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -34,9 +34,6 @@ enum { */ #define IOPRIO_BE_NR (8) -asmlinkage int sys_ioprio_set(int, int, int); -asmlinkage int sys_ioprio_get(int, int); - enum { IOPRIO_WHO_PROCESS = 1, IOPRIO_WHO_PGRP, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 52830b6d94e..425f58c8ea4 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -506,4 +506,7 @@ asmlinkage long sys_request_key(const char __user *_type, asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); +asmlinkage long sys_ioprio_set(int which, int who, int ioprio); +asmlinkage long sys_ioprio_get(int which, int who); + #endif -- cgit v1.2.3-70-g09d2 From e00d9967e3addea86dded46deefc5daec5d52e5a Mon Sep 17 00:00:00 2001 From: Bernard Blackham Date: Thu, 7 Jul 2005 17:56:42 -0700 Subject: [PATCH] pm: fix u32 vs. pm_message_t confusion in cpufreq Fix u32 vs pm_message_t confusion in cpufreq. Signed-off-by: Bernard Blackham Signed-off-by: Pavel Machek Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/platforms/pmac_cpufreq.c | 2 +- drivers/cpufreq/cpufreq.c | 4 ++-- include/linux/cpufreq.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/ppc/platforms/pmac_cpufreq.c b/arch/ppc/platforms/pmac_cpufreq.c index 5fdd4f607a4..c0605244edd 100644 --- a/arch/ppc/platforms/pmac_cpufreq.c +++ b/arch/ppc/platforms/pmac_cpufreq.c @@ -452,7 +452,7 @@ static u32 __pmac read_gpio(struct device_node *np) return offset; } -static int __pmac pmac_cpufreq_suspend(struct cpufreq_policy *policy, u32 state) +static int __pmac pmac_cpufreq_suspend(struct cpufreq_policy *policy, pm_message_t pmsg) { /* Ok, this could be made a bit smarter, but let's be robust for now. 
We * always force a speed change to high speed before sleep, to make sure diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bf62dfe4976..7a7859dd0d9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -869,7 +869,7 @@ EXPORT_SYMBOL(cpufreq_get); * cpufreq_suspend - let the low level driver prepare for suspend */ -static int cpufreq_suspend(struct sys_device * sysdev, u32 state) +static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg) { int cpu = sysdev->id; unsigned int ret = 0; @@ -897,7 +897,7 @@ static int cpufreq_suspend(struct sys_device * sysdev, u32 state) } if (cpufreq_driver->suspend) { - ret = cpufreq_driver->suspend(cpu_policy, state); + ret = cpufreq_driver->suspend(cpu_policy, pmsg); if (ret) { printk(KERN_ERR "cpufreq: suspend failed in ->suspend " "step on CPU %u\n", cpu_policy->cpu); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 927daa86c9b..ff7f80f48df 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -201,7 +201,7 @@ struct cpufreq_driver { /* optional */ int (*exit) (struct cpufreq_policy *policy); - int (*suspend) (struct cpufreq_policy *policy, u32 state); + int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg); int (*resume) (struct cpufreq_policy *policy); struct freq_attr **attr; }; -- cgit v1.2.3-70-g09d2 From a39722034ae37f80a1803bf781fe3fe1b03e20bc Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 7 Jul 2005 17:56:56 -0700 Subject: [PATCH] page_uptodate locking scalability Use a bit spin lock in the first buffer of the page to synchronise asynch IO buffer completions, instead of the global page_uptodate_lock, which is showing some scalability problems.
Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 25 +++++++++++++++++-------- include/linux/buffer_head.h | 3 +++ 2 files changed, 20 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 561e63a1496..6a25d7df89b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -513,8 +513,8 @@ static void free_more_memory(void) */ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) { - static DEFINE_SPINLOCK(page_uptodate_lock); unsigned long flags; + struct buffer_head *first; struct buffer_head *tmp; struct page *page; int page_uptodate = 1; @@ -536,7 +536,9 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) * two buffer heads end IO at almost the same time and both * decide that the page is now completely done. */ - spin_lock_irqsave(&page_uptodate_lock, flags); + first = page_buffers(page); + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; @@ -549,7 +551,8 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } while (tmp != bh); - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); /* * If none of the buffers had errors and they are all @@ -561,7 +564,8 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) return; still_busy: - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); return; } @@ -572,8 +576,8 @@ still_busy: void end_buffer_async_write(struct buffer_head *bh, int uptodate) { char b[BDEVNAME_SIZE]; - static DEFINE_SPINLOCK(page_uptodate_lock); unsigned long flags; + struct buffer_head *first; struct buffer_head *tmp; struct page *page; @@ -594,7 +598,10 @@ void end_buffer_async_write(struct buffer_head *bh, int 
uptodate) SetPageError(page); } - spin_lock_irqsave(&page_uptodate_lock, flags); + first = page_buffers(page); + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); + clear_buffer_async_write(bh); unlock_buffer(bh); tmp = bh->b_this_page; @@ -605,12 +612,14 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); end_page_writeback(page); return; still_busy: - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); return; } diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 802c91e9b3d..90828493791 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -19,6 +19,9 @@ enum bh_state_bits { BH_Dirty, /* Is dirty */ BH_Lock, /* Is locked */ BH_Req, /* Has been submitted for I/O */ + BH_Uptodate_Lock,/* Used by the first bh in a page, to serialise + * IO completion of other buffers in the page + */ BH_Mapped, /* Has a disk mapping */ BH_New, /* Disk mapping was newly created by get_block */ -- cgit v1.2.3-70-g09d2 From 0db925af1db5f3dfe1691c35b39496e2baaff9c9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 7 Jul 2005 17:56:58 -0700 Subject: [PATCH] propagate __nocast annotations Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 ++-- include/linux/slab.h | 4 ++-- include/linux/string.h | 2 +- mm/mempool.c | 2 +- mm/slab.c | 12 +++++++----- 5 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 8d6bf608b19..7c7400137e9 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -12,8 +12,8 @@ struct vm_area_struct; * GFP bitmasks.. 
*/ /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */ -#define __GFP_DMA 0x01 -#define __GFP_HIGHMEM 0x02 +#define __GFP_DMA 0x01u +#define __GFP_HIGHMEM 0x02u /* * Action modifiers - doesn't change the zoning diff --git a/include/linux/slab.h b/include/linux/slab.h index 76cf7e60216..4c8e552471b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -65,7 +65,7 @@ extern void *kmem_cache_alloc(kmem_cache_t *, unsigned int __nocast); extern void kmem_cache_free(kmem_cache_t *, void *); extern unsigned int kmem_cache_size(kmem_cache_t *); extern const char *kmem_cache_name(kmem_cache_t *); -extern kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags); +extern kmem_cache_t *kmem_find_general_cachep(size_t size, unsigned int __nocast gfpflags); /* Size description struct for general caches. */ struct cache_sizes { @@ -105,7 +105,7 @@ extern unsigned int ksize(const void *); #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node(kmem_cache_t *, int flags, int node); -extern void *kmalloc_node(size_t size, int flags, int node); +extern void *kmalloc_node(size_t size, unsigned int __nocast flags, int node); #else static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int node) { diff --git a/include/linux/string.h b/include/linux/string.h index 93994c61309..dab2652acbd 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -88,7 +88,7 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); #endif -extern char *kstrdup(const char *s, int gfp); +extern char *kstrdup(const char *s, unsigned int __nocast gfp); #ifdef __cplusplus } diff --git a/mm/mempool.c b/mm/mempool.c index 9a72f7d918f..65f2957b8d5 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -205,7 +205,7 @@ void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask) void *element; unsigned long flags; wait_queue_t wait; - int gfp_temp; + unsigned int gfp_temp; 
might_sleep_if(gfp_mask & __GFP_WAIT); diff --git a/mm/slab.c b/mm/slab.c index e57abd45eed..c9e706db463 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -584,7 +584,8 @@ static inline struct array_cache *ac_data(kmem_cache_t *cachep) return cachep->array[smp_processor_id()]; } -static inline kmem_cache_t *__find_general_cachep(size_t size, int gfpflags) +static inline kmem_cache_t *__find_general_cachep(size_t size, + unsigned int __nocast gfpflags) { struct cache_sizes *csizep = malloc_sizes; @@ -608,7 +609,8 @@ static inline kmem_cache_t *__find_general_cachep(size_t size, int gfpflags) return csizep->cs_cachep; } -kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags) +kmem_cache_t *kmem_find_general_cachep(size_t size, + unsigned int __nocast gfpflags) { return __find_general_cachep(size, gfpflags); } @@ -2100,7 +2102,7 @@ cache_alloc_debugcheck_before(kmem_cache_t *cachep, unsigned int __nocast flags) #if DEBUG static void * cache_alloc_debugcheck_after(kmem_cache_t *cachep, - unsigned long flags, void *objp, void *caller) + unsigned int __nocast flags, void *objp, void *caller) { if (!objp) return objp; @@ -2442,7 +2444,7 @@ got_slabp: } EXPORT_SYMBOL(kmem_cache_alloc_node); -void *kmalloc_node(size_t size, int flags, int node) +void *kmalloc_node(size_t size, unsigned int __nocast flags, int node) { kmem_cache_t *cachep; @@ -3094,7 +3096,7 @@ unsigned int ksize(const void *objp) * @s: the string to duplicate * @gfp: the GFP mask used in the kmalloc() call when allocating memory */ -char *kstrdup(const char *s, int gfp) +char *kstrdup(const char *s, unsigned int __nocast gfp) { size_t len; char *buf; -- cgit v1.2.3-70-g09d2 From 6c036527a630720063b67d9a65455e8caca2c8fa Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 7 Jul 2005 17:56:59 -0700 Subject: [PATCH] mostly_read data section Add a new section called ".data.read_mostly" for data items that are read frequently and rarely written to like cpumaps etc. 
If these maps are placed in the .data section then these frequently read items may end up in cachelines with data that is frequently updated. In that case all processors in an SMP system must needlessly reload the cachelines again and again containing elements of those frequently used variables. The ability to share these cachelines will allow each cpu in an SMP system to keep local copies of those shared cachelines thereby optimizing performance. Signed-off-by: Alok N Kataria Signed-off-by: Shobhit Dayal Signed-off-by: Christoph Lameter Signed-off-by: Shai Fultheim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel.c | 2 +- arch/i386/kernel/smpboot.c | 18 +++++++++--------- arch/i386/kernel/time.c | 2 +- arch/i386/kernel/timers/timer_hpet.c | 4 ++-- arch/i386/kernel/vmlinux.lds.S | 3 +++ arch/x86_64/kernel/vmlinux.lds.S | 4 ++++ drivers/char/random.c | 2 +- fs/bio.c | 2 +- include/linux/cache.h | 6 ++++++ kernel/profile.c | 4 ++-- lib/radix-tree.c | 2 +- 11 files changed, 31 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 96a75d04583..a2c33c1a46c 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -25,7 +25,7 @@ extern int trap_init_f00f_bug(void); /* * Alignment at which movsl is preferred for bulk memory copies. */ -struct movsl_mask movsl_mask; +struct movsl_mask movsl_mask __read_mostly; #endif void __devinit early_intel_workaround(struct cpuinfo_x86 *c) diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index d66bf489a2e..8ac8e9fd561 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -68,21 +68,21 @@ EXPORT_SYMBOL(smp_num_siblings); #endif /* Package ID of each logical CPU */ -int phys_proc_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; +int phys_proc_id[NR_CPUS] __read_mostly = {[0 ...
NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(phys_proc_id); /* Core ID of each logical CPU */ -int cpu_core_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; +int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(cpu_core_id); -cpumask_t cpu_sibling_map[NR_CPUS]; +cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); -cpumask_t cpu_core_map[NR_CPUS]; +cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); /* bitmap of online cpus */ -cpumask_t cpu_online_map; +cpumask_t cpu_online_map __read_mostly; EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; @@ -100,7 +100,7 @@ static int __devinitdata tsc_sync_disabled; struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_data); -u8 x86_cpu_to_apicid[NR_CPUS] = +u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0xff }; EXPORT_SYMBOL(x86_cpu_to_apicid); @@ -550,10 +550,10 @@ extern struct { #ifdef CONFIG_NUMA /* which logical CPUs are on which nodes */ -cpumask_t node_2_cpu_mask[MAX_NUMNODES] = +cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly = { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; /* which node each logical CPU is on */ -int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 }; +int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; EXPORT_SYMBOL(cpu_2_node); /* set up a mapping between cpu and node. */ @@ -581,7 +581,7 @@ static inline void unmap_cpu_to_node(int cpu) #endif /* CONFIG_NUMA */ -u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... 
NR_CPUS-1] = BAD_APICID }; static void map_cpu_to_logical_apicid(void) { diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 2854c357377..0ee9dee8af0 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -91,7 +91,7 @@ EXPORT_SYMBOL(rtc_lock); DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); -struct timer_opts *cur_timer = &timer_none; +struct timer_opts *cur_timer __read_mostly = &timer_none; /* * This is a special lock that is owned by the CPU and holds the index diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index d766e0963ac..ef8dac5dd33 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -18,7 +18,7 @@ #include "mach_timer.h" #include -static unsigned long hpet_usec_quotient; /* convert hpet clks to usec */ +static unsigned long __read_mostly hpet_usec_quotient; /* convert hpet clks to usec */ static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */ static unsigned long hpet_last; /* hpet counter value at last tick*/ static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ @@ -180,7 +180,7 @@ static int __init init_hpet(char* override) /************************************************************/ /* tsc timer_opts struct */ -static struct timer_opts timer_hpet = { +static struct timer_opts timer_hpet __read_mostly = { .name = "hpet", .mark_offset = mark_offset_hpet, .get_offset = get_offset_hpet, diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 7e01a528a83..761972f8cb6 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -57,6 +57,9 @@ SECTIONS *(.data.cacheline_aligned) } + /* rarely changed data like cpu maps */ + . = ALIGN(32); + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } _edata = .; /* End of data section */ . 
= ALIGN(THREAD_SIZE); /* init_task */ diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 73389f51c4e..61c12758ca7 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -56,6 +56,10 @@ SECTIONS .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) } + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { + *(.data.read_mostly) + } #define VSYSCALL_ADDR (-10*1024*1024) #define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.cacheline_aligned) + SIZEOF(.data.cacheline_aligned) + 4095) & ~(4095)) diff --git a/drivers/char/random.c b/drivers/char/random.c index 460b5d475ed..6b11d6b2129 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -271,7 +271,7 @@ static int random_write_wakeup_thresh = 128; * samples to avoid wasting CPU time and reduce lock contention. */ -static int trickle_thresh = INPUT_POOL_WORDS * 28; +static int trickle_thresh __read_mostly = INPUT_POOL_WORDS * 28; static DEFINE_PER_CPU(int, trickle_count) = 0; diff --git a/fs/bio.c b/fs/bio.c index 3a1472acc36..ca8f7a850fe 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -52,7 +52,7 @@ struct biovec_slab { */ #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } -static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] = { +static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), }; #undef BV diff --git a/include/linux/cache.h b/include/linux/cache.h index 4d767b93738..2b66a36d85f 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -13,6 +13,12 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif +#ifdef CONFIG_X86 +#define __read_mostly __attribute__((__section__(".data.read_mostly"))) +#else +#define __read_mostly +#endif + #ifndef ____cacheline_aligned #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) #endif diff --git 
a/kernel/profile.c b/kernel/profile.c index ad8cbb75ffa..f89248e6d70 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -35,11 +35,11 @@ struct profile_hit { #define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ) /* Oprofile timer tick hook */ -int (*timer_hook)(struct pt_regs *); +int (*timer_hook)(struct pt_regs *) __read_mostly; static atomic_t *prof_buffer; static unsigned long prof_len, prof_shift; -static int prof_on; +static int prof_on __read_mostly; static cpumask_t prof_cpu_mask = CPU_MASK_ALL; #ifdef CONFIG_SMP static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 04d664377f2..10bed1c8c3c 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -58,7 +58,7 @@ struct radix_tree_path { #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) #define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2) -static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH]; +static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly; /* * Radix tree node cache. -- cgit v1.2.3-70-g09d2 From 1ce88cf466f7b6078b14d67d186a3d7c19dd5609 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:24 -0700 Subject: [PATCH] namespace.c: fix race in mark_mounts_for_expiry() This patch fixes a race found by Ram in mark_mounts_for_expiry() in fs/namespace.c. The bug can only be triggered with simultaneous exiting of a process having a private namespace, and expiry of a mount from within that namespace. It's practically impossible to trigger, and I haven't even tried. But still, a bug is a bug. The race happens when put_namespace() is called by another task, while mark_mounts_for_expiry() is between atomic_read() and get_namespace(). In that case get_namespace() will be called on an already dead namespace with unforeseeable results. 
The solution was suggested by Al Viro, in his own words: Instead of screwing with atomic_read() in there, why don't we simply do the following: a) atomic_dec_and_lock() in put_namespace() b) __put_namespace() called without dropping lock c) the first thing done by __put_namespace would be struct vfsmount *root = namespace->root; namespace->root = NULL; spin_unlock(...); .... umount_tree(root); ... d) check in mark_... would be simply namespace && namespace->root. And we are all set; no screwing around with atomic_read(), no magic at all. Dying namespace gets NULL ->root. All changes of ->root happen under spinlock. If under a spinlock we see non-NULL ->mnt_namespace, it won't be freed until we drop the lock (we will set ->mnt_namespace to NULL under that lock before we get to freeing namespace). If under a spinlock we see non-NULL ->mnt_namespace and ->mnt_namespace->root, we can grab a reference to namespace and be sure that it won't go away. Signed-off-by: Miklos Szeredi Acked-by: Al Viro Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 7 +++++-- include/linux/namespace.h | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index a0d0ef1f1a4..9d17541ebaf 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -869,7 +869,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) /* don't do anything if the namespace is dead - all the * vfsmounts from it are going away anyway */ namespace = mnt->mnt_namespace; - if (!namespace || atomic_read(&namespace->count) <= 0) + if (!namespace || !namespace->root) continue; get_namespace(namespace); @@ -1450,9 +1450,12 @@ void __init mnt_init(unsigned long mempages) void __put_namespace(struct namespace *namespace) { + struct vfsmount *root = namespace->root; + namespace->root = NULL; + spin_unlock(&vfsmount_lock); down_write(&namespace->sem); spin_lock(&vfsmount_lock); -
umount_tree(namespace->root); + umount_tree(root); spin_unlock(&vfsmount_lock); up_write(&namespace->sem); kfree(namespace); diff --git a/include/linux/namespace.h b/include/linux/namespace.h index 697991b69f9..0e5a86f13b2 100644 --- a/include/linux/namespace.h +++ b/include/linux/namespace.h @@ -17,7 +17,8 @@ extern void __put_namespace(struct namespace *namespace); static inline void put_namespace(struct namespace *namespace) { - if (atomic_dec_and_test(&namespace->count)) + if (atomic_dec_and_lock(&namespace->count, &vfsmount_lock)) + /* releases vfsmount_lock */ __put_namespace(namespace); } -- cgit v1.2.3-70-g09d2 From 55e700b924f9e0ba24e3a071d1097d050b05abe6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:30 -0700 Subject: [PATCH] namespace: rename mnt_fslink to mnt_expire This patch renames vfsmount->mnt_fslink to something a little more descriptive: vfsmount->mnt_expire. Signed-off-by: Mike Waychison Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 24 ++++++++++++------------ include/linux/mount.h | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index b168dc37eaa..587eb0d707e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -61,7 +61,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); - INIT_LIST_HEAD(&mnt->mnt_fslink); + INIT_LIST_HEAD(&mnt->mnt_expire); if (name) { int size = strlen(name)+1; char *newname = kmalloc(size, GFP_KERNEL); @@ -165,8 +165,8 @@ clone_mnt(struct vfsmount *old, struct dentry *root) /* stick the duplicate mount on the same expiry list * as the original if that was on one */ spin_lock(&vfsmount_lock); - if (!list_empty(&old->mnt_fslink)) - list_add(&mnt->mnt_fslink, &old->mnt_fslink); + if (!list_empty(&old->mnt_expire)) + 
list_add(&mnt->mnt_expire, &old->mnt_expire); spin_unlock(&vfsmount_lock); } return mnt; @@ -351,7 +351,7 @@ static void umount_tree(struct vfsmount *mnt) while (!list_empty(&kill)) { mnt = list_entry(kill.next, struct vfsmount, mnt_list); list_del_init(&mnt->mnt_list); - list_del_init(&mnt->mnt_fslink); + list_del_init(&mnt->mnt_expire); if (mnt->mnt_parent == mnt) { spin_unlock(&vfsmount_lock); } else { @@ -645,7 +645,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse) if (mnt) { /* stop bind mounts from expiring */ spin_lock(&vfsmount_lock); - list_del_init(&mnt->mnt_fslink); + list_del_init(&mnt->mnt_expire); spin_unlock(&vfsmount_lock); err = graft_tree(mnt, nd); @@ -744,7 +744,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name) /* if the mount is moved, it should no longer be expire * automatically */ - list_del_init(&old_nd.mnt->mnt_fslink); + list_del_init(&old_nd.mnt->mnt_expire); out2: spin_unlock(&vfsmount_lock); out1: @@ -814,7 +814,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, if (err == 0 && fslist) { /* add to the specified expiration list */ spin_lock(&vfsmount_lock); - list_add_tail(&newmnt->mnt_fslink, fslist); + list_add_tail(&newmnt->mnt_expire, fslist); spin_unlock(&vfsmount_lock); } @@ -869,7 +869,7 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts) * Someone brought it back to life whilst we didn't have any * locks held so return it to the expiration list */ - list_add_tail(&mnt->mnt_fslink, mounts); + list_add_tail(&mnt->mnt_expire, mounts); spin_unlock(&vfsmount_lock); } } @@ -896,13 +896,13 @@ void mark_mounts_for_expiry(struct list_head *mounts) * - still marked for expiry (marked on the last call here; marks are * cleared by mntput()) */ - list_for_each_entry_safe(mnt, next, mounts, mnt_fslink) { + list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { if (!xchg(&mnt->mnt_expiry_mark, 1) || atomic_read(&mnt->mnt_count) != 1) continue; 
mntget(mnt); - list_move(&mnt->mnt_fslink, &graveyard); + list_move(&mnt->mnt_expire, &graveyard); } /* @@ -912,8 +912,8 @@ void mark_mounts_for_expiry(struct list_head *mounts) * - dispose of the corpse */ while (!list_empty(&graveyard)) { - mnt = list_entry(graveyard.next, struct vfsmount, mnt_fslink); - list_del_init(&mnt->mnt_fslink); + mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire); + list_del_init(&mnt->mnt_expire); /* don't do anything if the namespace is dead - all the * vfsmounts from it are going away anyway */ diff --git a/include/linux/mount.h b/include/linux/mount.h index 8b8d3b9beef..196d2d6de4a 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -34,7 +34,7 @@ struct vfsmount int mnt_expiry_mark; /* true if marked for expiry */ char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ struct list_head mnt_list; - struct list_head mnt_fslink; /* link in fs-specific expiry list */ + struct list_head mnt_expire; /* link in fs-specific expiry list */ struct namespace *mnt_namespace; /* containing namespace */ }; -- cgit v1.2.3-70-g09d2 From 751c404b8f63e8199d5f2f8f2bcfd69b41d11caa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:30 -0700 Subject: [PATCH] namespace: rename _mntput to mntput_no_expire This patch renames _mntput() to something a little more descriptive: mntput_no_expire(). 
Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 2 +- include/linux/mount.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index fa8df81ce8c..1d93cb4f7c5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -314,7 +314,7 @@ void path_release(struct nameidata *nd) void path_release_on_umount(struct nameidata *nd) { dput(nd->dentry); - _mntput(nd->mnt); + mntput_no_expire(nd->mnt); } /* diff --git a/include/linux/mount.h b/include/linux/mount.h index 196d2d6de4a..74b4727a4e3 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -47,7 +47,7 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt) extern void __mntput(struct vfsmount *mnt); -static inline void _mntput(struct vfsmount *mnt) +static inline void mntput_no_expire(struct vfsmount *mnt) { if (mnt) { if (atomic_dec_and_test(&mnt->mnt_count)) @@ -59,7 +59,7 @@ static inline void mntput(struct vfsmount *mnt) { if (mnt) { mnt->mnt_expiry_mark = 0; - _mntput(mnt); + mntput_no_expire(mnt); } } -- cgit v1.2.3-70-g09d2 From a6ccbbb8865101d83c2e716f08feae1da1c48584 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:11 -0700 Subject: [PATCH] nfsd4: fix sync'ing of recovery directory We need to fsync the recovery directory after writing to it, but we weren't doing this correctly. (For example, we weren't taking the i_sem when calling ->fsync().) Just reuse the existing nfsd fsync code instead. Signed-off-by: J. 
Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4recover.c | 29 ++++++++--------------------- fs/nfsd/vfs.c | 2 +- include/linux/nfsd/nfsd.h | 1 + 3 files changed, 10 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 095f1740f3a..bb40083b6b7 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -119,25 +119,12 @@ out: return status; } -static int -nfsd4_rec_fsync(struct dentry *dentry) +static void +nfsd4_sync_rec_dir(void) { - struct file *filp; - int status = nfs_ok; - - dprintk("NFSD: nfs4_fsync_rec_dir\n"); - filp = dentry_open(dget(dentry), mntget(rec_dir.mnt), O_RDWR); - if (IS_ERR(filp)) { - status = PTR_ERR(filp); - goto out; - } - if (filp->f_op && filp->f_op->fsync) - status = filp->f_op->fsync(filp, filp->f_dentry, 0); - fput(filp); -out: - if (status) - printk("nfsd4: unable to sync recovery directory\n"); - return status; + down(&rec_dir.dentry->d_inode->i_sem); + nfsd_sync_dir(rec_dir.dentry); + up(&rec_dir.dentry->d_inode->i_sem); } int @@ -176,7 +163,7 @@ out_unlock: up(&rec_dir.dentry->d_inode->i_sem); if (status == 0) { clp->cl_firststate = 1; - status = nfsd4_rec_fsync(rec_dir.dentry); + nfsd4_sync_rec_dir(); } nfs4_reset_user(uid, gid); dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); @@ -331,7 +318,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); nfs4_reset_user(uid, gid); if (status == 0) - status = nfsd4_rec_fsync(rec_dir.dentry); + nfsd4_sync_rec_dir(); if (status) printk("NFSD: Failed to remove expired client state directory" " %.*s\n", HEXDIR_LEN, clp->cl_recdir); @@ -362,7 +349,7 @@ nfsd4_recdir_purge_old(void) { return; status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old); if (status == 0) - status = nfsd4_rec_fsync(rec_dir.dentry); + nfsd4_sync_rec_dir(); if (status) printk("nfsd4: failed to purge old clients 
from recovery" " directory %s\n", rec_dir.dentry->d_name.name); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index be24ead89d9..5e0bf391760 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -733,7 +733,7 @@ nfsd_sync(struct file *filp) up(&inode->i_sem); } -static void +void nfsd_sync_dir(struct dentry *dp) { nfsd_dosync(NULL, dp, dp->d_inode->i_fop); diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 5791dfd30dd..c2da1b62d41 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -124,6 +124,7 @@ int nfsd_statfs(struct svc_rqst *, struct svc_fh *, int nfsd_notify_change(struct inode *, struct iattr *); int nfsd_permission(struct svc_export *, struct dentry *, int); +void nfsd_sync_dir(struct dentry *dp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL -- cgit v1.2.3-70-g09d2 From 7fb64cee34f5dc743f697041717cafda8a94b5ac Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:20 -0700 Subject: [PATCH] nfsd4: seqid comments Add some comments on the use of so_seqid, in an attempt to avoid some of the confusion outlined in the previous patch.... Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4xdr.c | 8 ++++---- include/linux/nfsd/state.h | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5207068cde1..1515c5b8096 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1210,10 +1210,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) save = resp->p; /* - * Routine for encoding the result of a - * "seqid-mutating" NFSv4 operation. This is - * where seqids are incremented, and the - * replay cache is filled. + * Routine for encoding the result of a "seqid-mutating" NFSv4 operation. This + * is where sequence id's are incremented, and the replay cache is filled. 
+ * Note that we increment sequence id's here, at the last moment, so we're sure + * we know whether the error to be returned is a sequence id mutating error. */ #define ENCODE_SEQID_OP_TAIL(stateowner) do { \ diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index a84a3fa99be..2d19431f47e 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -203,7 +203,9 @@ struct nfs4_stateowner { int so_is_open_owner; /* 1=openowner,0=lockowner */ u32 so_id; struct nfs4_client * so_client; - u32 so_seqid; + /* after increment in ENCODE_SEQID_OP_TAIL, represents the next + * sequence id expected from the client: */ + u32 so_seqid; struct xdr_netobj so_owner; /* open owner name */ int so_confirmed; /* successful OPEN_CONFIRM? */ struct nfs4_replay so_replay; -- cgit v1.2.3-70-g09d2 From b700949b781480819e53bdc38a53f053226dd75e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:23 -0700 Subject: [PATCH] nfsd4: return better error on io incompatible with open mode from RFC 3530: "Share reservations are established by OPEN operations and by their nature are mandatory in that when the OPEN denies READ or WRITE operations, that denial results in such operations being rejected with error NFS4ERR_LOCKED." (Note that share_denied is really only a legal error for OPEN.) Signed-off-by: Andy Adamson Signed-off-by: J. 
Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 2 +- include/linux/nfsd/nfsd.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b96714ae3dd..3647c942915 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1295,7 +1295,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) fp = find_file(ino); if (!fp) return nfs_ok; - ret = nfserr_share_denied; + ret = nfserr_locked; /* Search for conflicting share reservations */ list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { if (test_bit(deny_type, &stp->st_deny_bmap) || diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index c2da1b62d41..6d5a24f3fc6 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -231,6 +231,7 @@ void nfsd_lockd_shutdown(void); #define nfserr_reclaim_bad __constant_htonl(NFSERR_RECLAIM_BAD) #define nfserr_badname __constant_htonl(NFSERR_BADNAME) #define nfserr_cb_path_down __constant_htonl(NFSERR_CB_PATH_DOWN) +#define nfserr_locked __constant_htonl(NFSERR_LOCKED) /* error codes for internal use */ /* if a request fails due to kmalloc failure, it gets dropped. -- cgit v1.2.3-70-g09d2 From 4c4cd222ee329025840bc2f8cebf71d36c62440c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:27 -0700 Subject: [PATCH] nfsd4: check lock type against openmode. We shouldn't be allowing, e.g., write locks on files not open for read. To enforce this, we add a pointer from the lock stateid back to the open stateid it came from, so that the check will continue to be correct even after the open is upgraded or downgraded. Signed-off-by: Andy Adamson Signed-off-by: J. 
Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 49 +++++++++++++++++++++++++++++++--------------- include/linux/nfsd/state.h | 5 +++++ 2 files changed, 38 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 59b214f01b6..b83f8fb441e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1160,6 +1160,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open * stp->st_deny_bmap = 0; __set_bit(open->op_share_access, &stp->st_access_bmap); __set_bit(open->op_share_deny, &stp->st_deny_bmap); + stp->st_openstp = NULL; } static void @@ -2158,12 +2159,18 @@ out: return status; } +static inline int +setlkflg (int type) +{ + return (type == NFS4_READW_LT || type == NFS4_READ_LT) ? + RD_STATE : WR_STATE; +} /* * Checks for sequence id mutating operations. */ static int -nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid) +nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, struct nfsd4_lock *lock) { struct nfs4_stateid *stp; struct nfs4_stateowner *sop; @@ -2201,21 +2208,31 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei goto check_replay; } - /* for new lock stateowners: - * check that the lock->v.new.open_stateid - * refers to an open stateowner - * - * check that the lockclid (nfs4_lock->v.new.clientid) is the same - * as the open_stateid->st_stateowner->so_client->clientid - */ - if (lockclid) { + if (lock) { struct nfs4_stateowner *sop = stp->st_stateowner; + clientid_t *lockclid = &lock->v.new.clientid; struct nfs4_client *clp = sop->so_client; + int lkflg = 0; + int status; + + lkflg = setlkflg(lock->lk_type); + + if (lock->lk_is_new) { + 
if (!sop->so_is_open_owner) + return nfserr_bad_stateid; + if (!cmp_clid(&clp->cl_clientid, lockclid)) + return nfserr_bad_stateid; + /* stp is the open stateid */ + status = nfs4_check_openmode(stp, lkflg); + if (status) + return status; + } else { + /* stp is the lock stateid */ + status = nfs4_check_openmode(stp->st_openstp, lkflg); + if (status) + return status; + } - if (!sop->so_is_open_owner) - return nfserr_bad_stateid; - if (!cmp_clid(&clp->cl_clientid, lockclid)) - return nfserr_bad_stateid; } if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) { @@ -2642,6 +2659,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */ stp->st_access_bmap = open_stp->st_access_bmap; stp->st_deny_bmap = open_stp->st_deny_bmap; + stp->st_openstp = open_stp; out: return stp; @@ -2697,8 +2715,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock lock->lk_new_open_seqid, &lock->lk_new_open_stateid, CHECK_FH | OPEN_STATE, - &open_sop, &open_stp, - &lock->v.new.clientid); + &open_sop, &open_stp, lock); if (status) goto out; /* create lockowner and lock stateid */ @@ -2726,7 +2743,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, CHECK_FH | LOCK_STATE, - &lock->lk_stateowner, &lock_stp, NULL); + &lock->lk_stateowner, &lock_stp, lock); if (status) goto out; } diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 2d19431f47e..8bf23cf8b60 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -237,6 +237,10 @@ struct nfs4_file { * st_perlockowner: (open stateid) list of lock nfs4_stateowners * st_access_bmap: used only for open stateid * st_deny_bmap: used only for open stateid +* st_openstp: open stateid lock stateid was derived from +* +* XXX: open stateids and lock stateids have diverged sufficiently that +* we should 
consider defining separate structs for the two cases. */ struct nfs4_stateid { @@ -250,6 +254,7 @@ struct nfs4_stateid { struct file * st_vfs_file; unsigned long st_access_bmap; unsigned long st_deny_bmap; + struct nfs4_stateid * st_openstp; }; /* flags for preprocess_seqid_op() */ -- cgit v1.2.3-70-g09d2 From 86a76caf8705e3524e15f343f3c4806939a06dc8 Mon Sep 17 00:00:00 2001 From: Victor Fusco Date: Fri, 8 Jul 2005 14:57:47 -0700 Subject: [NET]: Fix sparse warnings From: Victor Fusco Fix the sparse warning "implicit cast to nocast type" Signed-off-by: Victor Fusco Signed-off-by: Domen Puncer Signed-off-by: David S. Miller --- include/linux/skbuff.h | 29 ++++++++++++++++++----------- include/net/sock.h | 18 +++++++++++------- include/net/tcp.h | 3 ++- net/core/dev.c | 2 +- net/core/skbuff.c | 17 ++++++++++------- net/core/sock.c | 11 +++++++---- net/ipv4/tcp_output.c | 2 +- 7 files changed, 50 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 14b95041349..5d4a990d557 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -300,20 +300,26 @@ struct sk_buff { #include extern void __kfree_skb(struct sk_buff *skb); -extern struct sk_buff *alloc_skb(unsigned int size, int priority); +extern struct sk_buff *alloc_skb(unsigned int size, + unsigned int __nocast priority); extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, - unsigned int size, int priority); + unsigned int size, + unsigned int __nocast priority); extern void kfree_skbmem(struct sk_buff *skb); -extern struct sk_buff *skb_clone(struct sk_buff *skb, int priority); -extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority); -extern struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask); +extern struct sk_buff *skb_clone(struct sk_buff *skb, + unsigned int __nocast priority); +extern struct sk_buff *skb_copy(const struct sk_buff *skb, + unsigned int __nocast priority); +extern struct sk_buff 
*pskb_copy(struct sk_buff *skb, + unsigned int __nocast gfp_mask); extern int pskb_expand_head(struct sk_buff *skb, - int nhead, int ntail, int gfp_mask); + int nhead, int ntail, + unsigned int __nocast gfp_mask); extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, - int priority); + unsigned int __nocast priority); extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); #define dev_kfree_skb(a) kfree_skb(a) extern void skb_over_panic(struct sk_buff *skb, int len, @@ -464,7 +470,8 @@ static inline int skb_shared(const struct sk_buff *skb) * * NULL is returned on a memory allocation failure. */ -static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, + unsigned int __nocast pri) { might_sleep_if(pri & __GFP_WAIT); if (skb_shared(skb)) { @@ -1001,7 +1008,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) * %NULL is returned in there is no free memory. */ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, - int gfp_mask) + unsigned int __nocast gfp_mask) { struct sk_buff *skb = alloc_skb(length + 16, gfp_mask); if (likely(skb)) @@ -1114,8 +1121,8 @@ static inline int skb_can_coalesce(struct sk_buff *skb, int i, * If there is no free memory -ENOMEM is returned, otherwise zero * is returned and the old skb data released. 
*/ -extern int __skb_linearize(struct sk_buff *skb, int gfp); -static inline int skb_linearize(struct sk_buff *skb, int gfp) +extern int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp); +static inline int skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp) { return __skb_linearize(skb, gfp); } diff --git a/include/net/sock.h b/include/net/sock.h index 7b76f891ae2..a1042d08bec 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -684,16 +684,17 @@ extern void FASTCALL(release_sock(struct sock *sk)); #define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock)) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) -extern struct sock *sk_alloc(int family, int priority, +extern struct sock *sk_alloc(int family, + unsigned int __nocast priority, struct proto *prot, int zero_it); extern void sk_free(struct sock *sk); extern struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, - int priority); + unsigned int __nocast priority); extern struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, - int priority); + unsigned int __nocast priority); extern void sock_wfree(struct sk_buff *skb); extern void sock_rfree(struct sk_buff *skb); @@ -708,7 +709,8 @@ extern struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int noblock, int *errcode); -extern void *sock_kmalloc(struct sock *sk, int size, int priority); +extern void *sock_kmalloc(struct sock *sk, int size, + unsigned int __nocast priority); extern void sock_kfree_s(struct sock *sk, void *mem, int size); extern void sk_send_sigurg(struct sock *sk); @@ -1132,7 +1134,8 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) } static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, - int size, int mem, int gfp) + int size, int mem, + unsigned int __nocast gfp) { struct sk_buff *skb; int hdr_len; @@ -1155,7 +1158,8 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, } static 
inline struct sk_buff *sk_stream_alloc_skb(struct sock *sk, - int size, int gfp) + int size, + unsigned int __nocast gfp) { return sk_stream_alloc_pskb(sk, size, 0, gfp); } @@ -1188,7 +1192,7 @@ static inline int sock_writeable(const struct sock *sk) return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf / 2); } -static inline int gfp_any(void) +static inline unsigned int __nocast gfp_any(void) { return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; } diff --git a/include/net/tcp.h b/include/net/tcp.h index 4d5b12e4dc1..f4f9aba07ac 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -860,7 +860,8 @@ extern void tcp_send_probe0(struct sock *); extern void tcp_send_partial(struct sock *); extern int tcp_write_wakeup(struct sock *); extern void tcp_send_fin(struct sock *sk); -extern void tcp_send_active_reset(struct sock *sk, int priority); +extern void tcp_send_active_reset(struct sock *sk, + unsigned int __nocast priority); extern int tcp_send_synack(struct sock *); extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); diff --git a/net/core/dev.c b/net/core/dev.c index 7f5f62c6511..ff9dc029233 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1127,7 +1127,7 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) extern void skb_release_data(struct sk_buff *); /* Keep head the same: replace data */ -int __skb_linearize(struct sk_buff *skb, int gfp_mask) +int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask) { unsigned int size; u8 *data; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 733deee24b9..d9f7b06fe88 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -129,7 +129,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. 
*/ -struct sk_buff *alloc_skb(unsigned int size, int gfp_mask) +struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask) { struct sk_buff *skb; u8 *data; @@ -182,7 +182,8 @@ nodata: * %GFP_ATOMIC. */ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, - unsigned int size, int gfp_mask) + unsigned int size, + unsigned int __nocast gfp_mask) { struct sk_buff *skb; u8 *data; @@ -322,7 +323,7 @@ void __kfree_skb(struct sk_buff *skb) * %GFP_ATOMIC. */ -struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) +struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) { struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); @@ -460,7 +461,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) * header is going to be modified. Use pskb_copy() instead. */ -struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) +struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_mask) { int headerlen = skb->data - skb->head; /* @@ -499,7 +500,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) * The returned buffer has a reference count of 1. */ -struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) +struct sk_buff *pskb_copy(struct sk_buff *skb, unsigned int __nocast gfp_mask) { /* * Allocate the copy buffer @@ -557,7 +558,8 @@ out: * reloaded after call to this function. */ -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, + unsigned int __nocast gfp_mask) { int i; u8 *data; @@ -647,7 +649,8 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) * only by netfilter in the cases when checksum is recalculated? 
--ANK */ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, - int newheadroom, int newtailroom, int gfp_mask) + int newheadroom, int newtailroom, + unsigned int __nocast gfp_mask) { /* * Allocate the copy buffer diff --git a/net/core/sock.c b/net/core/sock.c index a6ec3ada7f9..8b35ccdc2b3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -622,7 +622,8 @@ lenout: * @prot: struct proto associated with this new sock instance * @zero_it: if we should zero the newly allocated sock */ -struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it) +struct sock *sk_alloc(int family, unsigned int __nocast priority, + struct proto *prot, int zero_it) { struct sock *sk = NULL; kmem_cache_t *slab = prot->slab; @@ -750,7 +751,8 @@ unsigned long sock_i_ino(struct sock *sk) /* * Allocate a skb from the socket's send buffer. */ -struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority) +struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, + unsigned int __nocast priority) { if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { struct sk_buff * skb = alloc_skb(size, priority); @@ -765,7 +767,8 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int /* * Allocate a skb from the socket's receive buffer. */ -struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority) +struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, + unsigned int __nocast priority) { if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { struct sk_buff *skb = alloc_skb(size, priority); @@ -780,7 +783,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int /* * Allocate a memory block from the socket's option memory buffer. 
*/ -void *sock_kmalloc(struct sock *sk, int size, int priority) +void *sock_kmalloc(struct sock *sk, int size, unsigned int __nocast priority) { if ((unsigned)size <= sysctl_optmem_max && atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e041d057ec8..e3f8ea1bfa9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1613,7 +1613,7 @@ void tcp_send_fin(struct sock *sk) * was unread data in the receive queue. This behavior is recommended * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM */ -void tcp_send_active_reset(struct sock *sk, int priority) +void tcp_send_active_reset(struct sock *sk, unsigned int __nocast priority) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; -- cgit v1.2.3-70-g09d2 From ca9b907d140a5f249250d19f956129dbbbf84f73 Mon Sep 17 00:00:00 2001 From: David L Stevens Date: Fri, 8 Jul 2005 17:38:07 -0700 Subject: [IPV4]: multicast API "join" issues This patch corrects a few problems with the IP_ADD_MEMBERSHIP socket option: 1) The existing code makes an attempt at reference counting joins when using the ip_mreqn/imr_ifindex interface. Joining the same group on the same socket is an error, whatever the API. This leads to unexpected results when mixing ip_mreqn by index with ip_mreqn by address, ip_mreq, or other API's. For example, ip_mreq followed by ip_mreqn of the same group will "work" while the same two reversed will not. Fixed to always return EADDRINUSE on a duplicate join and removed the (now unused) reference count in ip_mc_socklist. 2) The group-search list in ip_mc_join_group() is comparing a full ip_mreqn structure and all of it must match for it to find the group. This doesn't correctly match a group that was joined with ip_mreq or ip_mreqn with an address (with or without an index). It also doesn't match groups that are joined by different addresses on the same interface. 
All of these are the same multicast group, which is identified by group address and interface index. Fixed the check to correctly match groups so we don't get duplicate group entries on the ip_mc_socklist. 3) The old code allocates a multicast address before searching for duplicates, requiring it to be freed in various error cases. This patch moves the allocation until after the search and the igmp_max_memberships check, so there is never a need to allocate and then free an entry. Signed-off-by: David L Stevens Signed-off-by: David S. Miller --- include/linux/igmp.h | 1 - net/ipv4/igmp.c | 35 ++++++++++++----------------------- 2 files changed, 12 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 390e760a96d..0c31ef0b5ba 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -148,7 +148,6 @@ struct ip_sf_socklist struct ip_mc_socklist { struct ip_mc_socklist *next; - int count; struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip_sf_socklist *sflist; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 1f3183168a9..111eb678cba 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1615,9 +1615,10 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) { int err; u32 addr = imr->imr_multiaddr.s_addr; - struct ip_mc_socklist *iml, *i; + struct ip_mc_socklist *iml=NULL, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); + int ifindex; int count = 0; if (!MULTICAST(addr)) @@ -1633,37 +1634,30 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) goto done; } - iml = (struct ip_mc_socklist *)sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); - err = -EADDRINUSE; + ifindex = imr->imr_ifindex; for (i = inet->mc_list; i; i = i->next) { - if (memcmp(&i->multi, imr, sizeof(*imr)) == 0) { - /* New style additions are reference counted */ - if (imr->imr_address.s_addr == 0) { - i->count++; - err = 0; - } + if (i->multi.imr_multiaddr.s_addr == addr && + 
i->multi.imr_ifindex == ifindex) goto done; - } count++; } err = -ENOBUFS; - if (iml == NULL || count >= sysctl_igmp_max_memberships) + if (count >= sysctl_igmp_max_memberships) + goto done; + iml = (struct ip_mc_socklist *)sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL); + if (iml == NULL) goto done; + memcpy(&iml->multi, imr, sizeof(*imr)); iml->next = inet->mc_list; - iml->count = 1; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; inet->mc_list = iml; ip_mc_inc_group(in_dev, addr); - iml = NULL; err = 0; - done: rtnl_shunlock(); - if (iml) - sock_kfree_s(sk, iml, sizeof(*iml)); return err; } @@ -1704,12 +1698,6 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) in_dev = inetdev_by_index(iml->multi.imr_ifindex); if (in_dev) (void) ip_mc_leave_src(sk, iml, in_dev); - if (--iml->count) { - rtnl_unlock(); - if (in_dev) - in_dev_put(in_dev); - return 0; - } *imlp = iml->next; @@ -1755,7 +1743,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct err = -EADDRNOTAVAIL; for (pmc=inet->mc_list; pmc; pmc=pmc->next) { - if (memcmp(&pmc->multi, mreqs, 2*sizeof(__u32)) == 0) + if (pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr + && pmc->multi.imr_ifindex == imr.imr_ifindex) break; } if (!pmc) /* must have a prior join */ -- cgit v1.2.3-70-g09d2 From d369ddd2fc00fc3f46e9052d1017cbf407e3cdf7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Jul 2005 15:45:11 -0700 Subject: [SPARC64]: Add __read_mostly support. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/smp.c | 10 +++++----- arch/sparc64/kernel/time.c | 24 +++++++++--------------- arch/sparc64/kernel/vmlinux.lds.S | 2 ++ include/linux/cache.h | 2 +- 4 files changed, 17 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index e5b9c7a2778..441fc2e52ce 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -45,8 +45,8 @@ extern void calibrate_delay(void); /* Please don't make this stuff initdata!!! --DaveM */ static unsigned char boot_cpu_id; -cpumask_t cpu_online_map = CPU_MASK_NONE; -cpumask_t phys_cpu_present_map = CPU_MASK_NONE; +cpumask_t cpu_online_map = CPU_MASK_NONE __read_mostly; +cpumask_t phys_cpu_present_map = CPU_MASK_NONE __read_mostly; static cpumask_t smp_commenced_mask; static cpumask_t cpu_callout_map; @@ -155,7 +155,7 @@ void cpu_panic(void) panic("SMP bolixed\n"); } -static unsigned long current_tick_offset; +static unsigned long current_tick_offset __read_mostly; /* This tick register synchronization scheme is taken entirely from * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit. 
@@ -1193,8 +1193,8 @@ void smp_send_stop(void) { } -unsigned long __per_cpu_base; -unsigned long __per_cpu_shift; +unsigned long __per_cpu_base __read_mostly; +unsigned long __per_cpu_shift __read_mostly; EXPORT_SYMBOL(__per_cpu_base); EXPORT_SYMBOL(__per_cpu_shift); diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index b40db389f90..362b9c26871 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -73,7 +73,7 @@ static __initdata struct sparc64_tick_ops dummy_tick_ops = { .get_tick = dummy_get_tick, }; -struct sparc64_tick_ops *tick_ops = &dummy_tick_ops; +struct sparc64_tick_ops *tick_ops __read_mostly = &dummy_tick_ops; #define TICK_PRIV_BIT (1UL << 63) @@ -195,7 +195,7 @@ static unsigned long tick_add_tick(unsigned long adj, unsigned long offset) return new_tick; } -static struct sparc64_tick_ops tick_operations = { +static struct sparc64_tick_ops tick_operations __read_mostly = { .init_tick = tick_init_tick, .get_tick = tick_get_tick, .get_compare = tick_get_compare, @@ -276,7 +276,7 @@ static unsigned long stick_add_compare(unsigned long adj) return new_compare; } -static struct sparc64_tick_ops stick_operations = { +static struct sparc64_tick_ops stick_operations __read_mostly = { .init_tick = stick_init_tick, .get_tick = stick_get_tick, .get_compare = stick_get_compare, @@ -422,7 +422,7 @@ static unsigned long hbtick_add_compare(unsigned long adj) return val; } -static struct sparc64_tick_ops hbtick_operations = { +static struct sparc64_tick_ops hbtick_operations __read_mostly = { .init_tick = hbtick_init_tick, .get_tick = hbtick_get_tick, .get_compare = hbtick_get_compare, @@ -437,10 +437,9 @@ static struct sparc64_tick_ops hbtick_operations = { * NOTE: On SUN5 systems the ticker interrupt comes in using 2 * interrupts, one at level14 and one with softint bit 0. 
*/ -unsigned long timer_tick_offset; -unsigned long timer_tick_compare; +unsigned long timer_tick_offset __read_mostly; -static unsigned long timer_ticks_per_nsec_quotient; +static unsigned long timer_ticks_per_nsec_quotient __read_mostly; #define TICK_SIZE (tick_nsec / 1000) @@ -464,7 +463,7 @@ static inline void timer_check_rtc(void) static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs) { - unsigned long ticks, pstate; + unsigned long ticks, compare, pstate; write_seqlock(&xtime_lock); @@ -483,14 +482,14 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs) : "=r" (pstate) : "i" (PSTATE_IE)); - timer_tick_compare = tick_ops->add_compare(timer_tick_offset); + compare = tick_ops->add_compare(timer_tick_offset); ticks = tick_ops->get_tick(); /* Restore PSTATE_IE. */ __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : /* no outputs */ : "r" (pstate)); - } while (time_after_eq(ticks, timer_tick_compare)); + } while (time_after_eq(ticks, compare)); timer_check_rtc(); @@ -506,11 +505,6 @@ void timer_tick_interrupt(struct pt_regs *regs) do_timer(regs); - /* - * Only keep timer_tick_offset uptodate, but don't set TICK_CMPR. - */ - timer_tick_compare = tick_ops->get_compare() + timer_tick_offset; - timer_check_rtc(); write_sequnlock(&xtime_lock); diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index 382fd6798bb..950423da8a6 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -32,6 +32,8 @@ SECTIONS .data1 : { *(.data1) } . = ALIGN(64); .data.cacheline_aligned : { *(.data.cacheline_aligned) } + . 
= ALIGN(64); + .data.read_mostly : { *(.data.read_mostly) } _edata = .; PROVIDE (edata = .); .fixup : { *(.fixup) } diff --git a/include/linux/cache.h b/include/linux/cache.h index 2b66a36d85f..f6b5a46c5f8 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -13,7 +13,7 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif -#ifdef CONFIG_X86 +#if defined(CONFIG_X86) || defined(CONFIG_SPARC64) #define __read_mostly __attribute__((__section__(".data.read_mostly"))) #else #define __read_mostly -- cgit v1.2.3-70-g09d2 From f7ceba360cce9af3fbc4e5a5b1bd40b570b7021c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Jul 2005 19:29:45 -0700 Subject: [SPARC64]: Add syscall auditing support. Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 10 +++++----- arch/sparc64/kernel/ptrace.c | 32 +++++++++++++++++++++++++++++--- include/asm-sparc64/thread_info.h | 8 +++++--- include/linux/audit.h | 2 +- init/Kconfig | 2 +- 5 files changed, 41 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 8b7ed760c50..d781f10adc5 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -1552,7 +1552,7 @@ sys_ptrace: add %sp, PTREGS_OFF, %o0 nop .align 32 1: ldx [%curptr + TI_FLAGS], %l5 - andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %g0 + andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0 be,pt %icc, rtrap clr %l6 add %sp, PTREGS_OFF, %o0 @@ -1679,7 +1679,7 @@ linux_sparc_syscall32: srl %i5, 0, %o5 ! IEU1 srl %i2, 0, %o2 ! IEU0 Group - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %g0 ! IEU0 Group + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0 bne,pn %icc, linux_syscall_trace32 ! CTI mov %i0, %l5 ! IEU1 call %l7 ! CTI Group brk forced @@ -1702,7 +1702,7 @@ linux_sparc_syscall: mov %i3, %o3 ! IEU1 mov %i4, %o4 ! IEU0 Group - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %g0 ! 
IEU1 Group+1 bubble + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0 bne,pn %icc, linux_syscall_trace ! CTI Group mov %i0, %l5 ! IEU0 2: call %l7 ! CTI Group brk forced @@ -1730,7 +1730,7 @@ ret_sys_call: 1: cmp %o0, -ERESTART_RESTARTBLOCK bgeu,pn %xcc, 1f - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %l6 + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %l6 80: /* System call success, clear Carry condition code. */ andn %g3, %g2, %g3 @@ -1745,7 +1745,7 @@ ret_sys_call: /* System call failure, set Carry condition code. * Also, get abs(errno) to return to the process. */ - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %l6 + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %l6 sub %g0, %o0, %o0 or %g3, %g2, %g3 stx %o0, [%sp + PTREGS_OFF + PT_V9_I0] diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c index c57dc9ea731..23ad839d113 100644 --- a/arch/sparc64/kernel/ptrace.c +++ b/arch/sparc64/kernel/ptrace.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -633,10 +635,22 @@ asmlinkage void syscall_trace(struct pt_regs *regs, int syscall_exit_p) /* do the secure computing check first */ secure_computing(regs->u_regs[UREG_G1]); - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; + if (unlikely(current->audit_context) && syscall_exit_p) { + unsigned long tstate = regs->tstate; + int result = AUDITSC_SUCCESS; + + if (unlikely(tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) + result = AUDITSC_FAILURE; + + audit_syscall_exit(current, result, regs->u_regs[UREG_I0]); + } + if (!(current->ptrace & PT_PTRACED)) - return; + goto out; + + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + goto out; + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 
0x80 : 0)); @@ -649,4 +663,16 @@ asmlinkage void syscall_trace(struct pt_regs *regs, int syscall_exit_p) send_sig(current->exit_code, current, 1); current->exit_code = 0; } + +out: + if (unlikely(current->audit_context) && !syscall_exit_p) + audit_syscall_entry(current, + (test_thread_flag(TIF_32BIT) ? + AUDIT_ARCH_SPARC : + AUDIT_ARCH_SPARC64), + regs->u_regs[UREG_G1], + regs->u_regs[UREG_I0], + regs->u_regs[UREG_I1], + regs->u_regs[UREG_I2], + regs->u_regs[UREG_I3]); } diff --git a/include/asm-sparc64/thread_info.h b/include/asm-sparc64/thread_info.h index 6b2fbb89bb6..a1d25c06f92 100644 --- a/include/asm-sparc64/thread_info.h +++ b/include/asm-sparc64/thread_info.h @@ -221,7 +221,7 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define TIF_32BIT 7 /* 32-bit binary */ #define TIF_NEWCHILD 8 /* just-spawned child process */ #define TIF_SECCOMP 9 /* secure computing */ -#define TIF_POLLING_NRFLAG 10 +#define TIF_SYSCALL_AUDIT 10 /* syscall auditing active */ #define TIF_SYSCALL_SUCCESS 11 /* NOTE: Thread flags >= 12 should be ones we have no interest * in using in assembly, else we can't use the mask as @@ -229,6 +229,7 @@ register struct thread_info *current_thread_info_reg asm("g6"); */ #define TIF_ABI_PENDING 12 #define TIF_MEMDIE 13 +#define TIF_POLLING_NRFLAG 14 #define _TIF_SYSCALL_TRACE (1< Date: Mon, 11 Jul 2005 14:29:11 -0700 Subject: [NETLINK]: Reserve NETLINK_NETFILTER. Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 27e4d164a10..2f0c085f2c7 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -16,6 +16,7 @@ #define NETLINK_AUDIT 9 /* auditing */ #define NETLINK_FIB_LOOKUP 10 #define NETLINK_ROUTE6 11 /* af_inet6 route comm channel */ +#define NETLINK_NETFILTER 12 /* netfilter subsystem */ #define NETLINK_IP6_FW 13 #define NETLINK_DNRTMSG 14 /* DECnet routing messages */ #define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ -- cgit v1.2.3-70-g09d2 From e2a5b420f716cd1a46674b1a90389612eced916f Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 18 Mar 2005 16:20:46 -0500 Subject: [ACPI] ACPI poweroff fix Register an "acpi" system device to be notified of shutdown preparation. This depends on CONFIG_PM http://bugzilla.kernel.org/show_bug.cgi?id=4041 Signed-off-by: Alexey Starikovskiy Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/acpi/sleep/main.c | 74 +++++++++++++++++---------------------- drivers/acpi/sleep/poweroff.c | 81 ++++++++++++++++++++++++++++++++++++++----- drivers/base/sys.c | 1 - include/linux/pm.h | 2 +- kernel/power/main.c | 2 +- 5 files changed, 107 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index 0a5d2a94131..7249ba2b7a2 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -1,6 +1,7 @@ /* * sleep.c - ACPI sleep support. 
* + * Copyright (c) 2005 Alexey Starikovskiy * Copyright (c) 2004 David Shaohua Li * Copyright (c) 2000-2003 Patrick Mochel * Copyright (c) 2003 Open Source Development Lab @@ -14,7 +15,6 @@ #include #include #include -#include #include #include #include "sleep.h" @@ -27,10 +27,11 @@ extern void do_suspend_lowlevel_s4bios(void); extern void do_suspend_lowlevel(void); static u32 acpi_suspend_states[] = { - [PM_SUSPEND_ON] = ACPI_STATE_S0, - [PM_SUSPEND_STANDBY] = ACPI_STATE_S1, - [PM_SUSPEND_MEM] = ACPI_STATE_S3, - [PM_SUSPEND_DISK] = ACPI_STATE_S4, + [PM_SUSPEND_ON] = ACPI_STATE_S0, + [PM_SUSPEND_STANDBY] = ACPI_STATE_S1, + [PM_SUSPEND_MEM] = ACPI_STATE_S3, + [PM_SUSPEND_DISK] = ACPI_STATE_S4, + [PM_SUSPEND_MAX] = ACPI_STATE_S5 }; static int init_8259A_after_S1; @@ -44,30 +45,20 @@ static int init_8259A_after_S1; * wakeup code to the waking vector. */ +extern int acpi_sleep_prepare(u32 acpi_state); +extern void acpi_power_off(void); + static int acpi_pm_prepare(suspend_state_t pm_state) { u32 acpi_state = acpi_suspend_states[pm_state]; - if (!sleep_states[acpi_state]) + if (!sleep_states[acpi_state]) { + printk("acpi_pm_prepare does not support %d \n", pm_state); return -EPERM; - - /* do we have a wakeup address for S2 and S3? */ - /* Here, we support only S4BIOS, those we set the wakeup address */ - /* S4OS is only supported for now via swsusp.. */ - if (pm_state == PM_SUSPEND_MEM || pm_state == PM_SUSPEND_DISK) { - if (!acpi_wakeup_address) - return -EFAULT; - acpi_set_firmware_waking_vector( - (acpi_physical_address) virt_to_phys( - (void *)acpi_wakeup_address)); } - ACPI_FLUSH_CPU_CACHE(); - acpi_enable_wakeup_device_prep(acpi_state); - acpi_enter_sleep_state_prep(acpi_state); - return 0; + return acpi_sleep_prepare(acpi_state); } - /** * acpi_pm_enter - Actually enter a sleep state. * @pm_state: State we're entering. 
@@ -92,11 +83,9 @@ static int acpi_pm_enter(suspend_state_t pm_state) return error; } - local_irq_save(flags); acpi_enable_wakeup_device(acpi_state); - switch (pm_state) - { + switch (pm_state) { case PM_SUSPEND_STANDBY: barrier(); status = acpi_enter_sleep_state(acpi_state); @@ -112,6 +101,10 @@ static int acpi_pm_enter(suspend_state_t pm_state) else do_suspend_lowlevel_s4bios(); break; + case PM_SUSPEND_MAX: + acpi_power_off(); + break; + default: return -EINVAL; } @@ -126,11 +119,9 @@ static int acpi_pm_enter(suspend_state_t pm_state) if (pm_state > PM_SUSPEND_STANDBY) acpi_restore_state_mem(); - return ACPI_SUCCESS(status) ? 0 : -EFAULT; } - /** * acpi_pm_finish - Finish up suspend sequence. * @pm_state: State we're coming out of. @@ -156,27 +147,26 @@ static int acpi_pm_finish(suspend_state_t pm_state) return 0; } - int acpi_suspend(u32 acpi_state) { suspend_state_t states[] = { - [1] = PM_SUSPEND_STANDBY, - [3] = PM_SUSPEND_MEM, - [4] = PM_SUSPEND_DISK, + [1] = PM_SUSPEND_STANDBY, + [3] = PM_SUSPEND_MEM, + [4] = PM_SUSPEND_DISK, + [5] = PM_SUSPEND_MAX }; - if (acpi_state <= 4 && states[acpi_state]) + if (acpi_state < 6 && states[acpi_state]) return pm_suspend(states[acpi_state]); return -EINVAL; } static struct pm_ops acpi_pm_ops = { - .prepare = acpi_pm_prepare, - .enter = acpi_pm_enter, - .finish = acpi_pm_finish, + .prepare = acpi_pm_prepare, + .enter = acpi_pm_enter, + .finish = acpi_pm_finish, }; - /* * Toshiba fails to preserve interrupts over S1, reinitialization * of 8259 is needed after S1 resume. 
@@ -190,16 +180,16 @@ static int __init init_ints_after_s1(struct dmi_system_id *d) static struct dmi_system_id __initdata acpisleep_dmi_table[] = { { - .callback = init_ints_after_s1, - .ident = "Toshiba Satellite 4030cdt", - .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), }, - }, - { }, + .callback = init_ints_after_s1, + .ident = "Toshiba Satellite 4030cdt", + .matches = {DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),}, + }, + {}, }; static int __init acpi_sleep_init(void) { - int i = 0; + int i = 0; dmi_check_system(acpisleep_dmi_table); @@ -207,7 +197,7 @@ static int __init acpi_sleep_init(void) return 0; printk(KERN_INFO PREFIX "(supports"); - for (i=0; i < ACPI_S_STATE_COUNT; i++) { + for (i = 0; i < ACPI_S_STATE_COUNT; i++) { acpi_status status; u8 type_a, type_b; status = acpi_get_sleep_type_data(i, &type_a, &type_b); diff --git a/drivers/acpi/sleep/poweroff.c b/drivers/acpi/sleep/poweroff.c index da237754ded..1fc86e6b5ab 100644 --- a/drivers/acpi/sleep/poweroff.c +++ b/drivers/acpi/sleep/poweroff.c @@ -3,35 +3,100 @@ * * AKA S5, but it is independent of whether or not the kernel supports * any other sleep support in the system. + * + * Copyright (c) 2005 Alexey Starikovskiy + * + * This file is released under the GPLv2. */ #include #include #include #include +#include +#include #include "sleep.h" -static void -acpi_power_off (void) +int acpi_sleep_prepare(u32 acpi_state) +{ + /* Flag to do not allow second time invocation for S5 state */ + static int shutdown_prepared = 0; +#ifdef CONFIG_ACPI_SLEEP + /* do we have a wakeup address for S2 and S3? */ + /* Here, we support only S4BIOS, those we set the wakeup address */ + /* S4OS is only supported for now via swsusp.. 
*/ + if (acpi_state == ACPI_STATE_S3 || acpi_state == ACPI_STATE_S4) { + if (!acpi_wakeup_address) { + return -EFAULT; + } + acpi_set_firmware_waking_vector((acpi_physical_address) + virt_to_phys((void *) + acpi_wakeup_address)); + + } + ACPI_FLUSH_CPU_CACHE(); + acpi_enable_wakeup_device_prep(acpi_state); +#endif + if (acpi_state == ACPI_STATE_S5) { + /* Check if we were already called */ + if (shutdown_prepared) + return 0; + acpi_wakeup_gpe_poweroff_prepare(); + shutdown_prepared = 1; + } + acpi_enter_sleep_state_prep(acpi_state); + return 0; +} + +void acpi_power_off(void) { - printk("%s called\n",__FUNCTION__); + printk("%s called\n", __FUNCTION__); + acpi_sleep_prepare(ACPI_STATE_S5); + local_irq_disable(); /* Some SMP machines only can poweroff in boot CPU */ set_cpus_allowed(current, cpumask_of_cpu(0)); - acpi_wakeup_gpe_poweroff_prepare(); - acpi_enter_sleep_state_prep(ACPI_STATE_S5); - ACPI_DISABLE_IRQS(); acpi_enter_sleep_state(ACPI_STATE_S5); } +#ifdef CONFIG_PM + +static int acpi_shutdown(struct sys_device *x) +{ + return acpi_sleep_prepare(ACPI_STATE_S5); +} + +static struct sysdev_class acpi_sysclass = { + set_kset_name("acpi"), + .shutdown = acpi_shutdown +}; + +static struct sys_device device_acpi = { + .id = 0, + .cls = &acpi_sysclass, +}; + +#endif + static int acpi_poweroff_init(void) { if (!acpi_disabled) { u8 type_a, type_b; acpi_status status; - status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); - if (ACPI_SUCCESS(status)) + status = + acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); + if (ACPI_SUCCESS(status)) { pm_power_off = acpi_power_off; +#ifdef CONFIG_PM + { + int error; + error = sysdev_class_register(&acpi_sysclass); + if (!error) + error = sysdev_register(&device_acpi); + return error; + } +#endif + } } return 0; } diff --git a/drivers/base/sys.c b/drivers/base/sys.c index 9102e3756f9..5474bf9622d 100644 --- a/drivers/base/sys.c +++ b/drivers/base/sys.c @@ -22,7 +22,6 @@ #include #include - extern struct 
subsystem devices_subsys; #define to_sysdev(k) container_of(k, struct sys_device, kobj) diff --git a/include/linux/pm.h b/include/linux/pm.h index ed2b76e7519..da88851266b 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -175,7 +175,7 @@ struct pm_ops { }; extern void pm_set_ops(struct pm_ops *); - +extern struct pm_ops *pm_ops; extern int pm_suspend(suspend_state_t state); diff --git a/kernel/power/main.c b/kernel/power/main.c index 4cdebc972ff..c7eb4a833db 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -190,7 +190,7 @@ int software_suspend(void) int pm_suspend(suspend_state_t state) { - if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) + if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX) return enter_state(state); return -EINVAL; } -- cgit v1.2.3-70-g09d2 From 4e10d12a3d88c88fba3258809aa42d14fd8cf1d1 Mon Sep 17 00:00:00 2001 From: David Shaohua Li Date: Fri, 18 Mar 2005 18:45:35 -0500 Subject: [ACPI] Bind PCI devices with ACPI devices Implement the framework for binding physical devices with ACPI devices. A physical bus like PCI bus should create an 'acpi_bus_type', with: .find_device: For device which has parent such as normal PCI devices. .find_bridge: It's for special devices, such as PCI root bridge or IDE controller. Such devices generally have no parent or ->bus. We use the special method to get an ACPI handle. 
Uses new field in struct device: firmware_data http://bugzilla.kernel.org/show_bug.cgi?id=4277 Signed-off-by: David Shaohua Li Signed-off-by: Len Brown --- drivers/acpi/Makefile | 2 +- drivers/acpi/glue.c | 362 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/acpi/ibm_acpi.c | 4 +- include/acpi/acpi_bus.h | 21 +++ include/linux/device.h | 6 +- 5 files changed, 390 insertions(+), 5 deletions(-) create mode 100644 drivers/acpi/glue.c (limited to 'include/linux') diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 24eb397e17b..ad67e8f61e6 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -36,7 +36,7 @@ processor-objs += processor_perflib.o endif obj-$(CONFIG_ACPI_BUS) += sleep/ -obj-$(CONFIG_ACPI_BUS) += bus.o +obj-$(CONFIG_ACPI_BUS) += bus.o glue.o obj-$(CONFIG_ACPI_AC) += ac.o obj-$(CONFIG_ACPI_BATTERY) += battery.o obj-$(CONFIG_ACPI_BUTTON) += button.o diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c new file mode 100644 index 00000000000..b6d2045caf3 --- /dev/null +++ b/drivers/acpi/glue.c @@ -0,0 +1,362 @@ +/* + * Link physical devices with ACPI devices support + * + * Copyright (c) 2005 David Shaohua Li + * Copyright (c) 2005 Intel Corp. + * + * This file is released under the GPLv2. + */ +#include +#include +#include +#include +#include + +#define ACPI_GLUE_DEBUG 0 +#if ACPI_GLUE_DEBUG +#define DBG(x...) printk(PREFIX x) +#else +#define DBG(x...) 
+#endif +static LIST_HEAD(bus_type_list); +static DECLARE_RWSEM(bus_type_sem); + +int register_acpi_bus_type(struct acpi_bus_type *type) +{ + if (acpi_disabled) + return -ENODEV; + if (type && type->bus && type->find_device) { + down_write(&bus_type_sem); + list_add_tail(&type->list, &bus_type_list); + up_write(&bus_type_sem); + DBG("ACPI bus type %s registered\n", type->bus->name); + return 0; + } + return -ENODEV; +} + +EXPORT_SYMBOL(register_acpi_bus_type); + +int unregister_acpi_bus_type(struct acpi_bus_type *type) +{ + if (acpi_disabled) + return 0; + if (type) { + down_write(&bus_type_sem); + list_del_init(&type->list); + up_write(&bus_type_sem); + DBG("ACPI bus type %s unregistered\n", type->bus->name); + return 0; + } + return -ENODEV; +} + +EXPORT_SYMBOL(unregister_acpi_bus_type); + +static struct acpi_bus_type *acpi_get_bus_type(struct bus_type *type) +{ + struct acpi_bus_type *tmp, *ret = NULL; + + down_read(&bus_type_sem); + list_for_each_entry(tmp, &bus_type_list, list) { + if (tmp->bus == type) { + ret = tmp; + break; + } + } + up_read(&bus_type_sem); + return ret; +} + +static int acpi_find_bridge_device(struct device *dev, acpi_handle * handle) +{ + struct acpi_bus_type *tmp; + int ret = -ENODEV; + + down_read(&bus_type_sem); + list_for_each_entry(tmp, &bus_type_list, list) { + if (tmp->find_bridge && !tmp->find_bridge(dev, handle)) { + ret = 0; + break; + } + } + up_read(&bus_type_sem); + return ret; +} + +/* Get PCI root bridge's handle from its segment and bus number */ +struct acpi_find_pci_root { + unsigned int seg; + unsigned int bus; + acpi_handle handle; +}; + +static acpi_status +do_root_bridge_busnr_callback(struct acpi_resource *resource, void *data) +{ + int *busnr = (int *)data; + struct acpi_resource_address64 address; + + if (resource->id != ACPI_RSTYPE_ADDRESS16 && + resource->id != ACPI_RSTYPE_ADDRESS32 && + resource->id != ACPI_RSTYPE_ADDRESS64) + return AE_OK; + + acpi_resource_to_address64(resource, &address); + if 
((address.address_length > 0) && + (address.resource_type == ACPI_BUS_NUMBER_RANGE)) + *busnr = address.min_address_range; + + return AE_OK; +} + +static int get_root_bridge_busnr(acpi_handle handle) +{ + acpi_status status; + int bus, bbn; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); + + status = acpi_evaluate_integer(handle, METHOD_NAME__BBN, NULL, + (unsigned long *)&bbn); + if (status == AE_NOT_FOUND) { + /* Assume bus = 0 */ + printk(KERN_INFO PREFIX + "Assume root bridge [%s] bus is 0\n", + (char *)buffer.pointer); + status = AE_OK; + bbn = 0; + } + if (ACPI_FAILURE(status)) { + bbn = -ENODEV; + goto exit; + } + if (bbn > 0) + goto exit; + + /* _BBN in some systems return 0 for all root bridges */ + bus = -1; + status = acpi_walk_resources(handle, METHOD_NAME__CRS, + do_root_bridge_busnr_callback, &bus); + /* If _CRS failed, we just use _BBN */ + if (ACPI_FAILURE(status) || (bus == -1)) + goto exit; + /* We select _CRS */ + if (bbn != bus) { + printk(KERN_INFO PREFIX + "_BBN and _CRS returns different value for %s. 
Select _CRS\n", + (char *)buffer.pointer); + bbn = bus; + } + exit: + acpi_os_free(buffer.pointer); + return bbn; +} + +static acpi_status +find_pci_rootbridge(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + struct acpi_find_pci_root *find = (struct acpi_find_pci_root *)context; + unsigned long seg, bus; + acpi_status status; + int tmp; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); + + status = acpi_evaluate_integer(handle, METHOD_NAME__SEG, NULL, &seg); + if (status == AE_NOT_FOUND) { + /* Assume seg = 0 */ + printk(KERN_INFO PREFIX + "Assume root bridge [%s] segment is 0\n", + (char *)buffer.pointer); + status = AE_OK; + seg = 0; + } + if (ACPI_FAILURE(status)) { + status = AE_CTRL_DEPTH; + goto exit; + } + + tmp = get_root_bridge_busnr(handle); + if (tmp < 0) { + printk(KERN_ERR PREFIX + "Find root bridge failed for %s\n", + (char *)buffer.pointer); + status = AE_CTRL_DEPTH; + goto exit; + } + bus = tmp; + + if (seg == find->seg && bus == find->bus) + find->handle = handle; + status = AE_OK; + exit: + acpi_os_free(buffer.pointer); + return status; +} + +acpi_handle acpi_get_pci_rootbridge_handle(unsigned int seg, unsigned int bus) +{ + struct acpi_find_pci_root find = { seg, bus, NULL }; + + acpi_get_devices(PCI_ROOT_HID_STRING, find_pci_rootbridge, &find, NULL); + return find.handle; +} + +/* Get device's handler per its address under its parent */ +struct acpi_find_child { + acpi_handle handle; + acpi_integer address; +}; + +static acpi_status +do_acpi_find_child(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + acpi_status status; + struct acpi_device_info *info; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_find_child *find = (struct acpi_find_child *)context; + + status = acpi_get_object_info(handle, &buffer); + if (ACPI_SUCCESS(status)) { + info = buffer.pointer; + if (info->address == find->address) + find->handle = handle; + 
acpi_os_free(buffer.pointer); + } + return AE_OK; +} + +acpi_handle acpi_get_child(acpi_handle parent, acpi_integer address) +{ + struct acpi_find_child find = { NULL, address }; + + if (!parent) + return NULL; + acpi_walk_namespace(ACPI_TYPE_DEVICE, parent, + 1, do_acpi_find_child, &find, NULL); + return find.handle; +} + +EXPORT_SYMBOL(acpi_get_child); + +/* Link ACPI devices with physical devices */ +static void acpi_glue_data_handler(acpi_handle handle, + u32 function, void *context) +{ + /* we provide an empty handler */ +} + +/* Note: a success call will increase reference count by one */ +struct device *acpi_get_physical_device(acpi_handle handle) +{ + acpi_status status; + struct device *dev; + + status = acpi_get_data(handle, acpi_glue_data_handler, (void **)&dev); + if (ACPI_SUCCESS(status)) + return get_device(dev); + return NULL; +} + +EXPORT_SYMBOL(acpi_get_physical_device); + +static int acpi_bind_one(struct device *dev, acpi_handle handle) +{ + acpi_status status; + + if (dev->firmware_data) { + printk(KERN_WARNING PREFIX + "Drivers changed 'firmware_data' for %s\n", dev->bus_id); + return -EINVAL; + } + get_device(dev); + status = acpi_attach_data(handle, acpi_glue_data_handler, dev); + if (ACPI_FAILURE(status)) { + put_device(dev); + return -EINVAL; + } + dev->firmware_data = handle; + + return 0; +} + +static int acpi_unbind_one(struct device *dev) +{ + if (!dev->firmware_data) + return 0; + if (dev == acpi_get_physical_device(dev->firmware_data)) { + /* acpi_get_physical_device increase refcnt by one */ + put_device(dev); + acpi_detach_data(dev->firmware_data, acpi_glue_data_handler); + dev->firmware_data = NULL; + /* acpi_bind_one increase refcnt by one */ + put_device(dev); + } else { + printk(KERN_ERR PREFIX + "Oops, 'firmware_data' corrupt for %s\n", dev->bus_id); + } + return 0; +} + +static int acpi_platform_notify(struct device *dev) +{ + struct acpi_bus_type *type; + acpi_handle handle; + int ret = -EINVAL; + + if (!dev->bus || 
!dev->parent) { + /* bridge devices genernally haven't bus or parent */ + ret = acpi_find_bridge_device(dev, &handle); + goto end; + } + type = acpi_get_bus_type(dev->bus); + if (!type) { + printk(KERN_INFO PREFIX "No ACPI bus support for %s\n", + dev->bus_id); + ret = -EINVAL; + goto end; + } + if ((ret = type->find_device(dev, &handle)) != 0) + printk(KERN_INFO PREFIX "Can't get handler for %s\n", + dev->bus_id); + end: + if (!ret) + acpi_bind_one(dev, handle); + +#if ACPI_GLUE_DEBUG + if (!ret) { + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + acpi_get_name(dev->firmware_data, ACPI_FULL_PATHNAME, &buffer); + DBG("Device %s -> %s\n", dev->bus_id, (char *)buffer.pointer); + acpi_os_free(buffer.pointer); + } else + DBG("Device %s -> No ACPI support\n", dev->bus_id); +#endif + + return ret; +} + +static int acpi_platform_notify_remove(struct device *dev) +{ + acpi_unbind_one(dev); + return 0; +} + +static int __init init_acpi_device_notify(void) +{ + if (acpi_disabled) + return 0; + if (platform_notify || platform_notify_remove) { + printk(KERN_ERR PREFIX "Can't use platform_notify\n"); + return 0; + } + platform_notify = acpi_platform_notify; + platform_notify_remove = acpi_platform_notify_remove; + return 0; +} + +arch_initcall(init_acpi_device_notify); diff --git a/drivers/acpi/ibm_acpi.c b/drivers/acpi/ibm_acpi.c index 6c8291c3e77..ad85e10001f 100644 --- a/drivers/acpi/ibm_acpi.c +++ b/drivers/acpi/ibm_acpi.c @@ -1025,7 +1025,7 @@ static int setup_notify(struct ibm_struct *ibm) return 0; } -static int device_add(struct acpi_device *device) +static int ibmacpi_device_add(struct acpi_device *device) { return 0; } @@ -1043,7 +1043,7 @@ static int register_driver(struct ibm_struct *ibm) memset(ibm->driver, 0, sizeof(struct acpi_driver)); sprintf(ibm->driver->name, "%s/%s", IBM_NAME, ibm->name); ibm->driver->ids = ibm->hid; - ibm->driver->ops.add = &device_add; + ibm->driver->ops.add = &ibmacpi_device_add; ret = 
acpi_bus_register_driver(ibm->driver); if (ret < 0) { diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c627bc408a6..53b821d7b8a 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -336,6 +336,27 @@ int acpi_match_ids (struct acpi_device *device, char *ids); int acpi_create_dir(struct acpi_device *); void acpi_remove_dir(struct acpi_device *); + +/* + * Bind physical devices with ACPI devices + */ +#include +struct acpi_bus_type { + struct list_head list; + struct bus_type *bus; + /* For general devices under the bus*/ + int (*find_device)(struct device *, acpi_handle*); + /* For bridges, such as PCI root bridge, IDE controller */ + int (*find_bridge)(struct device *, acpi_handle *); +}; +int register_acpi_bus_type(struct acpi_bus_type *); +int unregister_acpi_bus_type(struct acpi_bus_type *); +struct device *acpi_get_physical_device(acpi_handle); +/* helper */ +acpi_handle acpi_get_child(acpi_handle, acpi_integer); +acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int); +#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->firmware_data)) + #endif /*CONFIG_ACPI_BUS*/ #endif /*__ACPI_BUS_H__*/ diff --git a/include/linux/device.h b/include/linux/device.h index df94c0de53f..de2d6fe349d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -269,8 +269,10 @@ struct device { struct device_driver *driver; /* which driver has allocated this device */ void *driver_data; /* data private to the driver */ - void *platform_data; /* Platform specific data (e.g. ACPI, - BIOS data relevant to device) */ + void *platform_data; /* Platform specific data, device + core doesn't touch it */ + void *firmware_data; /* Firmware specific data (e.g. 
ACPI, + BIOS data),reserved for device core*/ struct dev_pm_info power; u64 *dma_mask; /* dma mask (if dma'able device) */ -- cgit v1.2.3-70-g09d2 From 55e59c511cea3c6c721971467c707e9955922bc2 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Thu, 31 Mar 2005 22:51:10 -0500 Subject: [ACPI] Evaluate CPEI Processor Override flag ACPI 3.0 added a Correctable Platform Error Interrupt (CPEI) Processor Override flag to MADT.Platform_Interrupt_Source. Record the processor that was provided as a hint from ACPI. Signed-off-by: Ashok Raj Signed-off-by: Len Brown --- arch/ia64/kernel/acpi.c | 54 +++++++++++++++++++++++++++++++++++++++++++++ arch/ia64/kernel/mca.c | 2 +- arch/ia64/kernel/topology.c | 7 ++++++ include/asm-ia64/acpi.h | 9 ++++++++ include/linux/acpi.h | 5 ++++- 5 files changed, 75 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 72dfd9e7de0..1c118b72df3 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -11,6 +11,7 @@ * Copyright (C) 2001 Jenna Hall * Copyright (C) 2001 Takayoshi Kochi * Copyright (C) 2002 Erich Focht + * Copyright (C) 2004 Ashok Raj * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * @@ -67,6 +68,11 @@ EXPORT_SYMBOL(pm_power_off); unsigned char acpi_kbd_controller_present = 1; unsigned char acpi_legacy_devices; +static unsigned int __initdata acpi_madt_rev; + +unsigned int acpi_cpei_override; +unsigned int acpi_cpei_phys_cpuid; + #define MAX_SAPICS 256 u16 ia64_acpiid_to_sapicid[MAX_SAPICS] = { [0 ... MAX_SAPICS - 1] = -1 }; @@ -267,10 +273,56 @@ acpi_parse_plat_int_src ( (plintsrc->flags.trigger == 1) ? 
IOSAPIC_EDGE : IOSAPIC_LEVEL); platform_intr_list[plintsrc->type] = vector; + if (acpi_madt_rev > 1) { + acpi_cpei_override = plintsrc->plint_flags.cpei_override_flag; + } + + /* + * Save the physical id, so we can check when its being removed + */ + acpi_cpei_phys_cpuid = ((plintsrc->id << 8) | (plintsrc->eid)) & 0xffff; + return 0; } +unsigned int can_cpei_retarget(void) +{ + extern int cpe_vector; + + /* + * Only if CPEI is supported and the override flag + * is present, otherwise return that its re-targettable + * if we are in polling mode. + */ + if (cpe_vector > 0 && !acpi_cpei_override) + return 0; + else + return 1; +} + +unsigned int is_cpu_cpei_target(unsigned int cpu) +{ + unsigned int logical_id; + + logical_id = cpu_logical_id(acpi_cpei_phys_cpuid); + + if (logical_id == cpu) + return 1; + else + return 0; +} + +void set_cpei_target_cpu(unsigned int cpu) +{ + acpi_cpei_phys_cpuid = cpu_physical_id(cpu); +} + +unsigned int get_cpei_target_cpu(void) +{ + return acpi_cpei_phys_cpuid; +} + static int __init acpi_parse_int_src_ovr ( acpi_table_entry_header *header, const unsigned long end) @@ -328,6 +380,8 @@ acpi_parse_madt (unsigned long phys_addr, unsigned long size) acpi_madt = (struct acpi_table_madt *) __va(phys_addr); + acpi_madt_rev = acpi_madt->header.revision; + /* remember the value for reference after free_initmem() */ #ifdef CONFIG_ITANIUM has_8259 = 1; /* Firmware on old Itanium systems is broken */ diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 736e328b5e6..4ebbf397438 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -271,7 +271,7 @@ ia64_mca_log_sal_error_record(int sal_info_type) #ifdef CONFIG_ACPI -static int cpe_vector = -1; +int cpe_vector = -1; static irqreturn_t ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index f1aafd4c05f..d8030f3bd86 100644 --- a/arch/ia64/kernel/topology.c +++ 
b/arch/ia64/kernel/topology.c @@ -36,6 +36,13 @@ int arch_register_cpu(int num) parent = &sysfs_nodes[cpu_to_node(num)]; #endif /* CONFIG_NUMA */ + /* + * If CPEI cannot be re-targetted, and this is + * CPEI target, then dont create the control file + */ + if (!can_cpei_retarget() && is_cpu_cpei_target(num)) + sysfs_cpus[num].cpu.no_control = 1; + return register_cpu(&sysfs_cpus[num].cpu, num, parent); } diff --git a/include/asm-ia64/acpi.h b/include/asm-ia64/acpi.h index 6a26a977f25..4c06d455139 100644 --- a/include/asm-ia64/acpi.h +++ b/include/asm-ia64/acpi.h @@ -98,6 +98,15 @@ const char *acpi_get_sysname (void); int acpi_request_vector (u32 int_type); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); +/* + * Record the cpei override flag and current logical cpu. This is + * useful for CPU removal. + */ +extern unsigned int can_cpei_retarget(void); +extern unsigned int is_cpu_cpei_target(unsigned int cpu); +extern void set_cpei_target_cpu(unsigned int cpu); +extern unsigned int get_cpei_target_cpu(void); + #ifdef CONFIG_ACPI_NUMA /* Proximity bitmap length; _PXM is at most 255 (8 bit)*/ #define MAX_PXM_DOMAINS (256) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index b123cc08773..70b3c52b75d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -206,7 +206,10 @@ struct acpi_table_plat_int_src { u8 eid; u8 iosapic_vector; u32 global_irq; - u32 reserved; + struct { + u32 cpei_override_flag:1; + u32 reserved:31; + } plint_flags; } __attribute__ ((packed)); enum acpi_interrupt_id { -- cgit v1.2.3-70-g09d2 From c9c3e457de24cca2ca688fa397d93a241f472048 Mon Sep 17 00:00:00 2001 From: David Shaohua Li Date: Fri, 1 Apr 2005 00:07:31 -0500 Subject: [ACPI] PNPACPI vs sound IRQ http://bugme.osdl.org/show_bug.cgi?id=4016 Written-by: David Shaohua Li Acked-by: Adam Belay Signed-off-by: Len Brown --- arch/frv/mb93090-mb00/pci-irq.c | 2 +- arch/i386/pci/irq.c | 16 ++++++++++------ arch/i386/pci/visws.c | 2 +- drivers/acpi/pci_link.c | 7 +++++-- 
drivers/pnp/pnpacpi/rsparser.c | 4 ++-- drivers/pnp/pnpbios/rsparser.c | 2 +- drivers/pnp/resource.c | 2 +- include/asm-alpha/pci.h | 2 +- include/asm-arm/pci.h | 2 +- include/asm-h8300/pci.h | 2 +- include/asm-i386/pci.h | 2 +- include/asm-ia64/pci.h | 2 +- include/asm-m68k/pci.h | 2 +- include/asm-mips/pci.h | 2 +- include/asm-ppc/pci.h | 2 +- include/asm-ppc64/pci.h | 2 +- include/asm-sh/pci.h | 2 +- include/asm-sh64/pci.h | 2 +- include/asm-sparc/pci.h | 2 +- include/asm-sparc64/pci.h | 2 +- include/asm-x86_64/pci.h | 2 +- include/linux/acpi.h | 2 +- 22 files changed, 36 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/arch/frv/mb93090-mb00/pci-irq.c b/arch/frv/mb93090-mb00/pci-irq.c index 24622d89b1c..af981bda015 100644 --- a/arch/frv/mb93090-mb00/pci-irq.c +++ b/arch/frv/mb93090-mb00/pci-irq.c @@ -60,7 +60,7 @@ void __init pcibios_fixup_irqs(void) } } -void __init pcibios_penalize_isa_irq(int irq) +void __init pcibios_penalize_isa_irq(int irq, int active) { } diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index da21b1d07c1..d21b3a2dc97 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -1006,24 +1006,28 @@ static int __init pcibios_irq_init(void) subsys_initcall(pcibios_irq_init); -static void pirq_penalize_isa_irq(int irq) +static void pirq_penalize_isa_irq(int irq, int active) { /* * If any ISAPnP device reports an IRQ in its list of possible * IRQ's, we try to avoid assigning it to PCI devices. 
*/ - if (irq < 16) - pirq_penalty[irq] += 100; + if (irq < 16) { + if (active) + pirq_penalty[irq] += 1000; + else + pirq_penalty[irq] += 100; + } } -void pcibios_penalize_isa_irq(int irq) +void pcibios_penalize_isa_irq(int irq, int active) { #ifdef CONFIG_ACPI_PCI if (!acpi_noirq) - acpi_penalize_isa_irq(irq); + acpi_penalize_isa_irq(irq, active); else #endif - pirq_penalize_isa_irq(irq); + pirq_penalize_isa_irq(irq, active); } static int pirq_enable_irq(struct pci_dev *dev) diff --git a/arch/i386/pci/visws.c b/arch/i386/pci/visws.c index 6a924878443..314c933b6b8 100644 --- a/arch/i386/pci/visws.c +++ b/arch/i386/pci/visws.c @@ -21,7 +21,7 @@ static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq; -void __init pcibios_penalize_isa_irq(int irq) {} +void __init pcibios_penalize_isa_irq(int irq, int active) {} unsigned int pci_bus0, pci_bus1; diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index f2271173bbd..6ad0e77df9b 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -804,9 +804,12 @@ static int __init acpi_irq_penalty_update(char *str, int used) * There is no ISA_POSSIBLE weight, so we simply use * the (small) PCI_USING penalty. 
*/ -void acpi_penalize_isa_irq(int irq) +void acpi_penalize_isa_irq(int irq, int active) { - acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING; + if (active) + acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_USED; + else + acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING; } /* diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index ae3819ad7cf..75575f6c349 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -160,7 +160,7 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, acpi_register_gsi(res->data.irq.interrupts[0], res->data.irq.edge_level, res->data.irq.active_high_low)); - pcibios_penalize_isa_irq(res->data.irq.interrupts[0]); + pcibios_penalize_isa_irq(res->data.irq.interrupts[0], 1); } break; @@ -171,7 +171,7 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, acpi_register_gsi(res->data.extended_irq.interrupts[0], res->data.extended_irq.edge_level, res->data.extended_irq.active_high_low)); - pcibios_penalize_isa_irq(res->data.extended_irq.interrupts[0]); + pcibios_penalize_isa_irq(res->data.extended_irq.interrupts[0], 1); } break; case ACPI_RSTYPE_DMA: diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c index 79bce7b7574..9001b6f0204 100644 --- a/drivers/pnp/pnpbios/rsparser.c +++ b/drivers/pnp/pnpbios/rsparser.c @@ -64,7 +64,7 @@ pnpbios_parse_allocated_irqresource(struct pnp_resource_table * res, int irq) } res->irq_resource[i].start = res->irq_resource[i].end = (unsigned long) irq; - pcibios_penalize_isa_irq(irq); + pcibios_penalize_isa_irq(irq, 1); } } diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index 2d1322dd7e1..887ad893934 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -102,7 +102,7 @@ int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data) for (i = 0; i < 16; i++) if (test_bit(i, data->map)) - pcibios_penalize_isa_irq(i); + pcibios_penalize_isa_irq(i, 0); } #endif 
return 0; diff --git a/include/asm-alpha/pci.h b/include/asm-alpha/pci.h index 0c7b57bc043..7109860f98e 100644 --- a/include/asm-alpha/pci.h +++ b/include/asm-alpha/pci.h @@ -58,7 +58,7 @@ struct pci_controller { extern void pcibios_set_master(struct pci_dev *dev); -extern inline void pcibios_penalize_isa_irq(int irq) +extern inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } diff --git a/include/asm-arm/pci.h b/include/asm-arm/pci.h index 40ffaefbeb1..0f437e26231 100644 --- a/include/asm-arm/pci.h +++ b/include/asm-arm/pci.h @@ -14,7 +14,7 @@ static inline void pcibios_set_master(struct pci_dev *dev) /* No special bus mastering setup handling */ } -static inline void pcibios_penalize_isa_irq(int irq) +static inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } diff --git a/include/asm-h8300/pci.h b/include/asm-h8300/pci.h index d032729b19d..5edad5b70fd 100644 --- a/include/asm-h8300/pci.h +++ b/include/asm-h8300/pci.h @@ -15,7 +15,7 @@ extern inline void pcibios_set_master(struct pci_dev *dev) /* No special bus mastering setup handling */ } -extern inline void pcibios_penalize_isa_irq(int irq) +extern inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } diff --git a/include/asm-i386/pci.h b/include/asm-i386/pci.h index fb749b85a73..e0dc1cea0b7 100644 --- a/include/asm-i386/pci.h +++ b/include/asm-i386/pci.h @@ -27,7 +27,7 @@ void pcibios_config_init(void); struct pci_bus * pcibios_scan_root(int bus); void pcibios_set_master(struct pci_dev *dev); -void pcibios_penalize_isa_irq(int irq); +void pcibios_penalize_isa_irq(int irq, int active); struct irq_routing_table *pcibios_get_irq_routing_table(void); int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); diff --git a/include/asm-ia64/pci.h b/include/asm-ia64/pci.h index a8314ee4e7d..1cbd10b96b3 100644 --- a/include/asm-ia64/pci.h +++ 
b/include/asm-ia64/pci.h @@ -47,7 +47,7 @@ pcibios_set_master (struct pci_dev *dev) } static inline void -pcibios_penalize_isa_irq (int irq) +pcibios_penalize_isa_irq (int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } diff --git a/include/asm-m68k/pci.h b/include/asm-m68k/pci.h index 9e7d79ab5d1..9d2c07abe44 100644 --- a/include/asm-m68k/pci.h +++ b/include/asm-m68k/pci.h @@ -43,7 +43,7 @@ static inline void pcibios_set_master(struct pci_dev *dev) /* No special bus mastering setup handling */ } -static inline void pcibios_penalize_isa_irq(int irq) +static inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } diff --git a/include/asm-mips/pci.h b/include/asm-mips/pci.h index c9c576b4855..3bf1cb5cd54 100644 --- a/include/asm-mips/pci.h +++ b/include/asm-mips/pci.h @@ -69,7 +69,7 @@ extern unsigned long PCIBIOS_MIN_MEM; extern void pcibios_set_master(struct pci_dev *dev); -static inline void pcibios_penalize_isa_irq(int irq) +static inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } diff --git a/include/asm-ppc/pci.h b/include/asm-ppc/pci.h index ce5ae6d048f..ebd34fffc73 100644 --- a/include/asm-ppc/pci.h +++ b/include/asm-ppc/pci.h @@ -37,7 +37,7 @@ extern inline void pcibios_set_master(struct pci_dev *dev) /* No special bus mastering setup handling */ } -extern inline void pcibios_penalize_isa_irq(int irq) +extern inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } diff --git a/include/asm-ppc64/pci.h b/include/asm-ppc64/pci.h index 6cd593f660a..7c11687df3b 100644 --- a/include/asm-ppc64/pci.h +++ b/include/asm-ppc64/pci.h @@ -37,7 +37,7 @@ static inline void pcibios_set_master(struct pci_dev *dev) /* No special bus mastering setup handling */ } -static inline void pcibios_penalize_isa_irq(int irq) +static inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't do 
dynamic PCI IRQ allocation */ } diff --git a/include/asm-sh/pci.h b/include/asm-sh/pci.h index 9c3b63d0105..92bcb03426f 100644 --- a/include/asm-sh/pci.h +++ b/include/asm-sh/pci.h @@ -36,7 +36,7 @@ struct pci_dev; extern void pcibios_set_master(struct pci_dev *dev); -static inline void pcibios_penalize_isa_irq(int irq) +static inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } diff --git a/include/asm-sh64/pci.h b/include/asm-sh64/pci.h index 8cc14e13975..ea711108f0e 100644 --- a/include/asm-sh64/pci.h +++ b/include/asm-sh64/pci.h @@ -26,7 +26,7 @@ extern void pcibios_set_master(struct pci_dev *dev); /* * Set penalize isa irq function */ -static inline void pcibios_penalize_isa_irq(int irq) +static inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } diff --git a/include/asm-sparc/pci.h b/include/asm-sparc/pci.h index d200a25a737..d875d9496a8 100644 --- a/include/asm-sparc/pci.h +++ b/include/asm-sparc/pci.h @@ -20,7 +20,7 @@ extern inline void pcibios_set_master(struct pci_dev *dev) /* No special bus mastering setup handling */ } -extern inline void pcibios_penalize_isa_irq(int irq) +extern inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } diff --git a/include/asm-sparc64/pci.h b/include/asm-sparc64/pci.h index 2a0c85cd1c1..e38d6598d62 100644 --- a/include/asm-sparc64/pci.h +++ b/include/asm-sparc64/pci.h @@ -23,7 +23,7 @@ static inline void pcibios_set_master(struct pci_dev *dev) /* No special bus mastering setup handling */ } -static inline void pcibios_penalize_isa_irq(int irq) +static inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h index 8712520ca47..9e8c273b785 100644 --- a/include/asm-x86_64/pci.h +++ b/include/asm-x86_64/pci.h @@ -33,7 +33,7 @@ extern int 
(*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int le extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value); void pcibios_set_master(struct pci_dev *dev); -void pcibios_penalize_isa_irq(int irq); +void pcibios_penalize_isa_irq(int irq, int active); struct irq_routing_table *pcibios_get_irq_routing_table(void); int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 70b3c52b75d..9c14959bcfa 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -465,7 +465,7 @@ struct acpi_prt_list { struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); -void acpi_penalize_isa_irq(int irq); +void acpi_penalize_isa_irq(int irq, int active); #ifdef CONFIG_ACPI_DEALLOCATE_IRQ void acpi_pci_irq_disable (struct pci_dev *dev); -- cgit v1.2.3-70-g09d2 From 5db539e49fc7471e23bf3c94ca304f008cb7b7f3 Mon Sep 17 00:00:00 2001 From: Olav Kongas Date: Thu, 23 Jun 2005 20:25:36 +0300 Subject: [PATCH] USB: Fix kmalloc's flags type in USB Greg, This patch fixes the kmalloc() flags argument type in USB subsystem; hopefully all of its occurrences. The patch was made against patch-2.6.12-git2 from Jun 20. Cleanup of flags for kmalloc() in USB subsystem. 
Signed-off-by: Olav Kongas Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/buffer.c | 2 +- drivers/usb/core/hcd.c | 2 +- drivers/usb/core/hcd.h | 8 ++++---- drivers/usb/core/message.c | 2 +- drivers/usb/core/urb.c | 4 ++-- drivers/usb/core/usb.c | 2 +- drivers/usb/gadget/dummy_hcd.c | 9 +++++---- drivers/usb/gadget/ether.c | 18 +++++++++--------- drivers/usb/gadget/goku_udc.c | 6 +++--- drivers/usb/gadget/lh7a40x_udc.c | 6 +++--- drivers/usb/gadget/net2280.c | 6 +++--- drivers/usb/gadget/omap_udc.c | 6 +++--- drivers/usb/gadget/pxa2xx_udc.c | 6 +++--- drivers/usb/gadget/zero.c | 8 ++++---- drivers/usb/host/ehci-hcd.c | 2 +- drivers/usb/host/ehci-q.c | 2 +- drivers/usb/host/ehci-sched.c | 19 +++++++++++-------- drivers/usb/host/hc_crisv10.c | 10 ++++++---- drivers/usb/host/isp116x-hcd.c | 4 ++-- drivers/usb/host/ohci-hcd.c | 2 +- drivers/usb/host/ohci-mem.c | 4 ++-- drivers/usb/host/sl811-hcd.c | 2 +- drivers/usb/host/uhci-q.c | 2 +- drivers/usb/net/kaweth.c | 4 ++-- include/linux/usb.h | 8 ++++---- include/linux/usb_gadget.h | 12 ++++++------ 26 files changed, 81 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c index b7827df21f4..fc15b4acc8a 100644 --- a/drivers/usb/core/buffer.c +++ b/drivers/usb/core/buffer.c @@ -106,7 +106,7 @@ void hcd_buffer_destroy (struct usb_hcd *hcd) void *hcd_buffer_alloc ( struct usb_bus *bus, size_t size, - int mem_flags, + unsigned mem_flags, dma_addr_t *dma ) { diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 83e732a0d64..8616356f55e 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1112,7 +1112,7 @@ static void urb_unlink (struct urb *urb) * expects usb_submit_urb() to have sanity checked and conditioned all * inputs in the urb */ -static int hcd_submit_urb (struct urb *urb, int mem_flags) +static int hcd_submit_urb (struct urb *urb, unsigned mem_flags) { int status; struct usb_hcd *hcd = urb->dev->bus->hcpriv; 
diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h index 8dc13cde2f7..67db4a999b9 100644 --- a/drivers/usb/core/hcd.h +++ b/drivers/usb/core/hcd.h @@ -142,12 +142,12 @@ struct hcd_timeout { /* timeouts we allocate */ struct usb_operations { int (*get_frame_number) (struct usb_device *usb_dev); - int (*submit_urb) (struct urb *urb, int mem_flags); + int (*submit_urb) (struct urb *urb, unsigned mem_flags); int (*unlink_urb) (struct urb *urb, int status); /* allocate dma-consistent buffer for URB_DMA_NOMAPPING */ void *(*buffer_alloc)(struct usb_bus *bus, size_t size, - int mem_flags, + unsigned mem_flags, dma_addr_t *dma); void (*buffer_free)(struct usb_bus *bus, size_t size, void *addr, dma_addr_t dma); @@ -200,7 +200,7 @@ struct hc_driver { int (*urb_enqueue) (struct usb_hcd *hcd, struct usb_host_endpoint *ep, struct urb *urb, - int mem_flags); + unsigned mem_flags); int (*urb_dequeue) (struct usb_hcd *hcd, struct urb *urb); /* hw synch, freeing endpoint resources that urb_dequeue can't */ @@ -247,7 +247,7 @@ int hcd_buffer_create (struct usb_hcd *hcd); void hcd_buffer_destroy (struct usb_hcd *hcd); void *hcd_buffer_alloc (struct usb_bus *bus, size_t size, - int mem_flags, dma_addr_t *dma); + unsigned mem_flags, dma_addr_t *dma); void hcd_buffer_free (struct usb_bus *bus, size_t size, void *addr, dma_addr_t dma); diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index f50aaf25c98..a428ef479bd 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -320,7 +320,7 @@ int usb_sg_init ( struct scatterlist *sg, int nents, size_t length, - int mem_flags + unsigned mem_flags ) { int i; diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index 0faf18d511d..c0feee25ff0 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -60,7 +60,7 @@ void usb_init_urb(struct urb *urb) * * The driver must call usb_free_urb() when it is finished with the urb. 
*/ -struct urb *usb_alloc_urb(int iso_packets, int mem_flags) +struct urb *usb_alloc_urb(int iso_packets, unsigned mem_flags) { struct urb *urb; @@ -224,7 +224,7 @@ struct urb * usb_get_urb(struct urb *urb) * GFP_NOIO, unless b) or c) apply * */ -int usb_submit_urb(struct urb *urb, int mem_flags) +int usb_submit_urb(struct urb *urb, unsigned mem_flags) { int pipe, temp, max; struct usb_device *dev; diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index a3c42203213..7713a605fce 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -1129,7 +1129,7 @@ int __usb_get_extra_descriptor(char *buffer, unsigned size, void *usb_buffer_alloc ( struct usb_device *dev, size_t size, - int mem_flags, + unsigned mem_flags, dma_addr_t *dma ) { diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c index 4d692670f28..583db7c38cf 100644 --- a/drivers/usb/gadget/dummy_hcd.c +++ b/drivers/usb/gadget/dummy_hcd.c @@ -470,7 +470,7 @@ static int dummy_disable (struct usb_ep *_ep) } static struct usb_request * -dummy_alloc_request (struct usb_ep *_ep, int mem_flags) +dummy_alloc_request (struct usb_ep *_ep, unsigned mem_flags) { struct dummy_ep *ep; struct dummy_request *req; @@ -507,7 +507,7 @@ dummy_alloc_buffer ( struct usb_ep *_ep, unsigned bytes, dma_addr_t *dma, - int mem_flags + unsigned mem_flags ) { char *retval; struct dummy_ep *ep; @@ -540,7 +540,8 @@ fifo_complete (struct usb_ep *ep, struct usb_request *req) } static int -dummy_queue (struct usb_ep *_ep, struct usb_request *_req, int mem_flags) +dummy_queue (struct usb_ep *_ep, struct usb_request *_req, + unsigned mem_flags) { struct dummy_ep *ep; struct dummy_request *req; @@ -998,7 +999,7 @@ static int dummy_urb_enqueue ( struct usb_hcd *hcd, struct usb_host_endpoint *ep, struct urb *urb, - int mem_flags + unsigned mem_flags ) { struct dummy *dum; struct urbp *urbp; diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c index 5bb53ae8896..00a5d256626 100644 --- 
a/drivers/usb/gadget/ether.c +++ b/drivers/usb/gadget/ether.c @@ -945,11 +945,11 @@ config_buf (enum usb_device_speed speed, /*-------------------------------------------------------------------------*/ -static void eth_start (struct eth_dev *dev, int gfp_flags); -static int alloc_requests (struct eth_dev *dev, unsigned n, int gfp_flags); +static void eth_start (struct eth_dev *dev, unsigned gfp_flags); +static int alloc_requests (struct eth_dev *dev, unsigned n, unsigned gfp_flags); static int -set_ether_config (struct eth_dev *dev, int gfp_flags) +set_ether_config (struct eth_dev *dev, unsigned gfp_flags) { int result = 0; struct usb_gadget *gadget = dev->gadget; @@ -1079,7 +1079,7 @@ static void eth_reset_config (struct eth_dev *dev) * that returns config descriptors, and altsetting code. */ static int -eth_set_config (struct eth_dev *dev, unsigned number, int gfp_flags) +eth_set_config (struct eth_dev *dev, unsigned number, unsigned gfp_flags) { int result = 0; struct usb_gadget *gadget = dev->gadget; @@ -1596,7 +1596,7 @@ static void defer_kevent (struct eth_dev *dev, int flag) static void rx_complete (struct usb_ep *ep, struct usb_request *req); static int -rx_submit (struct eth_dev *dev, struct usb_request *req, int gfp_flags) +rx_submit (struct eth_dev *dev, struct usb_request *req, unsigned gfp_flags) { struct sk_buff *skb; int retval = -ENOMEM; @@ -1722,7 +1722,7 @@ clean: } static int prealloc (struct list_head *list, struct usb_ep *ep, - unsigned n, int gfp_flags) + unsigned n, unsigned gfp_flags) { unsigned i; struct usb_request *req; @@ -1761,7 +1761,7 @@ extra: return 0; } -static int alloc_requests (struct eth_dev *dev, unsigned n, int gfp_flags) +static int alloc_requests (struct eth_dev *dev, unsigned n, unsigned gfp_flags) { int status; @@ -1777,7 +1777,7 @@ fail: return status; } -static void rx_fill (struct eth_dev *dev, int gfp_flags) +static void rx_fill (struct eth_dev *dev, unsigned gfp_flags) { struct usb_request *req; unsigned long flags; 
@@ -2022,7 +2022,7 @@ static int rndis_control_ack (struct net_device *net) #endif /* RNDIS */ -static void eth_start (struct eth_dev *dev, int gfp_flags) +static void eth_start (struct eth_dev *dev, unsigned gfp_flags) { DEBUG (dev, "%s\n", __FUNCTION__); diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c index ed773a9111d..eaab26f4ed3 100644 --- a/drivers/usb/gadget/goku_udc.c +++ b/drivers/usb/gadget/goku_udc.c @@ -269,7 +269,7 @@ static int goku_ep_disable(struct usb_ep *_ep) /*-------------------------------------------------------------------------*/ static struct usb_request * -goku_alloc_request(struct usb_ep *_ep, int gfp_flags) +goku_alloc_request(struct usb_ep *_ep, unsigned gfp_flags) { struct goku_request *req; @@ -327,7 +327,7 @@ goku_free_request(struct usb_ep *_ep, struct usb_request *_req) */ static void * goku_alloc_buffer(struct usb_ep *_ep, unsigned bytes, - dma_addr_t *dma, int gfp_flags) + dma_addr_t *dma, unsigned gfp_flags) { void *retval; struct goku_ep *ep; @@ -789,7 +789,7 @@ finished: /*-------------------------------------------------------------------------*/ static int -goku_queue(struct usb_ep *_ep, struct usb_request *_req, int gfp_flags) +goku_queue(struct usb_ep *_ep, struct usb_request *_req, unsigned gfp_flags) { struct goku_request *req; struct goku_ep *ep; diff --git a/drivers/usb/gadget/lh7a40x_udc.c b/drivers/usb/gadget/lh7a40x_udc.c index df75ab65a5e..4842577789c 100644 --- a/drivers/usb/gadget/lh7a40x_udc.c +++ b/drivers/usb/gadget/lh7a40x_udc.c @@ -1106,7 +1106,7 @@ static int lh7a40x_ep_disable(struct usb_ep *_ep) } static struct usb_request *lh7a40x_alloc_request(struct usb_ep *ep, - int gfp_flags) + unsigned gfp_flags) { struct lh7a40x_request *req; @@ -1134,7 +1134,7 @@ static void lh7a40x_free_request(struct usb_ep *ep, struct usb_request *_req) } static void *lh7a40x_alloc_buffer(struct usb_ep *ep, unsigned bytes, - dma_addr_t * dma, int gfp_flags) + dma_addr_t * dma, unsigned gfp_flags) { 
char *retval; @@ -1158,7 +1158,7 @@ static void lh7a40x_free_buffer(struct usb_ep *ep, void *buf, dma_addr_t dma, * NOTE: Sets INDEX register */ static int lh7a40x_queue(struct usb_ep *_ep, struct usb_request *_req, - int gfp_flags) + unsigned gfp_flags) { struct lh7a40x_request *req; struct lh7a40x_ep *ep; diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c index 13a3dbc9949..234a1a97b84 100644 --- a/drivers/usb/gadget/net2280.c +++ b/drivers/usb/gadget/net2280.c @@ -376,7 +376,7 @@ static int net2280_disable (struct usb_ep *_ep) /*-------------------------------------------------------------------------*/ static struct usb_request * -net2280_alloc_request (struct usb_ep *_ep, int gfp_flags) +net2280_alloc_request (struct usb_ep *_ep, unsigned gfp_flags) { struct net2280_ep *ep; struct net2280_request *req; @@ -463,7 +463,7 @@ net2280_alloc_buffer ( struct usb_ep *_ep, unsigned bytes, dma_addr_t *dma, - int gfp_flags + unsigned gfp_flags ) { void *retval; @@ -897,7 +897,7 @@ done (struct net2280_ep *ep, struct net2280_request *req, int status) /*-------------------------------------------------------------------------*/ static int -net2280_queue (struct usb_ep *_ep, struct usb_request *_req, int gfp_flags) +net2280_queue (struct usb_ep *_ep, struct usb_request *_req, unsigned gfp_flags) { struct net2280_request *req; struct net2280_ep *ep; diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c index a2b812af6e6..c906d675ef4 100644 --- a/drivers/usb/gadget/omap_udc.c +++ b/drivers/usb/gadget/omap_udc.c @@ -269,7 +269,7 @@ static int omap_ep_disable(struct usb_ep *_ep) /*-------------------------------------------------------------------------*/ static struct usb_request * -omap_alloc_request(struct usb_ep *ep, int gfp_flags) +omap_alloc_request(struct usb_ep *ep, unsigned gfp_flags) { struct omap_req *req; @@ -298,7 +298,7 @@ omap_alloc_buffer( struct usb_ep *_ep, unsigned bytes, dma_addr_t *dma, - int gfp_flags + unsigned 
gfp_flags ) { void *retval; @@ -937,7 +937,7 @@ static void dma_channel_release(struct omap_ep *ep) /*-------------------------------------------------------------------------*/ static int -omap_ep_queue(struct usb_ep *_ep, struct usb_request *_req, int gfp_flags) +omap_ep_queue(struct usb_ep *_ep, struct usb_request *_req, unsigned gfp_flags) { struct omap_ep *ep = container_of(_ep, struct omap_ep, ep); struct omap_req *req = container_of(_req, struct omap_req, req); diff --git a/drivers/usb/gadget/pxa2xx_udc.c b/drivers/usb/gadget/pxa2xx_udc.c index 6a0b957af33..1507738337c 100644 --- a/drivers/usb/gadget/pxa2xx_udc.c +++ b/drivers/usb/gadget/pxa2xx_udc.c @@ -332,7 +332,7 @@ static int pxa2xx_ep_disable (struct usb_ep *_ep) * pxa2xx_ep_alloc_request - allocate a request data structure */ static struct usb_request * -pxa2xx_ep_alloc_request (struct usb_ep *_ep, int gfp_flags) +pxa2xx_ep_alloc_request (struct usb_ep *_ep, unsigned gfp_flags) { struct pxa2xx_request *req; @@ -367,7 +367,7 @@ pxa2xx_ep_free_request (struct usb_ep *_ep, struct usb_request *_req) */ static void * pxa2xx_ep_alloc_buffer(struct usb_ep *_ep, unsigned bytes, - dma_addr_t *dma, int gfp_flags) + dma_addr_t *dma, unsigned gfp_flags) { char *retval; @@ -874,7 +874,7 @@ done: /*-------------------------------------------------------------------------*/ static int -pxa2xx_ep_queue(struct usb_ep *_ep, struct usb_request *_req, int gfp_flags) +pxa2xx_ep_queue(struct usb_ep *_ep, struct usb_request *_req, unsigned gfp_flags) { struct pxa2xx_request *req; struct pxa2xx_ep *ep; diff --git a/drivers/usb/gadget/zero.c b/drivers/usb/gadget/zero.c index a6e035e2447..bb9b2d94eed 100644 --- a/drivers/usb/gadget/zero.c +++ b/drivers/usb/gadget/zero.c @@ -612,7 +612,7 @@ static void source_sink_complete (struct usb_ep *ep, struct usb_request *req) } static struct usb_request * -source_sink_start_ep (struct usb_ep *ep, int gfp_flags) +source_sink_start_ep (struct usb_ep *ep, unsigned gfp_flags) { struct 
usb_request *req; int status; @@ -640,7 +640,7 @@ source_sink_start_ep (struct usb_ep *ep, int gfp_flags) } static int -set_source_sink_config (struct zero_dev *dev, int gfp_flags) +set_source_sink_config (struct zero_dev *dev, unsigned gfp_flags) { int result = 0; struct usb_ep *ep; @@ -744,7 +744,7 @@ static void loopback_complete (struct usb_ep *ep, struct usb_request *req) } static int -set_loopback_config (struct zero_dev *dev, int gfp_flags) +set_loopback_config (struct zero_dev *dev, unsigned gfp_flags) { int result = 0; struct usb_ep *ep; @@ -845,7 +845,7 @@ static void zero_reset_config (struct zero_dev *dev) * by limiting configuration choices (like the pxa2xx). */ static int -zero_set_config (struct zero_dev *dev, unsigned number, int gfp_flags) +zero_set_config (struct zero_dev *dev, unsigned number, unsigned gfp_flags) { int result = 0; struct usb_gadget *gadget = dev->gadget; diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 35248a37b71..149b13fc0a7 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -960,7 +960,7 @@ static int ehci_urb_enqueue ( struct usb_hcd *hcd, struct usb_host_endpoint *ep, struct urb *urb, - int mem_flags + unsigned mem_flags ) { struct ehci_hcd *ehci = hcd_to_ehci (hcd); struct list_head qtd_list; diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 45d89a7083b..d74b2d68a50 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -898,7 +898,7 @@ submit_async ( struct usb_host_endpoint *ep, struct urb *urb, struct list_head *qtd_list, - int mem_flags + unsigned mem_flags ) { struct ehci_qtd *qtd; int epnum; diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c index c2104cad403..9af4f64532a 100644 --- a/drivers/usb/host/ehci-sched.c +++ b/drivers/usb/host/ehci-sched.c @@ -588,7 +588,7 @@ static int intr_submit ( struct usb_host_endpoint *ep, struct urb *urb, struct list_head *qtd_list, - int mem_flags + unsigned 
mem_flags ) { unsigned epnum; unsigned long flags; @@ -633,7 +633,7 @@ done: /* ehci_iso_stream ops work with both ITD and SITD */ static struct ehci_iso_stream * -iso_stream_alloc (int mem_flags) +iso_stream_alloc (unsigned mem_flags) { struct ehci_iso_stream *stream; @@ -846,7 +846,7 @@ iso_stream_find (struct ehci_hcd *ehci, struct urb *urb) /* ehci_iso_sched ops can be ITD-only or SITD-only */ static struct ehci_iso_sched * -iso_sched_alloc (unsigned packets, int mem_flags) +iso_sched_alloc (unsigned packets, unsigned mem_flags) { struct ehci_iso_sched *iso_sched; int size = sizeof *iso_sched; @@ -919,7 +919,7 @@ itd_urb_transaction ( struct ehci_iso_stream *stream, struct ehci_hcd *ehci, struct urb *urb, - int mem_flags + unsigned mem_flags ) { struct ehci_itd *itd; @@ -1412,7 +1412,8 @@ itd_complete ( /*-------------------------------------------------------------------------*/ -static int itd_submit (struct ehci_hcd *ehci, struct urb *urb, int mem_flags) +static int itd_submit (struct ehci_hcd *ehci, struct urb *urb, + unsigned mem_flags) { int status = -EINVAL; unsigned long flags; @@ -1523,7 +1524,7 @@ sitd_urb_transaction ( struct ehci_iso_stream *stream, struct ehci_hcd *ehci, struct urb *urb, - int mem_flags + unsigned mem_flags ) { struct ehci_sitd *sitd; @@ -1772,7 +1773,8 @@ sitd_complete ( } -static int sitd_submit (struct ehci_hcd *ehci, struct urb *urb, int mem_flags) +static int sitd_submit (struct ehci_hcd *ehci, struct urb *urb, + unsigned mem_flags) { int status = -EINVAL; unsigned long flags; @@ -1822,7 +1824,8 @@ done: #else static inline int -sitd_submit (struct ehci_hcd *ehci, struct urb *urb, int mem_flags) +sitd_submit (struct ehci_hcd *ehci, struct urb *urb, + unsigned mem_flags) { ehci_dbg (ehci, "split iso support is disabled\n"); return -ENOSYS; diff --git a/drivers/usb/host/hc_crisv10.c b/drivers/usb/host/hc_crisv10.c index d9883d774d3..81f8f6b7fdc 100644 --- a/drivers/usb/host/hc_crisv10.c +++ b/drivers/usb/host/hc_crisv10.c @@ 
-463,7 +463,8 @@ static void etrax_usb_free_epid(int epid); static int etrax_remove_from_sb_list(struct urb *urb); -static void* etrax_usb_buffer_alloc(struct usb_bus* bus, size_t size, int mem_flags, dma_addr_t *dma); +static void* etrax_usb_buffer_alloc(struct usb_bus* bus, size_t size, + unsigned mem_flags, dma_addr_t *dma); static void etrax_usb_buffer_free(struct usb_bus *bus, size_t size, void *addr, dma_addr_t dma); static void etrax_usb_add_to_bulk_sb_list(struct urb *urb, int epid); @@ -476,7 +477,7 @@ static int etrax_usb_submit_ctrl_urb(struct urb *urb); static int etrax_usb_submit_intr_urb(struct urb *urb); static int etrax_usb_submit_isoc_urb(struct urb *urb); -static int etrax_usb_submit_urb(struct urb *urb, int mem_flags); +static int etrax_usb_submit_urb(struct urb *urb, unsigned mem_flags); static int etrax_usb_unlink_urb(struct urb *urb, int status); static int etrax_usb_get_frame_number(struct usb_device *usb_dev); @@ -1262,7 +1263,7 @@ static int etrax_usb_allocate_epid(void) return -1; } -static int etrax_usb_submit_urb(struct urb *urb, int mem_flags) +static int etrax_usb_submit_urb(struct urb *urb, unsigned mem_flags) { etrax_hc_t *hc; int ret = -EINVAL; @@ -4277,7 +4278,8 @@ etrax_usb_bulk_eot_timer_func(unsigned long dummy) } static void* -etrax_usb_buffer_alloc(struct usb_bus* bus, size_t size, int mem_flags, dma_addr_t *dma) +etrax_usb_buffer_alloc(struct usb_bus* bus, size_t size, + unsigned mem_flags, dma_addr_t *dma) { return kmalloc(size, mem_flags); } diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index 3f2cea21efc..50b1970fe6b 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -693,7 +693,7 @@ static int balance(struct isp116x *isp116x, u16 period, u16 load) static int isp116x_urb_enqueue(struct usb_hcd *hcd, struct usb_host_endpoint *hep, struct urb *urb, - int mem_flags) + unsigned mem_flags) { struct isp116x *isp116x = hcd_to_isp116x(hcd); struct usb_device *udev = 
urb->dev; @@ -715,7 +715,7 @@ static int isp116x_urb_enqueue(struct usb_hcd *hcd, } /* avoid all allocations within spinlocks: request or endpoint */ if (!hep->hcpriv) { - ep = kcalloc(1, sizeof *ep, (__force unsigned)mem_flags); + ep = kcalloc(1, sizeof *ep, mem_flags); if (!ep) return -ENOMEM; } diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 13cd2177b55..0375097850e 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -180,7 +180,7 @@ static int ohci_urb_enqueue ( struct usb_hcd *hcd, struct usb_host_endpoint *ep, struct urb *urb, - int mem_flags + unsigned mem_flags ) { struct ohci_hcd *ohci = hcd_to_ohci (hcd); struct ed *ed; diff --git a/drivers/usb/host/ohci-mem.c b/drivers/usb/host/ohci-mem.c index 23735a36af0..fd3c4d3714b 100644 --- a/drivers/usb/host/ohci-mem.c +++ b/drivers/usb/host/ohci-mem.c @@ -84,7 +84,7 @@ dma_to_td (struct ohci_hcd *hc, dma_addr_t td_dma) /* TDs ... */ static struct td * -td_alloc (struct ohci_hcd *hc, int mem_flags) +td_alloc (struct ohci_hcd *hc, unsigned mem_flags) { dma_addr_t dma; struct td *td; @@ -118,7 +118,7 @@ td_free (struct ohci_hcd *hc, struct td *td) /* EDs ... 
*/ static struct ed * -ed_alloc (struct ohci_hcd *hc, int mem_flags) +ed_alloc (struct ohci_hcd *hc, unsigned mem_flags) { dma_addr_t dma; struct ed *ed; diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index 6c3f910bc30..7a890a65f55 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -815,7 +815,7 @@ static int sl811h_urb_enqueue( struct usb_hcd *hcd, struct usb_host_endpoint *hep, struct urb *urb, - int mem_flags + unsigned mem_flags ) { struct sl811 *sl811 = hcd_to_sl811(hcd); struct usb_device *udev = urb->dev; diff --git a/drivers/usb/host/uhci-q.c b/drivers/usb/host/uhci-q.c index 5f18084a116..bbb36cd6ed6 100644 --- a/drivers/usb/host/uhci-q.c +++ b/drivers/usb/host/uhci-q.c @@ -1164,7 +1164,7 @@ static struct urb *uhci_find_urb_ep(struct uhci_hcd *uhci, struct urb *urb) static int uhci_urb_enqueue(struct usb_hcd *hcd, struct usb_host_endpoint *ep, - struct urb *urb, int mem_flags) + struct urb *urb, unsigned mem_flags) { int ret; struct uhci_hcd *uhci = hcd_to_uhci(hcd); diff --git a/drivers/usb/net/kaweth.c b/drivers/usb/net/kaweth.c index fd6ff4cb2c6..7ffa99b9760 100644 --- a/drivers/usb/net/kaweth.c +++ b/drivers/usb/net/kaweth.c @@ -477,7 +477,7 @@ static int kaweth_reset(struct kaweth_device *kaweth) } static void kaweth_usb_receive(struct urb *, struct pt_regs *regs); -static int kaweth_resubmit_rx_urb(struct kaweth_device *, int); +static int kaweth_resubmit_rx_urb(struct kaweth_device *, unsigned); /**************************************************************** int_callback @@ -550,7 +550,7 @@ static void kaweth_resubmit_tl(void *d) * kaweth_resubmit_rx_urb ****************************************************************/ static int kaweth_resubmit_rx_urb(struct kaweth_device *kaweth, - int mem_flags) + unsigned mem_flags) { int result; diff --git a/include/linux/usb.h b/include/linux/usb.h index eb282b58154..72463779299 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -938,17 +938,17 
@@ static inline void usb_fill_int_urb (struct urb *urb, } extern void usb_init_urb(struct urb *urb); -extern struct urb *usb_alloc_urb(int iso_packets, int mem_flags); +extern struct urb *usb_alloc_urb(int iso_packets, unsigned mem_flags); extern void usb_free_urb(struct urb *urb); #define usb_put_urb usb_free_urb extern struct urb *usb_get_urb(struct urb *urb); -extern int usb_submit_urb(struct urb *urb, int mem_flags); +extern int usb_submit_urb(struct urb *urb, unsigned mem_flags); extern int usb_unlink_urb(struct urb *urb); extern void usb_kill_urb(struct urb *urb); #define HAVE_USB_BUFFERS void *usb_buffer_alloc (struct usb_device *dev, size_t size, - int mem_flags, dma_addr_t *dma); + unsigned mem_flags, dma_addr_t *dma); void usb_buffer_free (struct usb_device *dev, size_t size, void *addr, dma_addr_t dma); @@ -1055,7 +1055,7 @@ int usb_sg_init ( struct scatterlist *sg, int nents, size_t length, - int mem_flags + unsigned mem_flags ); void usb_sg_cancel (struct usb_sg_request *io); void usb_sg_wait (struct usb_sg_request *io); diff --git a/include/linux/usb_gadget.h b/include/linux/usb_gadget.h index b00f127cb44..71e60860732 100644 --- a/include/linux/usb_gadget.h +++ b/include/linux/usb_gadget.h @@ -107,18 +107,18 @@ struct usb_ep_ops { int (*disable) (struct usb_ep *ep); struct usb_request *(*alloc_request) (struct usb_ep *ep, - int gfp_flags); + unsigned gfp_flags); void (*free_request) (struct usb_ep *ep, struct usb_request *req); void *(*alloc_buffer) (struct usb_ep *ep, unsigned bytes, - dma_addr_t *dma, int gfp_flags); + dma_addr_t *dma, unsigned gfp_flags); void (*free_buffer) (struct usb_ep *ep, void *buf, dma_addr_t dma, unsigned bytes); // NOTE: on 2.6, drivers may also use dma_map() and // dma_sync_single_*() to directly manage dma overhead. 
int (*queue) (struct usb_ep *ep, struct usb_request *req, - int gfp_flags); + unsigned gfp_flags); int (*dequeue) (struct usb_ep *ep, struct usb_request *req); int (*set_halt) (struct usb_ep *ep, int value); @@ -214,7 +214,7 @@ usb_ep_disable (struct usb_ep *ep) * Returns the request, or null if one could not be allocated. */ static inline struct usb_request * -usb_ep_alloc_request (struct usb_ep *ep, int gfp_flags) +usb_ep_alloc_request (struct usb_ep *ep, unsigned gfp_flags) { return ep->ops->alloc_request (ep, gfp_flags); } @@ -254,7 +254,7 @@ usb_ep_free_request (struct usb_ep *ep, struct usb_request *req) */ static inline void * usb_ep_alloc_buffer (struct usb_ep *ep, unsigned len, dma_addr_t *dma, - int gfp_flags) + unsigned gfp_flags) { return ep->ops->alloc_buffer (ep, len, dma, gfp_flags); } @@ -330,7 +330,7 @@ usb_ep_free_buffer (struct usb_ep *ep, void *buf, dma_addr_t dma, unsigned len) * reported when the usb peripheral is disconnected. */ static inline int -usb_ep_queue (struct usb_ep *ep, struct usb_request *req, int gfp_flags) +usb_ep_queue (struct usb_ep *ep, struct usb_request *req, unsigned gfp_flags) { return ep->ops->queue (ep, req, gfp_flags); } -- cgit v1.2.3-70-g09d2 From 00ab997dd24fff82900665449f859e23a78ad5f4 Mon Sep 17 00:00:00 2001 From: "david-b@pacbell.net" Date: Wed, 29 Jun 2005 07:04:14 -0700 Subject: [PATCH] USB: another cdc descriptor This adds another CDC descriptor type to <linux/usb_cdc.h>; the main claim to fame for this is that some Motorola phones include it. It's not currently needed by any driver code; included for completeness. 
Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_cdc.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb_cdc.h b/include/linux/usb_cdc.h index f22d6beecc7..ba617c37245 100644 --- a/include/linux/usb_cdc.h +++ b/include/linux/usb_cdc.h @@ -34,6 +34,7 @@ #define USB_CDC_ACM_TYPE 0x02 /* acm_descriptor */ #define USB_CDC_UNION_TYPE 0x06 /* union_desc */ #define USB_CDC_COUNTRY_TYPE 0x07 +#define USB_CDC_NETWORK_TERMINAL_TYPE 0x0a /* network_terminal_desc */ #define USB_CDC_ETHERNET_TYPE 0x0f /* ether_desc */ #define USB_CDC_WHCM_TYPE 0x11 #define USB_CDC_MDLM_TYPE 0x12 /* mdlm_desc */ @@ -83,6 +84,18 @@ struct usb_cdc_union_desc { /* ... and there could be other slave interfaces */ } __attribute__ ((packed)); +/* "Network Channel Terminal Functional Descriptor" from CDC spec 5.2.3.11 */ +struct usb_cdc_network_terminal_desc { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubType; + + __u8 bEntityId; + __u8 iName; + __u8 bChannelIndex; + __u8 bPhysicalInterface; +} __attribute__ ((packed)); + /* "Ethernet Networking Functional Descriptor" from CDC spec 5.2.3.16 */ struct usb_cdc_ether_desc { __u8 bLength; -- cgit v1.2.3-70-g09d2 From ab611487d8ada506e511d2b8f22fb8e7be9939b9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 12 Jul 2005 12:08:43 -0700 Subject: [NET]: __be'ify *_type_trans() tr_type_trans(), hippi_type_trans() left as-is. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- drivers/net/myri_sbus.c | 2 +- drivers/net/plip.c | 2 +- drivers/net/wan/farsync.c | 3 +-- drivers/net/wan/hdlc_cisco.c | 3 +-- drivers/net/wan/hdlc_ppp.c | 3 +-- drivers/net/wan/hdlc_raw.c | 3 +-- drivers/s390/net/qeth_main.c | 2 +- include/linux/etherdevice.h | 2 +- include/linux/fddidevice.h | 2 +- include/linux/hdlc.h | 4 ++-- include/linux/wanrouter.h | 3 +-- include/net/x25device.h | 3 +-- net/802/fddi.c | 4 ++-- net/atm/br2684.c | 3 +-- net/ethernet/eth.c | 2 +- net/wanrouter/wanmain.c | 6 +++--- 16 files changed, 20 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c index aad5494c83c..f0996ce5c26 100644 --- a/drivers/net/myri_sbus.c +++ b/drivers/net/myri_sbus.c @@ -369,7 +369,7 @@ static void myri_tx(struct myri_eth *mp, struct net_device *dev) * assume 802.3 if the type field is short enough to be a length. * This is normal practice and works for any 'now in use' protocol. */ -static unsigned short myri_type_trans(struct sk_buff *skb, struct net_device *dev) +static __be16 myri_type_trans(struct sk_buff *skb, struct net_device *dev) { struct ethhdr *eth; unsigned char *rawp; diff --git a/drivers/net/plip.c b/drivers/net/plip.c index f4b62405d2e..21537ee3a6a 100644 --- a/drivers/net/plip.c +++ b/drivers/net/plip.c @@ -540,7 +540,7 @@ plip_receive(unsigned short nibble_timeout, struct net_device *dev, * in far too many old systems not all even running Linux. 
*/ -static unsigned short plip_type_trans(struct sk_buff *skb, struct net_device *dev) +static __be16 plip_type_trans(struct sk_buff *skb, struct net_device *dev) { struct ethhdr *eth; unsigned char *rawp; diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 7217d44e885..2c83cca34b8 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -861,8 +861,7 @@ fst_tx_dma_complete(struct fst_card_info *card, struct fst_port_info *port, /* * Mark it for our own raw sockets interface */ -static unsigned short farsync_type_trans(struct sk_buff *skb, - struct net_device *dev) +static __be16 farsync_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->dev = dev; skb->mac.raw = skb->data; diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index 87496843681..48c03c11cd9 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -91,8 +91,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type, -static unsigned short cisco_type_trans(struct sk_buff *skb, - struct net_device *dev) +static __be16 cisco_type_trans(struct sk_buff *skb, struct net_device *dev) { hdlc_header *data = (hdlc_header*)skb->data; diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 7cd6195a2e4..b81263eaede 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -66,8 +66,7 @@ static void ppp_close(struct net_device *dev) -static unsigned short ppp_type_trans(struct sk_buff *skb, - struct net_device *dev) +static __be16 ppp_type_trans(struct sk_buff *skb, struct net_device *dev) { return __constant_htons(ETH_P_WAN_PPP); } diff --git a/drivers/net/wan/hdlc_raw.c b/drivers/net/wan/hdlc_raw.c index c41fb70b692..9456d31cb1c 100644 --- a/drivers/net/wan/hdlc_raw.c +++ b/drivers/net/wan/hdlc_raw.c @@ -24,8 +24,7 @@ #include -static unsigned short raw_type_trans(struct sk_buff *skb, - struct net_device *dev) +static __be16 raw_type_trans(struct sk_buff *skb, struct 
net_device *dev) { return __constant_htons(ETH_P_IP); } diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index 3cb88c77003..8f4d2999af8 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -2210,7 +2210,7 @@ no_mem: return NULL; } -static inline unsigned short +static inline __be16 qeth_type_trans(struct sk_buff *skb, struct net_device *dev) { struct qeth_card *card; diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index cf3847edc50..ce8518e658b 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -33,7 +33,7 @@ extern int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len); extern int eth_rebuild_header(struct sk_buff *skb); -extern unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev); +extern __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr); extern int eth_header_cache(struct neighbour *neigh, diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h index 002f6367697..e61e42dfd31 100644 --- a/include/linux/fddidevice.h +++ b/include/linux/fddidevice.h @@ -25,7 +25,7 @@ #include #ifdef __KERNEL__ -extern unsigned short fddi_type_trans(struct sk_buff *skb, +extern __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev); extern struct net_device *alloc_fddidev(int sizeof_priv); #endif diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index ed2927ef1ff..df695e9ae32 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -242,8 +242,8 @@ static __inline__ struct net_device_stats *hdlc_stats(struct net_device *dev) } -static __inline__ unsigned short hdlc_type_trans(struct sk_buff *skb, - struct net_device *dev) +static __inline__ __be16 hdlc_type_trans(struct sk_buff *skb, + struct net_device *dev) { hdlc_device *hdlc = 
dev_to_hdlc(dev); diff --git a/include/linux/wanrouter.h b/include/linux/wanrouter.h index 3e89f0f15f4..1b6b76a4eb5 100644 --- a/include/linux/wanrouter.h +++ b/include/linux/wanrouter.h @@ -516,8 +516,7 @@ struct wan_device { /* Public functions available for device drivers */ extern int register_wan_device(struct wan_device *wandev); extern int unregister_wan_device(char *name); -unsigned short wanrouter_type_trans(struct sk_buff *skb, - struct net_device *dev); +__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev); int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev, unsigned short type); diff --git a/include/net/x25device.h b/include/net/x25device.h index cf36a20ea3c..d45ae883bd1 100644 --- a/include/net/x25device.h +++ b/include/net/x25device.h @@ -5,8 +5,7 @@ #include #include -static inline unsigned short x25_type_trans(struct sk_buff *skb, - struct net_device *dev) +static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->mac.raw = skb->data; skb->input_dev = skb->dev = dev; diff --git a/net/802/fddi.c b/net/802/fddi.c index ebcf4830d6f..5ce24c4bb84 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -122,10 +122,10 @@ static int fddi_rebuild_header(struct sk_buff *skb) * the proper pointer to the start of packet data (skb->data). 
*/ -unsigned short fddi_type_trans(struct sk_buff *skb, struct net_device *dev) +__be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev) { struct fddihdr *fddi = (struct fddihdr *)skb->data; - unsigned short type; + __be16 type; /* * Set mac.raw field to point to FC byte, set data field to point diff --git a/net/atm/br2684.c b/net/atm/br2684.c index e6954cf1459..289956c4dd3 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -289,8 +289,7 @@ xmit will add the additional header part in that case */ * This is similar to eth_type_trans, which cannot be used because of * our dev->hard_header_len */ -static inline unsigned short br_type_trans(struct sk_buff *skb, - struct net_device *dev) +static inline __be16 br_type_trans(struct sk_buff *skb, struct net_device *dev) { struct ethhdr *eth; unsigned char *rawp; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index ab60ea63688..f6dbfb99b14 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -155,7 +155,7 @@ int eth_rebuild_header(struct sk_buff *skb) * This is normal practice and works for any 'now in use' protocol. */ -unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev) +__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) { struct ethhdr *eth; unsigned char *rawp; diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index d6844ac226f..13b650ad22e 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -358,10 +358,10 @@ int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev, */ -unsigned short wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) +__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) { int cnt = skb->data[0] ? 
0 : 1; /* there may be a pad present */ - unsigned short ethertype; + __be16 ethertype; switch (skb->data[cnt]) { case NLPID_IP: /* IP datagramm */ @@ -379,7 +379,7 @@ unsigned short wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) skb->data[cnt+3], dev->name); return 0; } - ethertype = *((unsigned short*)&skb->data[cnt+4]); + ethertype = *((__be16*)&skb->data[cnt+4]); cnt += 6; break; -- cgit v1.2.3-70-g09d2 From d53d9f16ea95a91ad4aa114809dcde486ca4000d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 12 Jul 2005 13:58:07 -0700 Subject: [PATCH] name_to_dev_t warning fix kernel/power/disk.c needs a declaration of name_to_dev_t() in scope. mount.h seems like an appropriate choice. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mount.h | 2 ++ init/do_mounts.c | 1 + init/do_mounts.h | 1 - kernel/power/disk.c | 2 ++ kernel/power/swsusp.c | 3 +-- 5 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 74b4727a4e3..f8f39937e30 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -12,6 +12,7 @@ #define _LINUX_MOUNT_H #ifdef __KERNEL__ +#include #include #include #include @@ -76,6 +77,7 @@ extern int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, extern void mark_mounts_for_expiry(struct list_head *mounts); extern spinlock_t vfsmount_lock; +extern dev_t name_to_dev_t(char *name); #endif #endif /* _LINUX_MOUNT_H */ diff --git a/init/do_mounts.c b/init/do_mounts.c index b7570c074d0..1b02be734cc 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include diff --git a/init/do_mounts.h b/init/do_mounts.h index de92bee4f35..e0a7ac9649e 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -9,7 +9,6 @@ #include #include -dev_t name_to_dev_t(char *name); void change_floppy(char *fmt, ...); void mount_block_root(char *name, int flags); void 
mount_root(void); diff --git a/kernel/power/disk.c b/kernel/power/disk.c index c51a4d96d4e..3ec789c6b53 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include "power.h" diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 7d7801cd01f..f2bc71b9fe8 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -1260,8 +1261,6 @@ static int data_read(struct pbe *pblist) return error; } -extern dev_t name_to_dev_t(const char *line); - /** * read_pagedir - Read page backup list pages from swap */ -- cgit v1.2.3-70-g09d2 From 08c6a96fd77836856c090ebb39beadc81cb8484d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jul 2005 13:58:28 -0700 Subject: [PATCH] ext3: fix options parsing Fix a problem with ext3 mount option parsing. When remount of a filesystem fails, old options are now restored. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/super.c | 70 ++++++++++++++++++++++++++++++++++++++++++------- include/linux/ext3_fs.h | 14 ++++++++++ 2 files changed, 74 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index a6d1779d7de..3c3c6e399fb 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -890,7 +890,10 @@ clear_qf_name: "quota turned on.\n"); return 0; } - kfree(sbi->s_qf_names[qtype]); + /* + * The space will be released later when all options + * are confirmed to be correct + */ sbi->s_qf_names[qtype] = NULL; break; case Opt_jqfmt_vfsold: @@ -939,7 +942,7 @@ clear_qf_name: case Opt_ignore: break; case Opt_resize: - if (!n_blocks_count) { + if (!is_remount) { printk("EXT3-fs: resize option only available " "for remount\n"); return 0; @@ -2109,14 +2112,33 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) { struct ext3_super_block * es; struct ext3_sb_info *sbi = EXT3_SB(sb); - 
unsigned long tmp; unsigned long n_blocks_count = 0; + unsigned long old_sb_flags; + struct ext3_mount_options old_opts; + int err; +#ifdef CONFIG_QUOTA + int i; +#endif + + /* Store the original options */ + old_sb_flags = sb->s_flags; + old_opts.s_mount_opt = sbi->s_mount_opt; + old_opts.s_resuid = sbi->s_resuid; + old_opts.s_resgid = sbi->s_resgid; + old_opts.s_commit_interval = sbi->s_commit_interval; +#ifdef CONFIG_QUOTA + old_opts.s_jquota_fmt = sbi->s_jquota_fmt; + for (i = 0; i < MAXQUOTAS; i++) + old_opts.s_qf_names[i] = sbi->s_qf_names[i]; +#endif /* * Allow the "check" option to be passed as a remount option. */ - if (!parse_options(data, sb, &tmp, &n_blocks_count, 1)) - return -EINVAL; + if (!parse_options(data, sb, NULL, &n_blocks_count, 1)) { + err = -EINVAL; + goto restore_opts; + } if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) ext3_abort(sb, __FUNCTION__, "Abort forced by user"); @@ -2130,8 +2152,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || n_blocks_count > le32_to_cpu(es->s_blocks_count)) { - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) - return -EROFS; + if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) { + err = -EROFS; + goto restore_opts; + } if (*flags & MS_RDONLY) { /* @@ -2158,7 +2182,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) "remount RDWR because of unsupported " "optional features (%x).\n", sb->s_id, le32_to_cpu(ret)); - return -EROFS; + err = -EROFS; + goto restore_opts; } /* * Mounting a RDONLY partition read-write, so reread @@ -2168,13 +2193,38 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) */ ext3_clear_journal_err(sb, es); sbi->s_mount_state = le16_to_cpu(es->s_state); - if ((ret = ext3_group_extend(sb, es, n_blocks_count))) - return ret; + if ((ret = ext3_group_extend(sb, es, n_blocks_count))) { + err = ret; + goto restore_opts; + } if (!ext3_setup_super (sb, es, 0)) sb->s_flags &= 
~MS_RDONLY; } } +#ifdef CONFIG_QUOTA + /* Release old quota file names */ + for (i = 0; i < MAXQUOTAS; i++) + if (old_opts.s_qf_names[i] && + old_opts.s_qf_names[i] != sbi->s_qf_names[i]) + kfree(old_opts.s_qf_names[i]); +#endif return 0; +restore_opts: + sb->s_flags = old_sb_flags; + sbi->s_mount_opt = old_opts.s_mount_opt; + sbi->s_resuid = old_opts.s_resuid; + sbi->s_resgid = old_opts.s_resgid; + sbi->s_commit_interval = old_opts.s_commit_interval; +#ifdef CONFIG_QUOTA + sbi->s_jquota_fmt = old_opts.s_jquota_fmt; + for (i = 0; i < MAXQUOTAS; i++) { + if (sbi->s_qf_names[i] && + old_opts.s_qf_names[i] != sbi->s_qf_names[i]) + kfree(sbi->s_qf_names[i]); + sbi->s_qf_names[i] = old_opts.s_qf_names[i]; + } +#endif + return err; } static int ext3_statfs (struct super_block * sb, struct kstatfs * buf) diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 4b6e1ab216a..c16662836c5 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -238,6 +238,20 @@ struct ext3_new_group_data { #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) +/* + * Mount options + */ +struct ext3_mount_options { + unsigned long s_mount_opt; + uid_t s_resuid; + gid_t s_resgid; + unsigned long s_commit_interval; +#ifdef CONFIG_QUOTA + int s_jquota_fmt; + char *s_qf_names[MAXQUOTAS]; +#endif +}; + /* * Structure of an inode on the disk */ -- cgit v1.2.3-70-g09d2 From 542d1c88bd7f73e2e59d41b12e4a9041deea89e4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 12 Jul 2005 13:58:31 -0700 Subject: [PATCH] tlb.h warning fix free_pages_and_swap_cache() and free_page_and_swap_cache() use release_pages() and page_cache_release() respectively, so make sure that we have the declarations in scope. 
Cc: Olaf Hering Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index c75954f2d86..239f520cc49 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -7,6 +7,8 @@ #include #include #include +#include + #include #include -- cgit v1.2.3-70-g09d2 From 67bc4eb0b1140a4bf364f2dcca152be659ed9057 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 12 Jul 2005 13:58:36 -0700 Subject: [PATCH] hardirq uses preempt hardirq.h uses preempt_count() from preempt.h Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hardirq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 8336dba1897..5912874ca83 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -2,6 +2,7 @@ #define LINUX_HARDIRQ_H #include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From bd4c625c061c2a38568d0add3478f59172455159 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 12 Jul 2005 20:21:28 -0700 Subject: reiserfs: run scripts/Lindent on reiserfs code This was a pure indentation change, using: scripts/Lindent fs/reiserfs/*.c include/linux/reiserfs_*.h to make reiserfs match the regular Linux indentation style. As Jeff Mahoney writes: The ReiserFS code is a mix of a number of different coding styles, sometimes different even from line-to-line. Since the code has been relatively stable for quite some time and there are few outstanding patches to be applied, it is time to reformat the code to conform to the Linux style standard outlined in Documentation/CodingStyle. This patch contains the result of running scripts/Lindent against fs/reiserfs/*.c and include/linux/reiserfs_*.h. 
There are places where the code can be made to look better, but I'd rather keep those patches separate so that there isn't a subtle by-hand hand accident in the middle of a huge patch. To be clear: This patch is reformatting *only*. A number of patches may follow that continue to make the code more consistent with the Linux coding style. Hans wasn't particularly enthusiastic about these patches, but said he wouldn't really oppose them either. Signed-off-by: Linus Torvalds --- fs/reiserfs/bitmap.c | 1842 ++++++----- fs/reiserfs/dir.c | 488 +-- fs/reiserfs/do_balan.c | 3236 +++++++++++-------- fs/reiserfs/file.c | 2564 ++++++++------- fs/reiserfs/fix_node.c | 4051 +++++++++++------------ fs/reiserfs/hashes.c | 193 +- fs/reiserfs/ibalance.c | 1844 +++++------ fs/reiserfs/inode.c | 4915 ++++++++++++++-------------- fs/reiserfs/ioctl.c | 197 +- fs/reiserfs/item_ops.c | 977 +++--- fs/reiserfs/journal.c | 6891 +++++++++++++++++++++------------------- fs/reiserfs/lbalance.c | 2218 ++++++------- fs/reiserfs/namei.c | 2574 +++++++-------- fs/reiserfs/objectid.c | 303 +- fs/reiserfs/prints.c | 1003 +++--- fs/reiserfs/procfs.c | 695 ++-- fs/reiserfs/resize.c | 207 +- fs/reiserfs/stree.c | 3369 ++++++++++---------- fs/reiserfs/super.c | 3623 +++++++++++---------- fs/reiserfs/tail_conversion.c | 463 +-- fs/reiserfs/xattr.c | 2173 ++++++------- fs/reiserfs/xattr_acl.c | 641 ++-- fs/reiserfs/xattr_security.c | 54 +- fs/reiserfs/xattr_trusted.c | 70 +- fs/reiserfs/xattr_user.c | 89 +- include/linux/reiserfs_acl.h | 52 +- include/linux/reiserfs_fs.h | 1595 +++++----- include/linux/reiserfs_fs_i.h | 59 +- include/linux/reiserfs_fs_sb.h | 616 ++-- include/linux/reiserfs_xattr.h | 126 +- 30 files changed, 24447 insertions(+), 22681 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 49c479c9454..909f71e9a30 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -46,1125 +46,1221 @@ #define TEST_OPTION(optname, s) \ 
test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)) -static inline void get_bit_address (struct super_block * s, - b_blocknr_t block, int * bmap_nr, int * offset) +static inline void get_bit_address(struct super_block *s, + b_blocknr_t block, int *bmap_nr, int *offset) { - /* It is in the bitmap block number equal to the block - * number divided by the number of bits in a block. */ - *bmap_nr = block / (s->s_blocksize << 3); - /* Within that bitmap block it is located at bit offset *offset. */ - *offset = block & ((s->s_blocksize << 3) - 1 ); - return; + /* It is in the bitmap block number equal to the block + * number divided by the number of bits in a block. */ + *bmap_nr = block / (s->s_blocksize << 3); + /* Within that bitmap block it is located at bit offset *offset. */ + *offset = block & ((s->s_blocksize << 3) - 1); + return; } #ifdef CONFIG_REISERFS_CHECK -int is_reusable (struct super_block * s, b_blocknr_t block, int bit_value) +int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) { - int i, j; + int i, j; - if (block == 0 || block >= SB_BLOCK_COUNT (s)) { - reiserfs_warning (s, "vs-4010: is_reusable: block number is out of range %lu (%u)", - block, SB_BLOCK_COUNT (s)); - return 0; - } - - /* it can't be one of the bitmap blocks */ - for (i = 0; i < SB_BMAP_NR (s); i ++) - if (block == SB_AP_BITMAP (s)[i].bh->b_blocknr) { - reiserfs_warning (s, "vs: 4020: is_reusable: " - "bitmap block %lu(%u) can't be freed or reused", - block, SB_BMAP_NR (s)); - return 0; + if (block == 0 || block >= SB_BLOCK_COUNT(s)) { + reiserfs_warning(s, + "vs-4010: is_reusable: block number is out of range %lu (%u)", + block, SB_BLOCK_COUNT(s)); + return 0; } - - get_bit_address (s, block, &i, &j); - if (i >= SB_BMAP_NR (s)) { - reiserfs_warning (s, "vs-4030: is_reusable: there is no so many bitmap blocks: " - "block=%lu, bitmap_nr=%d", block, i); - return 0; - } + /* it can't be one of the bitmap blocks */ + for (i = 0; i < SB_BMAP_NR(s); i++) + if (block == 
SB_AP_BITMAP(s)[i].bh->b_blocknr) { + reiserfs_warning(s, "vs: 4020: is_reusable: " + "bitmap block %lu(%u) can't be freed or reused", + block, SB_BMAP_NR(s)); + return 0; + } - if ((bit_value == 0 && - reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data)) || - (bit_value == 1 && - reiserfs_test_le_bit(j, SB_AP_BITMAP (s)[i].bh->b_data) == 0)) { - reiserfs_warning (s, "vs-4040: is_reusable: corresponding bit of block %lu does not " - "match required value (i==%d, j==%d) test_bit==%d", - block, i, j, reiserfs_test_le_bit (j, SB_AP_BITMAP (s)[i].bh->b_data)); + get_bit_address(s, block, &i, &j); - return 0; - } + if (i >= SB_BMAP_NR(s)) { + reiserfs_warning(s, + "vs-4030: is_reusable: there is no so many bitmap blocks: " + "block=%lu, bitmap_nr=%d", block, i); + return 0; + } - if (bit_value == 0 && block == SB_ROOT_BLOCK (s)) { - reiserfs_warning (s, "vs-4050: is_reusable: this is root block (%u), " - "it must be busy", SB_ROOT_BLOCK (s)); - return 0; - } + if ((bit_value == 0 && + reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data)) || + (bit_value == 1 && + reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data) == 0)) { + reiserfs_warning(s, + "vs-4040: is_reusable: corresponding bit of block %lu does not " + "match required value (i==%d, j==%d) test_bit==%d", + block, i, j, reiserfs_test_le_bit(j, + SB_AP_BITMAP + (s)[i].bh-> + b_data)); + + return 0; + } - return 1; + if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) { + reiserfs_warning(s, + "vs-4050: is_reusable: this is root block (%u), " + "it must be busy", SB_ROOT_BLOCK(s)); + return 0; + } + + return 1; } -#endif /* CONFIG_REISERFS_CHECK */ +#endif /* CONFIG_REISERFS_CHECK */ /* searches in journal structures for a given block number (bmap, off). If block is found in reiserfs journal it suggests next free block candidate to test. 
*/ -static inline int is_block_in_journal (struct super_block * s, int bmap, int -off, int *next) +static inline int is_block_in_journal(struct super_block *s, int bmap, int + off, int *next) { - b_blocknr_t tmp; - - if (reiserfs_in_journal (s, bmap, off, 1, &tmp)) { - if (tmp) { /* hint supplied */ - *next = tmp; - PROC_INFO_INC( s, scan_bitmap.in_journal_hint ); - } else { - (*next) = off + 1; /* inc offset to avoid looping. */ - PROC_INFO_INC( s, scan_bitmap.in_journal_nohint ); + b_blocknr_t tmp; + + if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) { + if (tmp) { /* hint supplied */ + *next = tmp; + PROC_INFO_INC(s, scan_bitmap.in_journal_hint); + } else { + (*next) = off + 1; /* inc offset to avoid looping. */ + PROC_INFO_INC(s, scan_bitmap.in_journal_nohint); + } + PROC_INFO_INC(s, scan_bitmap.retry); + return 1; } - PROC_INFO_INC( s, scan_bitmap.retry ); - return 1; - } - return 0; + return 0; } /* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap * block; */ -static int scan_bitmap_block (struct reiserfs_transaction_handle *th, - int bmap_n, int *beg, int boundary, int min, int max, int unfm) +static int scan_bitmap_block(struct reiserfs_transaction_handle *th, + int bmap_n, int *beg, int boundary, int min, + int max, int unfm) { - struct super_block *s = th->t_super; - struct reiserfs_bitmap_info *bi=&SB_AP_BITMAP(s)[bmap_n]; - int end, next; - int org = *beg; + struct super_block *s = th->t_super; + struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n]; + int end, next; + int org = *beg; - BUG_ON (!th->t_trans_id); + BUG_ON(!th->t_trans_id); - RFALSE(bmap_n >= SB_BMAP_NR (s), "Bitmap %d is out of range (0..%d)",bmap_n, SB_BMAP_NR (s) - 1); - PROC_INFO_INC( s, scan_bitmap.bmap ); + RFALSE(bmap_n >= SB_BMAP_NR(s), "Bitmap %d is out of range (0..%d)", + bmap_n, SB_BMAP_NR(s) - 1); + PROC_INFO_INC(s, scan_bitmap.bmap); /* this is unclear and lacks comments, explain how journal bitmaps work here for the reader. 
Convey a sense of the design here. What is a window? */ /* - I mean `a window of zero bits' as in description of this function - Zam. */ - - if ( !bi ) { - reiserfs_warning (s, "NULL bitmap info pointer for bitmap %d", bmap_n); - return 0; - } - if (buffer_locked (bi->bh)) { - PROC_INFO_INC( s, scan_bitmap.wait ); - __wait_on_buffer (bi->bh); - } - - while (1) { - cont: - if (bi->free_count < min) - return 0; // No free blocks in this bitmap - - /* search for a first zero bit -- beggining of a window */ - *beg = reiserfs_find_next_zero_le_bit - ((unsigned long*)(bi->bh->b_data), boundary, *beg); - - if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block - * cannot contain a zero window of minimum size */ - return 0; - } - if (unfm && is_block_in_journal(s,bmap_n, *beg, beg)) - continue; - /* first zero bit found; we check next bits */ - for (end = *beg + 1;; end ++) { - if (end >= *beg + max || end >= boundary || reiserfs_test_le_bit (end, bi->bh->b_data)) { - next = end; - break; - } - /* finding the other end of zero bit window requires looking into journal structures (in - * case of searching for free blocks for unformatted nodes) */ - if (unfm && is_block_in_journal(s, bmap_n, end, &next)) - break; + if (!bi) { + reiserfs_warning(s, "NULL bitmap info pointer for bitmap %d", + bmap_n); + return 0; + } + if (buffer_locked(bi->bh)) { + PROC_INFO_INC(s, scan_bitmap.wait); + __wait_on_buffer(bi->bh); } - /* now (*beg) points to beginning of zero bits window, - * (end) points to one bit after the window end */ - if (end - *beg >= min) { /* it seems we have found window of proper size */ - int i; - reiserfs_prepare_for_journal (s, bi->bh, 1); - /* try to set all blocks used checking are they still free */ - for (i = *beg; i < end; i++) { - /* It seems that we should not check in journal again. 
*/ - if (reiserfs_test_and_set_le_bit (i, bi->bh->b_data)) { - /* bit was set by another process - * while we slept in prepare_for_journal() */ - PROC_INFO_INC( s, scan_bitmap.stolen ); - if (i >= *beg + min) { /* we can continue with smaller set of allocated blocks, - * if length of this set is more or equal to `min' */ - end = i; - break; - } - /* otherwise we clear all bit were set ... */ - while (--i >= *beg) - reiserfs_test_and_clear_le_bit (i, bi->bh->b_data); - reiserfs_restore_prepared_buffer (s, bi->bh); - *beg = org; - /* ... and search again in current block from beginning */ - goto cont; + while (1) { + cont: + if (bi->free_count < min) + return 0; // No free blocks in this bitmap + + /* search for a first zero bit -- beggining of a window */ + *beg = reiserfs_find_next_zero_le_bit + ((unsigned long *)(bi->bh->b_data), boundary, *beg); + + if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block + * cannot contain a zero window of minimum size */ + return 0; } - } - bi->free_count -= (end - *beg); - journal_mark_dirty (th, s, bi->bh); - /* free block count calculation */ - reiserfs_prepare_for_journal (s, SB_BUFFER_WITH_SB(s), 1); - PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg)); - journal_mark_dirty (th, s, SB_BUFFER_WITH_SB(s)); + if (unfm && is_block_in_journal(s, bmap_n, *beg, beg)) + continue; + /* first zero bit found; we check next bits */ + for (end = *beg + 1;; end++) { + if (end >= *beg + max || end >= boundary + || reiserfs_test_le_bit(end, bi->bh->b_data)) { + next = end; + break; + } + /* finding the other end of zero bit window requires looking into journal structures (in + * case of searching for free blocks for unformatted nodes) */ + if (unfm && is_block_in_journal(s, bmap_n, end, &next)) + break; + } - return end - (*beg); - } else { - *beg = next; + /* now (*beg) points to beginning of zero bits window, + * (end) points to one bit after the window end */ + if (end - *beg >= min) { /* it seems we 
have found window of proper size */ + int i; + reiserfs_prepare_for_journal(s, bi->bh, 1); + /* try to set all blocks used checking are they still free */ + for (i = *beg; i < end; i++) { + /* It seems that we should not check in journal again. */ + if (reiserfs_test_and_set_le_bit + (i, bi->bh->b_data)) { + /* bit was set by another process + * while we slept in prepare_for_journal() */ + PROC_INFO_INC(s, scan_bitmap.stolen); + if (i >= *beg + min) { /* we can continue with smaller set of allocated blocks, + * if length of this set is more or equal to `min' */ + end = i; + break; + } + /* otherwise we clear all bit were set ... */ + while (--i >= *beg) + reiserfs_test_and_clear_le_bit + (i, bi->bh->b_data); + reiserfs_restore_prepared_buffer(s, + bi-> + bh); + *beg = org; + /* ... and search again in current block from beginning */ + goto cont; + } + } + bi->free_count -= (end - *beg); + journal_mark_dirty(th, s, bi->bh); + + /* free block count calculation */ + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), + 1); + PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg)); + journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s)); + + return end - (*beg); + } else { + *beg = next; + } } - } } -static int bmap_hash_id(struct super_block *s, u32 id) { - char * hash_in = NULL; - unsigned long hash; - unsigned bm; - - if (id <= 2) { - bm = 1; - } else { - hash_in = (char *)(&id); - hash = keyed_hash(hash_in, 4); - bm = hash % SB_BMAP_NR(s); - if (!bm) - bm = 1; - } - /* this can only be true when SB_BMAP_NR = 1 */ - if (bm >= SB_BMAP_NR(s)) - bm = 0; - return bm; +static int bmap_hash_id(struct super_block *s, u32 id) +{ + char *hash_in = NULL; + unsigned long hash; + unsigned bm; + + if (id <= 2) { + bm = 1; + } else { + hash_in = (char *)(&id); + hash = keyed_hash(hash_in, 4); + bm = hash % SB_BMAP_NR(s); + if (!bm) + bm = 1; + } + /* this can only be true when SB_BMAP_NR = 1 */ + if (bm >= SB_BMAP_NR(s)) + bm = 0; + return bm; } /* * hashes the id and then returns > 
0 if the block group for the * corresponding hash is full */ -static inline int block_group_used(struct super_block *s, u32 id) { - int bm; - bm = bmap_hash_id(s, id); - if (SB_AP_BITMAP(s)[bm].free_count > ((s->s_blocksize << 3) * 60 / 100) ) { - return 0; - } - return 1; +static inline int block_group_used(struct super_block *s, u32 id) +{ + int bm; + bm = bmap_hash_id(s, id); + if (SB_AP_BITMAP(s)[bm].free_count > ((s->s_blocksize << 3) * 60 / 100)) { + return 0; + } + return 1; } /* * the packing is returned in disk byte order */ -__le32 reiserfs_choose_packing(struct inode *dir) +__le32 reiserfs_choose_packing(struct inode * dir) { - __le32 packing; - if (TEST_OPTION(packing_groups, dir->i_sb)) { - u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id); - /* - * some versions of reiserfsck expect packing locality 1 to be - * special - */ - if (parent_dir == 1 || block_group_used(dir->i_sb,parent_dir)) - packing = INODE_PKEY(dir)->k_objectid; - else - packing = INODE_PKEY(dir)->k_dir_id; - } else - packing = INODE_PKEY(dir)->k_objectid; - return packing; + __le32 packing; + if (TEST_OPTION(packing_groups, dir->i_sb)) { + u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id); + /* + * some versions of reiserfsck expect packing locality 1 to be + * special + */ + if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir)) + packing = INODE_PKEY(dir)->k_objectid; + else + packing = INODE_PKEY(dir)->k_dir_id; + } else + packing = INODE_PKEY(dir)->k_objectid; + return packing; } - + /* Tries to find contiguous zero bit window (given size) in given region of * bitmap and place new blocks there. Returns number of allocated blocks. 
*/ -static int scan_bitmap (struct reiserfs_transaction_handle *th, - b_blocknr_t *start, b_blocknr_t finish, - int min, int max, int unfm, unsigned long file_block) +static int scan_bitmap(struct reiserfs_transaction_handle *th, + b_blocknr_t * start, b_blocknr_t finish, + int min, int max, int unfm, unsigned long file_block) { - int nr_allocated=0; - struct super_block * s = th->t_super; - /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr - * - Hans, it is not a block number - Zam. */ - - int bm, off; - int end_bm, end_off; - int off_max = s->s_blocksize << 3; - - BUG_ON (!th->t_trans_id); - - PROC_INFO_INC( s, scan_bitmap.call ); - if ( SB_FREE_BLOCKS(s) <= 0) - return 0; // No point in looking for more free blocks - - get_bit_address (s, *start, &bm, &off); - get_bit_address (s, finish, &end_bm, &end_off); - if (bm > SB_BMAP_NR(s)) - return 0; - if (end_bm > SB_BMAP_NR(s)) - end_bm = SB_BMAP_NR(s); - - /* When the bitmap is more than 10% free, anyone can allocate. - * When it's less than 10% free, only files that already use the - * bitmap are allowed. Once we pass 80% full, this restriction - * is lifted. - * - * We do this so that files that grow later still have space close to - * their original allocation. This improves locality, and presumably - * performance as a result. - * - * This is only an allocation policy and does not make up for getting a - * bad hint. Decent hinting must be implemented for this to work well. 
- */ - if ( TEST_OPTION(skip_busy, s) && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s)/20 ) { - for (;bm < end_bm; bm++, off = 0) { - if ( ( off && (!unfm || (file_block != 0))) || SB_AP_BITMAP(s)[bm].free_count > (s->s_blocksize << 3) / 10 ) - nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm); - if (nr_allocated) - goto ret; - } - /* we know from above that start is a reasonable number */ - get_bit_address (s, *start, &bm, &off); - } - - for (;bm < end_bm; bm++, off = 0) { - nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm); - if (nr_allocated) - goto ret; - } - - nr_allocated = scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm); - - ret: - *start = bm * off_max + off; - return nr_allocated; + int nr_allocated = 0; + struct super_block *s = th->t_super; + /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr + * - Hans, it is not a block number - Zam. */ + + int bm, off; + int end_bm, end_off; + int off_max = s->s_blocksize << 3; + + BUG_ON(!th->t_trans_id); + + PROC_INFO_INC(s, scan_bitmap.call); + if (SB_FREE_BLOCKS(s) <= 0) + return 0; // No point in looking for more free blocks + + get_bit_address(s, *start, &bm, &off); + get_bit_address(s, finish, &end_bm, &end_off); + if (bm > SB_BMAP_NR(s)) + return 0; + if (end_bm > SB_BMAP_NR(s)) + end_bm = SB_BMAP_NR(s); + + /* When the bitmap is more than 10% free, anyone can allocate. + * When it's less than 10% free, only files that already use the + * bitmap are allowed. Once we pass 80% full, this restriction + * is lifted. + * + * We do this so that files that grow later still have space close to + * their original allocation. This improves locality, and presumably + * performance as a result. + * + * This is only an allocation policy and does not make up for getting a + * bad hint. Decent hinting must be implemented for this to work well. 
+ */ + if (TEST_OPTION(skip_busy, s) + && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) { + for (; bm < end_bm; bm++, off = 0) { + if ((off && (!unfm || (file_block != 0))) + || SB_AP_BITMAP(s)[bm].free_count > + (s->s_blocksize << 3) / 10) + nr_allocated = + scan_bitmap_block(th, bm, &off, off_max, + min, max, unfm); + if (nr_allocated) + goto ret; + } + /* we know from above that start is a reasonable number */ + get_bit_address(s, *start, &bm, &off); + } + + for (; bm < end_bm; bm++, off = 0) { + nr_allocated = + scan_bitmap_block(th, bm, &off, off_max, min, max, unfm); + if (nr_allocated) + goto ret; + } + + nr_allocated = + scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm); + + ret: + *start = bm * off_max + off; + return nr_allocated; } -static void _reiserfs_free_block (struct reiserfs_transaction_handle *th, - struct inode *inode, b_blocknr_t block, - int for_unformatted) +static void _reiserfs_free_block(struct reiserfs_transaction_handle *th, + struct inode *inode, b_blocknr_t block, + int for_unformatted) { - struct super_block * s = th->t_super; - struct reiserfs_super_block * rs; - struct buffer_head * sbh; - struct reiserfs_bitmap_info *apbi; - int nr, offset; + struct super_block *s = th->t_super; + struct reiserfs_super_block *rs; + struct buffer_head *sbh; + struct reiserfs_bitmap_info *apbi; + int nr, offset; - BUG_ON (!th->t_trans_id); + BUG_ON(!th->t_trans_id); - PROC_INFO_INC( s, free_block ); + PROC_INFO_INC(s, free_block); - rs = SB_DISK_SUPER_BLOCK (s); - sbh = SB_BUFFER_WITH_SB (s); - apbi = SB_AP_BITMAP(s); + rs = SB_DISK_SUPER_BLOCK(s); + sbh = SB_BUFFER_WITH_SB(s); + apbi = SB_AP_BITMAP(s); - get_bit_address (s, block, &nr, &offset); + get_bit_address(s, block, &nr, &offset); - if (nr >= sb_bmap_nr (rs)) { - reiserfs_warning (s, "vs-4075: reiserfs_free_block: " - "block %lu is out of range on %s", - block, reiserfs_bdevname (s)); - return; - } - - reiserfs_prepare_for_journal(s, apbi[nr].bh, 1 ) ; - - /* clear bit for the given 
block in bit map */ - if (!reiserfs_test_and_clear_le_bit (offset, apbi[nr].bh->b_data)) { - reiserfs_warning (s, "vs-4080: reiserfs_free_block: " - "free_block (%s:%lu)[dev:blocknr]: bit already cleared", - reiserfs_bdevname (s), block); - } - apbi[nr].free_count ++; - journal_mark_dirty (th, s, apbi[nr].bh); - - reiserfs_prepare_for_journal(s, sbh, 1) ; - /* update super block */ - set_sb_free_blocks( rs, sb_free_blocks(rs) + 1 ); - - journal_mark_dirty (th, s, sbh); - if (for_unformatted) - DQUOT_FREE_BLOCK_NODIRTY(inode, 1); + if (nr >= sb_bmap_nr(rs)) { + reiserfs_warning(s, "vs-4075: reiserfs_free_block: " + "block %lu is out of range on %s", + block, reiserfs_bdevname(s)); + return; + } + + reiserfs_prepare_for_journal(s, apbi[nr].bh, 1); + + /* clear bit for the given block in bit map */ + if (!reiserfs_test_and_clear_le_bit(offset, apbi[nr].bh->b_data)) { + reiserfs_warning(s, "vs-4080: reiserfs_free_block: " + "free_block (%s:%lu)[dev:blocknr]: bit already cleared", + reiserfs_bdevname(s), block); + } + apbi[nr].free_count++; + journal_mark_dirty(th, s, apbi[nr].bh); + + reiserfs_prepare_for_journal(s, sbh, 1); + /* update super block */ + set_sb_free_blocks(rs, sb_free_blocks(rs) + 1); + + journal_mark_dirty(th, s, sbh); + if (for_unformatted) + DQUOT_FREE_BLOCK_NODIRTY(inode, 1); } -void reiserfs_free_block (struct reiserfs_transaction_handle *th, - struct inode *inode, b_blocknr_t block, - int for_unformatted) +void reiserfs_free_block(struct reiserfs_transaction_handle *th, + struct inode *inode, b_blocknr_t block, + int for_unformatted) { - struct super_block * s = th->t_super; + struct super_block *s = th->t_super; - BUG_ON (!th->t_trans_id); + BUG_ON(!th->t_trans_id); - RFALSE(!s, "vs-4061: trying to free block on nonexistent device"); - RFALSE(is_reusable (s, block, 1) == 0, "vs-4071: can not free such block"); - /* mark it before we clear it, just in case */ - journal_mark_freed(th, s, block) ; - _reiserfs_free_block(th, inode, block, 
for_unformatted) ; + RFALSE(!s, "vs-4061: trying to free block on nonexistent device"); + RFALSE(is_reusable(s, block, 1) == 0, + "vs-4071: can not free such block"); + /* mark it before we clear it, just in case */ + journal_mark_freed(th, s, block); + _reiserfs_free_block(th, inode, block, for_unformatted); } /* preallocated blocks don't need to be run through journal_mark_freed */ -static void reiserfs_free_prealloc_block (struct reiserfs_transaction_handle *th, - struct inode *inode, b_blocknr_t block) { - RFALSE(!th->t_super, "vs-4060: trying to free block on nonexistent device"); - RFALSE(is_reusable (th->t_super, block, 1) == 0, "vs-4070: can not free such block"); - BUG_ON (!th->t_trans_id); - _reiserfs_free_block(th, inode, block, 1) ; +static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th, + struct inode *inode, b_blocknr_t block) +{ + RFALSE(!th->t_super, + "vs-4060: trying to free block on nonexistent device"); + RFALSE(is_reusable(th->t_super, block, 1) == 0, + "vs-4070: can not free such block"); + BUG_ON(!th->t_trans_id); + _reiserfs_free_block(th, inode, block, 1); } -static void __discard_prealloc (struct reiserfs_transaction_handle * th, - struct reiserfs_inode_info *ei) +static void __discard_prealloc(struct reiserfs_transaction_handle *th, + struct reiserfs_inode_info *ei) { - unsigned long save = ei->i_prealloc_block ; - int dirty = 0; - struct inode *inode = &ei->vfs_inode; - BUG_ON (!th->t_trans_id); + unsigned long save = ei->i_prealloc_block; + int dirty = 0; + struct inode *inode = &ei->vfs_inode; + BUG_ON(!th->t_trans_id); #ifdef CONFIG_REISERFS_CHECK - if (ei->i_prealloc_count < 0) - reiserfs_warning (th->t_super, "zam-4001:%s: inode has negative prealloc blocks count.", __FUNCTION__ ); + if (ei->i_prealloc_count < 0) + reiserfs_warning(th->t_super, + "zam-4001:%s: inode has negative prealloc blocks count.", + __FUNCTION__); #endif - while (ei->i_prealloc_count > 0) { - reiserfs_free_prealloc_block(th, inode, 
ei->i_prealloc_block); - ei->i_prealloc_block++; - ei->i_prealloc_count --; - dirty = 1; - } - if (dirty) - reiserfs_update_sd(th, inode); - ei->i_prealloc_block = save; - list_del_init(&(ei->i_prealloc_list)); + while (ei->i_prealloc_count > 0) { + reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block); + ei->i_prealloc_block++; + ei->i_prealloc_count--; + dirty = 1; + } + if (dirty) + reiserfs_update_sd(th, inode); + ei->i_prealloc_block = save; + list_del_init(&(ei->i_prealloc_list)); } /* FIXME: It should be inline function */ -void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th, - struct inode *inode) +void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th, + struct inode *inode) { - struct reiserfs_inode_info *ei = REISERFS_I(inode); - BUG_ON (!th->t_trans_id); - if (ei->i_prealloc_count) - __discard_prealloc(th, ei); + struct reiserfs_inode_info *ei = REISERFS_I(inode); + BUG_ON(!th->t_trans_id); + if (ei->i_prealloc_count) + __discard_prealloc(th, ei); } -void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th) +void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th) { - struct list_head * plist = &SB_JOURNAL(th->t_super)->j_prealloc_list; + struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list; - BUG_ON (!th->t_trans_id); + BUG_ON(!th->t_trans_id); - while (!list_empty(plist)) { - struct reiserfs_inode_info *ei; - ei = list_entry(plist->next, struct reiserfs_inode_info, i_prealloc_list); + while (!list_empty(plist)) { + struct reiserfs_inode_info *ei; + ei = list_entry(plist->next, struct reiserfs_inode_info, + i_prealloc_list); #ifdef CONFIG_REISERFS_CHECK - if (!ei->i_prealloc_count) { - reiserfs_warning (th->t_super, "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.", __FUNCTION__); - } + if (!ei->i_prealloc_count) { + reiserfs_warning(th->t_super, + "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.", + __FUNCTION__); 
+ } #endif - __discard_prealloc(th, ei); - } + __discard_prealloc(th, ei); + } } -void reiserfs_init_alloc_options (struct super_block *s) +void reiserfs_init_alloc_options(struct super_block *s) { - set_bit (_ALLOC_skip_busy, &SB_ALLOC_OPTS(s)); - set_bit (_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s)); - set_bit (_ALLOC_packing_groups, &SB_ALLOC_OPTS(s)); + set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s)); + set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s)); + set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s)); } /* block allocator related options are parsed here */ -int reiserfs_parse_alloc_options(struct super_block * s, char * options) +int reiserfs_parse_alloc_options(struct super_block *s, char *options) { - char * this_char, * value; - - REISERFS_SB(s)->s_alloc_options.bits = 0; /* clear default settings */ - - while ( (this_char = strsep (&options, ":")) != NULL ) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; - - if (!strcmp(this_char, "concentrating_formatted_nodes")) { - int temp; - SET_OPTION(concentrating_formatted_nodes); - temp = (value && *value) ? simple_strtoul (value, &value, 0) : 10; - if (temp <= 0 || temp > 100) { - REISERFS_SB(s)->s_alloc_options.border = 10; - } else { - REISERFS_SB(s)->s_alloc_options.border = 100 / temp; - } - continue; - } - if (!strcmp(this_char, "displacing_large_files")) { - SET_OPTION(displacing_large_files); - REISERFS_SB(s)->s_alloc_options.large_file_size = - (value && *value) ? 
simple_strtoul (value, &value, 0) : 16; - continue; - } - if (!strcmp(this_char, "displacing_new_packing_localities")) { - SET_OPTION(displacing_new_packing_localities); - continue; - }; - - if (!strcmp(this_char, "old_hashed_relocation")) { - SET_OPTION(old_hashed_relocation); - continue; - } + char *this_char, *value; + + REISERFS_SB(s)->s_alloc_options.bits = 0; /* clear default settings */ + + while ((this_char = strsep(&options, ":")) != NULL) { + if ((value = strchr(this_char, '=')) != NULL) + *value++ = 0; + + if (!strcmp(this_char, "concentrating_formatted_nodes")) { + int temp; + SET_OPTION(concentrating_formatted_nodes); + temp = (value + && *value) ? simple_strtoul(value, &value, + 0) : 10; + if (temp <= 0 || temp > 100) { + REISERFS_SB(s)->s_alloc_options.border = 10; + } else { + REISERFS_SB(s)->s_alloc_options.border = + 100 / temp; + } + continue; + } + if (!strcmp(this_char, "displacing_large_files")) { + SET_OPTION(displacing_large_files); + REISERFS_SB(s)->s_alloc_options.large_file_size = + (value + && *value) ? 
simple_strtoul(value, &value, 0) : 16; + continue; + } + if (!strcmp(this_char, "displacing_new_packing_localities")) { + SET_OPTION(displacing_new_packing_localities); + continue; + }; + + if (!strcmp(this_char, "old_hashed_relocation")) { + SET_OPTION(old_hashed_relocation); + continue; + } - if (!strcmp(this_char, "new_hashed_relocation")) { - SET_OPTION(new_hashed_relocation); - continue; - } + if (!strcmp(this_char, "new_hashed_relocation")) { + SET_OPTION(new_hashed_relocation); + continue; + } - if (!strcmp(this_char, "dirid_groups")) { - SET_OPTION(dirid_groups); - continue; - } - if (!strcmp(this_char, "oid_groups")) { - SET_OPTION(oid_groups); - continue; - } - if (!strcmp(this_char, "packing_groups")) { - SET_OPTION(packing_groups); - continue; - } - if (!strcmp(this_char, "hashed_formatted_nodes")) { - SET_OPTION(hashed_formatted_nodes); - continue; - } + if (!strcmp(this_char, "dirid_groups")) { + SET_OPTION(dirid_groups); + continue; + } + if (!strcmp(this_char, "oid_groups")) { + SET_OPTION(oid_groups); + continue; + } + if (!strcmp(this_char, "packing_groups")) { + SET_OPTION(packing_groups); + continue; + } + if (!strcmp(this_char, "hashed_formatted_nodes")) { + SET_OPTION(hashed_formatted_nodes); + continue; + } - if (!strcmp(this_char, "skip_busy")) { - SET_OPTION(skip_busy); - continue; - } + if (!strcmp(this_char, "skip_busy")) { + SET_OPTION(skip_busy); + continue; + } - if (!strcmp(this_char, "hundredth_slices")) { - SET_OPTION(hundredth_slices); - continue; - } + if (!strcmp(this_char, "hundredth_slices")) { + SET_OPTION(hundredth_slices); + continue; + } - if (!strcmp(this_char, "old_way")) { - SET_OPTION(old_way); - continue; - } + if (!strcmp(this_char, "old_way")) { + SET_OPTION(old_way); + continue; + } - if (!strcmp(this_char, "displace_based_on_dirid")) { - SET_OPTION(displace_based_on_dirid); - continue; - } + if (!strcmp(this_char, "displace_based_on_dirid")) { + SET_OPTION(displace_based_on_dirid); + continue; + } - if 
(!strcmp(this_char, "preallocmin")) { - REISERFS_SB(s)->s_alloc_options.preallocmin = - (value && *value) ? simple_strtoul (value, &value, 0) : 4; - continue; - } + if (!strcmp(this_char, "preallocmin")) { + REISERFS_SB(s)->s_alloc_options.preallocmin = + (value + && *value) ? simple_strtoul(value, &value, 0) : 4; + continue; + } + + if (!strcmp(this_char, "preallocsize")) { + REISERFS_SB(s)->s_alloc_options.preallocsize = + (value + && *value) ? simple_strtoul(value, &value, + 0) : + PREALLOCATION_SIZE; + continue; + } - if (!strcmp(this_char, "preallocsize")) { - REISERFS_SB(s)->s_alloc_options.preallocsize = - (value && *value) ? simple_strtoul (value, &value, 0) : PREALLOCATION_SIZE; - continue; + reiserfs_warning(s, "zam-4001: %s : unknown option - %s", + __FUNCTION__, this_char); + return 1; } - reiserfs_warning (s, "zam-4001: %s : unknown option - %s", - __FUNCTION__ , this_char); - return 1; - } - - reiserfs_warning (s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s)); - return 0; + reiserfs_warning(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s)); + return 0; } - -static inline void new_hashed_relocation (reiserfs_blocknr_hint_t * hint) + +static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint) { - char * hash_in; - if (hint->formatted_node) { - hash_in = (char*)&hint->key.k_dir_id; - } else { - if (!hint->inode) { - //hint->search_start = hint->beg; - hash_in = (char*)&hint->key.k_dir_id; - } else - if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) - hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); - else - hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid); - } + char *hash_in; + if (hint->formatted_node) { + hash_in = (char *)&hint->key.k_dir_id; + } else { + if (!hint->inode) { + //hint->search_start = hint->beg; + hash_in = (char *)&hint->key.k_dir_id; + } else + if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) + hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); + else + hash_in = + 
(char *)(&INODE_PKEY(hint->inode)->k_objectid); + } - hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg); + hint->search_start = + hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg); } /* * Relocation based on dirid, hashing them into a given bitmap block * files. Formatted nodes are unaffected, a seperate policy covers them */ -static void -dirid_groups (reiserfs_blocknr_hint_t *hint) +static void dirid_groups(reiserfs_blocknr_hint_t * hint) { - unsigned long hash; - __u32 dirid = 0; - int bm = 0; - struct super_block *sb = hint->th->t_super; - if (hint->inode) - dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); - else if (hint->formatted_node) - dirid = hint->key.k_dir_id; - - if (dirid) { - bm = bmap_hash_id(sb, dirid); - hash = bm * (sb->s_blocksize << 3); - /* give a portion of the block group to metadata */ + unsigned long hash; + __u32 dirid = 0; + int bm = 0; + struct super_block *sb = hint->th->t_super; if (hint->inode) - hash += sb->s_blocksize/2; - hint->search_start = hash; - } + dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); + else if (hint->formatted_node) + dirid = hint->key.k_dir_id; + + if (dirid) { + bm = bmap_hash_id(sb, dirid); + hash = bm * (sb->s_blocksize << 3); + /* give a portion of the block group to metadata */ + if (hint->inode) + hash += sb->s_blocksize / 2; + hint->search_start = hash; + } } /* * Relocation based on oid, hashing them into a given bitmap block * files. 
Formatted nodes are unaffected, a seperate policy covers them */ -static void -oid_groups (reiserfs_blocknr_hint_t *hint) +static void oid_groups(reiserfs_blocknr_hint_t * hint) { - if (hint->inode) { - unsigned long hash; - __u32 oid; - __u32 dirid; - int bm; - - dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); - - /* keep the root dir and it's first set of subdirs close to - * the start of the disk - */ - if (dirid <= 2) - hash = (hint->inode->i_sb->s_blocksize << 3); - else { - oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid); - bm = bmap_hash_id(hint->inode->i_sb, oid); - hash = bm * (hint->inode->i_sb->s_blocksize << 3); + if (hint->inode) { + unsigned long hash; + __u32 oid; + __u32 dirid; + int bm; + + dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); + + /* keep the root dir and it's first set of subdirs close to + * the start of the disk + */ + if (dirid <= 2) + hash = (hint->inode->i_sb->s_blocksize << 3); + else { + oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid); + bm = bmap_hash_id(hint->inode->i_sb, oid); + hash = bm * (hint->inode->i_sb->s_blocksize << 3); + } + hint->search_start = hash; } - hint->search_start = hash; - } } /* returns 1 if it finds an indirect item and gets valid hint info * from it, otherwise 0 */ -static int get_left_neighbor(reiserfs_blocknr_hint_t *hint) +static int get_left_neighbor(reiserfs_blocknr_hint_t * hint) { - struct path * path; - struct buffer_head * bh; - struct item_head * ih; - int pos_in_item; - __le32 * item; - int ret = 0; - - if (!hint->path) /* reiserfs code can call this function w/o pointer to path + struct path *path; + struct buffer_head *bh; + struct item_head *ih; + int pos_in_item; + __le32 *item; + int ret = 0; + + if (!hint->path) /* reiserfs code can call this function w/o pointer to path * structure supplied; then we rely on supplied search_start */ - return 0; - - path = hint->path; - bh = get_last_bh(path); - RFALSE( !bh, "green-4002: Illegal path specified to 
get_left_neighbor"); - ih = get_ih(path); - pos_in_item = path->pos_in_item; - item = get_item (path); - - hint->search_start = bh->b_blocknr; - - if (!hint->formatted_node && is_indirect_le_ih (ih)) { - /* for indirect item: go to left and look for the first non-hole entry - in the indirect item */ - if (pos_in_item == I_UNFM_NUM (ih)) - pos_in_item--; -// pos_in_item = I_UNFM_NUM (ih) - 1; - while (pos_in_item >= 0) { - int t=get_block_num(item,pos_in_item); - if (t) { - hint->search_start = t; - ret = 1; - break; - } - pos_in_item --; + return 0; + + path = hint->path; + bh = get_last_bh(path); + RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor"); + ih = get_ih(path); + pos_in_item = path->pos_in_item; + item = get_item(path); + + hint->search_start = bh->b_blocknr; + + if (!hint->formatted_node && is_indirect_le_ih(ih)) { + /* for indirect item: go to left and look for the first non-hole entry + in the indirect item */ + if (pos_in_item == I_UNFM_NUM(ih)) + pos_in_item--; +// pos_in_item = I_UNFM_NUM (ih) - 1; + while (pos_in_item >= 0) { + int t = get_block_num(item, pos_in_item); + if (t) { + hint->search_start = t; + ret = 1; + break; + } + pos_in_item--; + } } - } - /* does result value fit into specified region? */ - return ret; + /* does result value fit into specified region? */ + return ret; } /* should be, if formatted node, then try to put on first part of the device specified as number of percent with mount option device, else try to put on last of device. This is not to say it is good code to do so, but the effect should be measured. 
*/ -static inline void set_border_in_hint(struct super_block *s, reiserfs_blocknr_hint_t *hint) +static inline void set_border_in_hint(struct super_block *s, + reiserfs_blocknr_hint_t * hint) { - b_blocknr_t border = SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border; + b_blocknr_t border = + SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border; - if (hint->formatted_node) - hint->end = border - 1; - else - hint->beg = border; + if (hint->formatted_node) + hint->end = border - 1; + else + hint->beg = border; } -static inline void displace_large_file(reiserfs_blocknr_hint_t *hint) +static inline void displace_large_file(reiserfs_blocknr_hint_t * hint) { - if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) - hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id), 4) % (hint->end - hint->beg); - else - hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid), 4) % (hint->end - hint->beg); + if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) + hint->search_start = + hint->beg + + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id), + 4) % (hint->end - hint->beg); + else + hint->search_start = + hint->beg + + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid), + 4) % (hint->end - hint->beg); } -static inline void hash_formatted_node(reiserfs_blocknr_hint_t *hint) +static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint) { - char * hash_in; + char *hash_in; - if (!hint->inode) - hash_in = (char*)&hint->key.k_dir_id; - else if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) - hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); - else - hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid); + if (!hint->inode) + hash_in = (char *)&hint->key.k_dir_id; + else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) + hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); + else + hash_in = (char 
*)(&INODE_PKEY(hint->inode)->k_objectid); - hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg); + hint->search_start = + hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg); } -static inline int this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *hint) +static inline int +this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t * + hint) { - return hint->block == REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size; + return hint->block == + REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size; } #ifdef DISPLACE_NEW_PACKING_LOCALITIES -static inline void displace_new_packing_locality (reiserfs_blocknr_hint_t *hint) +static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint) { - struct in_core_key * key = &hint->key; + struct in_core_key *key = &hint->key; - hint->th->displace_new_blocks = 0; - hint->search_start = hint->beg + keyed_hash((char*)(&key->k_objectid),4) % (hint->end - hint->beg); + hint->th->displace_new_blocks = 0; + hint->search_start = + hint->beg + keyed_hash((char *)(&key->k_objectid), + 4) % (hint->end - hint->beg); } - #endif +#endif -static inline int old_hashed_relocation (reiserfs_blocknr_hint_t * hint) +static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint) { - b_blocknr_t border; - u32 hash_in; - - if (hint->formatted_node || hint->inode == NULL) { - return 0; - } + b_blocknr_t border; + u32 hash_in; - hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id); - border = hint->beg + (u32) keyed_hash(((char *) (&hash_in)), 4) % (hint->end - hint->beg - 1); - if (border > hint->search_start) - hint->search_start = border; + if (hint->formatted_node || hint->inode == NULL) { + return 0; + } - return 1; - } - -static inline int old_way (reiserfs_blocknr_hint_t * hint) -{ - b_blocknr_t border; - - if (hint->formatted_node || hint->inode == NULL) { - return 0; - } - - border = hint->beg + 
le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end - hint->beg); - if (border > hint->search_start) - hint->search_start = border; + hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id); + border = + hint->beg + (u32) keyed_hash(((char *)(&hash_in)), + 4) % (hint->end - hint->beg - 1); + if (border > hint->search_start) + hint->search_start = border; - return 1; + return 1; } -static inline void hundredth_slices (reiserfs_blocknr_hint_t * hint) +static inline int old_way(reiserfs_blocknr_hint_t * hint) { - struct in_core_key * key = &hint->key; - b_blocknr_t slice_start; + b_blocknr_t border; + + if (hint->formatted_node || hint->inode == NULL) { + return 0; + } + + border = + hint->beg + + le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end - + hint->beg); + if (border > hint->search_start) + hint->search_start = border; - slice_start = (keyed_hash((char*)(&key->k_dir_id),4) % 100) * (hint->end / 100); - if ( slice_start > hint->search_start || slice_start + (hint->end / 100) <= hint->search_start) { - hint->search_start = slice_start; - } + return 1; +} + +static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint) +{ + struct in_core_key *key = &hint->key; + b_blocknr_t slice_start; + + slice_start = + (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100); + if (slice_start > hint->search_start + || slice_start + (hint->end / 100) <= hint->search_start) { + hint->search_start = slice_start; + } } - -static void determine_search_start(reiserfs_blocknr_hint_t *hint, - int amount_needed) + +static void determine_search_start(reiserfs_blocknr_hint_t * hint, + int amount_needed) { - struct super_block *s = hint->th->t_super; - int unfm_hint; + struct super_block *s = hint->th->t_super; + int unfm_hint; - hint->beg = 0; - hint->end = SB_BLOCK_COUNT(s) - 1; + hint->beg = 0; + hint->end = SB_BLOCK_COUNT(s) - 1; - /* This is former border algorithm. 
Now with tunable border offset */ - if (concentrating_formatted_nodes(s)) - set_border_in_hint(s, hint); + /* This is former border algorithm. Now with tunable border offset */ + if (concentrating_formatted_nodes(s)) + set_border_in_hint(s, hint); #ifdef DISPLACE_NEW_PACKING_LOCALITIES - /* whenever we create a new directory, we displace it. At first we will - hash for location, later we might look for a moderately empty place for - it */ - if (displacing_new_packing_localities(s) - && hint->th->displace_new_blocks) { - displace_new_packing_locality(hint); - - /* we do not continue determine_search_start, - * if new packing locality is being displaced */ - return; - } + /* whenever we create a new directory, we displace it. At first we will + hash for location, later we might look for a moderately empty place for + it */ + if (displacing_new_packing_localities(s) + && hint->th->displace_new_blocks) { + displace_new_packing_locality(hint); + + /* we do not continue determine_search_start, + * if new packing locality is being displaced */ + return; + } #endif - - /* all persons should feel encouraged to add more special cases here and - * test them */ - if (displacing_large_files(s) && !hint->formatted_node - && this_blocknr_allocation_would_make_it_a_large_file(hint)) { - displace_large_file(hint); - return; - } - - /* if none of our special cases is relevant, use the left neighbor in the - tree order of the new node we are allocating for */ - if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes,s)) { - hash_formatted_node(hint); - return; - } + /* all persons should feel encouraged to add more special cases here and + * test them */ - unfm_hint = get_left_neighbor(hint); + if (displacing_large_files(s) && !hint->formatted_node + && this_blocknr_allocation_would_make_it_a_large_file(hint)) { + displace_large_file(hint); + return; + } - /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation, - new blocks are displaced based on 
directory ID. Also, if suggested search_start - is less than last preallocated block, we start searching from it, assuming that - HDD dataflow is faster in forward direction */ - if ( TEST_OPTION(old_way, s)) { - if (!hint->formatted_node) { - if ( !reiserfs_hashed_relocation(s)) - old_way(hint); - else if (!reiserfs_no_unhashed_relocation(s)) - old_hashed_relocation(hint); + /* if none of our special cases is relevant, use the left neighbor in the + tree order of the new node we are allocating for */ + if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) { + hash_formatted_node(hint); + return; + } - if ( hint->inode && hint->search_start < REISERFS_I(hint->inode)->i_prealloc_block) - hint->search_start = REISERFS_I(hint->inode)->i_prealloc_block; + unfm_hint = get_left_neighbor(hint); + + /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation, + new blocks are displaced based on directory ID. Also, if suggested search_start + is less than last preallocated block, we start searching from it, assuming that + HDD dataflow is faster in forward direction */ + if (TEST_OPTION(old_way, s)) { + if (!hint->formatted_node) { + if (!reiserfs_hashed_relocation(s)) + old_way(hint); + else if (!reiserfs_no_unhashed_relocation(s)) + old_hashed_relocation(hint); + + if (hint->inode + && hint->search_start < + REISERFS_I(hint->inode)->i_prealloc_block) + hint->search_start = + REISERFS_I(hint->inode)->i_prealloc_block; + } + return; } - return; - } - /* This is an approach proposed by Hans */ - if ( TEST_OPTION(hundredth_slices, s) && ! 
(displacing_large_files(s) && !hint->formatted_node)) { - hundredth_slices(hint); - return; - } - - /* old_hashed_relocation only works on unformatted */ - if (!unfm_hint && !hint->formatted_node && - TEST_OPTION(old_hashed_relocation, s)) - { - old_hashed_relocation(hint); - } - /* new_hashed_relocation works with both formatted/unformatted nodes */ - if ((!unfm_hint || hint->formatted_node) && - TEST_OPTION(new_hashed_relocation, s)) - { - new_hashed_relocation(hint); - } - /* dirid grouping works only on unformatted nodes */ - if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups,s)) - { - dirid_groups(hint); - } + /* This is an approach proposed by Hans */ + if (TEST_OPTION(hundredth_slices, s) + && !(displacing_large_files(s) && !hint->formatted_node)) { + hundredth_slices(hint); + return; + } + /* old_hashed_relocation only works on unformatted */ + if (!unfm_hint && !hint->formatted_node && + TEST_OPTION(old_hashed_relocation, s)) { + old_hashed_relocation(hint); + } + /* new_hashed_relocation works with both formatted/unformatted nodes */ + if ((!unfm_hint || hint->formatted_node) && + TEST_OPTION(new_hashed_relocation, s)) { + new_hashed_relocation(hint); + } + /* dirid grouping works only on unformatted nodes */ + if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) { + dirid_groups(hint); + } #ifdef DISPLACE_NEW_PACKING_LOCALITIES - if (hint->formatted_node && TEST_OPTION(dirid_groups,s)) - { - dirid_groups(hint); - } + if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) { + dirid_groups(hint); + } #endif - /* oid grouping works only on unformatted nodes */ - if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups,s)) - { - oid_groups(hint); - } - return; + /* oid grouping works only on unformatted nodes */ + if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) { + oid_groups(hint); + } + return; } static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint) { - /* make minimum 
size a mount option and benchmark both ways */ - /* we preallocate blocks only for regular files, specific size */ - /* benchmark preallocating always and see what happens */ - - hint->prealloc_size = 0; - - if (!hint->formatted_node && hint->preallocate) { - if (S_ISREG(hint->inode->i_mode) - && hint->inode->i_size >= REISERFS_SB(hint->th->t_super)->s_alloc_options.preallocmin * hint->inode->i_sb->s_blocksize) - hint->prealloc_size = REISERFS_SB(hint->th->t_super)->s_alloc_options.preallocsize - 1; - } - return CARRY_ON; + /* make minimum size a mount option and benchmark both ways */ + /* we preallocate blocks only for regular files, specific size */ + /* benchmark preallocating always and see what happens */ + + hint->prealloc_size = 0; + + if (!hint->formatted_node && hint->preallocate) { + if (S_ISREG(hint->inode->i_mode) + && hint->inode->i_size >= + REISERFS_SB(hint->th->t_super)->s_alloc_options. + preallocmin * hint->inode->i_sb->s_blocksize) + hint->prealloc_size = + REISERFS_SB(hint->th->t_super)->s_alloc_options. + preallocsize - 1; + } + return CARRY_ON; } /* XXX I know it could be merged with upper-level function; but may be result function would be too complex. 
*/ -static inline int allocate_without_wrapping_disk (reiserfs_blocknr_hint_t * hint, - b_blocknr_t * new_blocknrs, - b_blocknr_t start, b_blocknr_t finish, - int min, - int amount_needed, int prealloc_size) +static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint, + b_blocknr_t * new_blocknrs, + b_blocknr_t start, + b_blocknr_t finish, int min, + int amount_needed, + int prealloc_size) { - int rest = amount_needed; - int nr_allocated; - - while (rest > 0 && start <= finish) { - nr_allocated = scan_bitmap (hint->th, &start, finish, min, - rest + prealloc_size, !hint->formatted_node, - hint->block); - - if (nr_allocated == 0) /* no new blocks allocated, return */ - break; - - /* fill free_blocknrs array first */ - while (rest > 0 && nr_allocated > 0) { - * new_blocknrs ++ = start ++; - rest --; nr_allocated --; - } + int rest = amount_needed; + int nr_allocated; + + while (rest > 0 && start <= finish) { + nr_allocated = scan_bitmap(hint->th, &start, finish, min, + rest + prealloc_size, + !hint->formatted_node, hint->block); + + if (nr_allocated == 0) /* no new blocks allocated, return */ + break; + + /* fill free_blocknrs array first */ + while (rest > 0 && nr_allocated > 0) { + *new_blocknrs++ = start++; + rest--; + nr_allocated--; + } - /* do we have something to fill prealloc. array also ? */ - if (nr_allocated > 0) { - /* it means prealloc_size was greater that 0 and we do preallocation */ - list_add(&REISERFS_I(hint->inode)->i_prealloc_list, - &SB_JOURNAL(hint->th->t_super)->j_prealloc_list); - REISERFS_I(hint->inode)->i_prealloc_block = start; - REISERFS_I(hint->inode)->i_prealloc_count = nr_allocated; - break; + /* do we have something to fill prealloc. array also ? 
*/ + if (nr_allocated > 0) { + /* it means prealloc_size was greater that 0 and we do preallocation */ + list_add(&REISERFS_I(hint->inode)->i_prealloc_list, + &SB_JOURNAL(hint->th->t_super)-> + j_prealloc_list); + REISERFS_I(hint->inode)->i_prealloc_block = start; + REISERFS_I(hint->inode)->i_prealloc_count = + nr_allocated; + break; + } } - } - return (amount_needed - rest); + return (amount_needed - rest); } static inline int blocknrs_and_prealloc_arrays_from_search_start - (reiserfs_blocknr_hint_t *hint, b_blocknr_t *new_blocknrs, int amount_needed) -{ - struct super_block *s = hint->th->t_super; - b_blocknr_t start = hint->search_start; - b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1; - int passno = 0; - int nr_allocated = 0; - int bigalloc = 0; - - determine_prealloc_size(hint); - if (!hint->formatted_node) { - int quota_ret; + (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, + int amount_needed) { + struct super_block *s = hint->th->t_super; + b_blocknr_t start = hint->search_start; + b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1; + int passno = 0; + int nr_allocated = 0; + int bigalloc = 0; + + determine_prealloc_size(hint); + if (!hint->formatted_node) { + int quota_ret; #ifdef REISERQUOTA_DEBUG - reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: allocating %d blocks id=%u", amount_needed, hint->inode->i_uid); + reiserfs_debug(s, REISERFS_DEBUG_CODE, + "reiserquota: allocating %d blocks id=%u", + amount_needed, hint->inode->i_uid); #endif - quota_ret = DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed); - if (quota_ret) /* Quota exceeded? */ - return QUOTA_EXCEEDED; - if (hint->preallocate && hint->prealloc_size ) { + quota_ret = + DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed); + if (quota_ret) /* Quota exceeded? 
*/ + return QUOTA_EXCEEDED; + if (hint->preallocate && hint->prealloc_size) { #ifdef REISERQUOTA_DEBUG - reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: allocating (prealloc) %d blocks id=%u", hint->prealloc_size, hint->inode->i_uid); + reiserfs_debug(s, REISERFS_DEBUG_CODE, + "reiserquota: allocating (prealloc) %d blocks id=%u", + hint->prealloc_size, hint->inode->i_uid); #endif - quota_ret = DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode, hint->prealloc_size); - if (quota_ret) - hint->preallocate=hint->prealloc_size=0; + quota_ret = + DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode, + hint->prealloc_size); + if (quota_ret) + hint->preallocate = hint->prealloc_size = 0; + } + /* for unformatted nodes, force large allocations */ + bigalloc = amount_needed; } - /* for unformatted nodes, force large allocations */ - bigalloc = amount_needed; - } - do { - /* in bigalloc mode, nr_allocated should stay zero until - * the entire allocation is filled - */ - if (unlikely(bigalloc && nr_allocated)) { - reiserfs_warning(s, "bigalloc is %d, nr_allocated %d\n", - bigalloc, nr_allocated); - /* reset things to a sane value */ - bigalloc = amount_needed - nr_allocated; - } - /* - * try pass 0 and pass 1 looking for a nice big - * contiguous allocation. Then reset and look - * for anything you can find. 
- */ - if (passno == 2 && bigalloc) { - passno = 0; - bigalloc = 0; - } - switch (passno++) { - case 0: /* Search from hint->search_start to end of disk */ - start = hint->search_start; - finish = SB_BLOCK_COUNT(s) - 1; - break; - case 1: /* Search from hint->beg to hint->search_start */ - start = hint->beg; - finish = hint->search_start; - break; - case 2: /* Last chance: Search from 0 to hint->beg */ - start = 0; - finish = hint->beg; - break; - default: /* We've tried searching everywhere, not enough space */ - /* Free the blocks */ - if (!hint->formatted_node) { + do { + /* in bigalloc mode, nr_allocated should stay zero until + * the entire allocation is filled + */ + if (unlikely(bigalloc && nr_allocated)) { + reiserfs_warning(s, "bigalloc is %d, nr_allocated %d\n", + bigalloc, nr_allocated); + /* reset things to a sane value */ + bigalloc = amount_needed - nr_allocated; + } + /* + * try pass 0 and pass 1 looking for a nice big + * contiguous allocation. Then reset and look + * for anything you can find. 
+ */ + if (passno == 2 && bigalloc) { + passno = 0; + bigalloc = 0; + } + switch (passno++) { + case 0: /* Search from hint->search_start to end of disk */ + start = hint->search_start; + finish = SB_BLOCK_COUNT(s) - 1; + break; + case 1: /* Search from hint->beg to hint->search_start */ + start = hint->beg; + finish = hint->search_start; + break; + case 2: /* Last chance: Search from 0 to hint->beg */ + start = 0; + finish = hint->beg; + break; + default: /* We've tried searching everywhere, not enough space */ + /* Free the blocks */ + if (!hint->formatted_node) { #ifdef REISERQUOTA_DEBUG - reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: freeing (nospace) %d blocks id=%u", amount_needed + hint->prealloc_size - nr_allocated, hint->inode->i_uid); + reiserfs_debug(s, REISERFS_DEBUG_CODE, + "reiserquota: freeing (nospace) %d blocks id=%u", + amount_needed + + hint->prealloc_size - + nr_allocated, + hint->inode->i_uid); #endif - DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */ - } - while (nr_allocated --) - reiserfs_free_block(hint->th, hint->inode, new_blocknrs[nr_allocated], !hint->formatted_node); - - return NO_DISK_SPACE; - } - } while ((nr_allocated += allocate_without_wrapping_disk (hint, - new_blocknrs + nr_allocated, start, finish, - bigalloc ? 
bigalloc : 1, - amount_needed - nr_allocated, - hint->prealloc_size)) - < amount_needed); - if ( !hint->formatted_node && - amount_needed + hint->prealloc_size > - nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) { - /* Some of preallocation blocks were not allocated */ + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */ + } + while (nr_allocated--) + reiserfs_free_block(hint->th, hint->inode, + new_blocknrs[nr_allocated], + !hint->formatted_node); + + return NO_DISK_SPACE; + } + } while ((nr_allocated += allocate_without_wrapping_disk(hint, + new_blocknrs + + nr_allocated, + start, finish, + bigalloc ? + bigalloc : 1, + amount_needed - + nr_allocated, + hint-> + prealloc_size)) + < amount_needed); + if (!hint->formatted_node && + amount_needed + hint->prealloc_size > + nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) { + /* Some of preallocation blocks were not allocated */ #ifdef REISERQUOTA_DEBUG - reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: freeing (failed prealloc) %d blocks id=%u", amount_needed + hint->prealloc_size - nr_allocated - REISERFS_I(hint->inode)->i_prealloc_count, hint->inode->i_uid); + reiserfs_debug(s, REISERFS_DEBUG_CODE, + "reiserquota: freeing (failed prealloc) %d blocks id=%u", + amount_needed + hint->prealloc_size - + nr_allocated - + REISERFS_I(hint->inode)->i_prealloc_count, + hint->inode->i_uid); #endif - DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + - hint->prealloc_size - nr_allocated - - REISERFS_I(hint->inode)->i_prealloc_count); - } + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + + hint->prealloc_size - nr_allocated - + REISERFS_I(hint->inode)-> + i_prealloc_count); + } - return CARRY_ON; + return CARRY_ON; } /* grab new blocknrs from preallocated list */ /* return amount still needed after using them */ -static int use_preallocated_list_if_available (reiserfs_blocknr_hint_t *hint, - b_blocknr_t *new_blocknrs, int 
amount_needed) +static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint, + b_blocknr_t * new_blocknrs, + int amount_needed) { - struct inode * inode = hint->inode; + struct inode *inode = hint->inode; - if (REISERFS_I(inode)->i_prealloc_count > 0) { - while (amount_needed) { + if (REISERFS_I(inode)->i_prealloc_count > 0) { + while (amount_needed) { - *new_blocknrs ++ = REISERFS_I(inode)->i_prealloc_block ++; - REISERFS_I(inode)->i_prealloc_count --; + *new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++; + REISERFS_I(inode)->i_prealloc_count--; - amount_needed --; + amount_needed--; - if (REISERFS_I(inode)->i_prealloc_count <= 0) { - list_del(&REISERFS_I(inode)->i_prealloc_list); - break; - } + if (REISERFS_I(inode)->i_prealloc_count <= 0) { + list_del(&REISERFS_I(inode)->i_prealloc_list); + break; + } + } } - } - /* return amount still needed after using preallocated blocks */ - return amount_needed; + /* return amount still needed after using preallocated blocks */ + return amount_needed; } -int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint, - b_blocknr_t * new_blocknrs, int amount_needed, - int reserved_by_us /* Amount of blocks we have - already reserved */) +int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us /* Amount of blocks we have + already reserved */ ) { - int initial_amount_needed = amount_needed; - int ret; - struct super_block *s = hint->th->t_super; - - /* Check if there is enough space, taking into account reserved space */ - if ( SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks < - amount_needed - reserved_by_us) - return NO_DISK_SPACE; - /* should this be if !hint->inode && hint->preallocate? */ - /* do you mean hint->formatted_node can be removed ? 
- Zam */ - /* hint->formatted_node cannot be removed because we try to access - inode information here, and there is often no inode assotiated with - metadata allocations - green */ - - if (!hint->formatted_node && hint->preallocate) { - amount_needed = use_preallocated_list_if_available + int initial_amount_needed = amount_needed; + int ret; + struct super_block *s = hint->th->t_super; + + /* Check if there is enough space, taking into account reserved space */ + if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks < + amount_needed - reserved_by_us) + return NO_DISK_SPACE; + /* should this be if !hint->inode && hint->preallocate? */ + /* do you mean hint->formatted_node can be removed ? - Zam */ + /* hint->formatted_node cannot be removed because we try to access + inode information here, and there is often no inode assotiated with + metadata allocations - green */ + + if (!hint->formatted_node && hint->preallocate) { + amount_needed = use_preallocated_list_if_available + (hint, new_blocknrs, amount_needed); + if (amount_needed == 0) /* all blocknrs we need we got from + prealloc. list */ + return CARRY_ON; + new_blocknrs += (initial_amount_needed - amount_needed); + } + + /* find search start and save it in hint structure */ + determine_search_start(hint, amount_needed); + if (hint->search_start >= SB_BLOCK_COUNT(s)) + hint->search_start = SB_BLOCK_COUNT(s) - 1; + + /* allocation itself; fill new_blocknrs and preallocation arrays */ + ret = blocknrs_and_prealloc_arrays_from_search_start (hint, new_blocknrs, amount_needed); - if (amount_needed == 0) /* all blocknrs we need we got from - prealloc. 
list */ - return CARRY_ON; - new_blocknrs += (initial_amount_needed - amount_needed); - } - - /* find search start and save it in hint structure */ - determine_search_start(hint, amount_needed); - if (hint->search_start >= SB_BLOCK_COUNT(s)) - hint->search_start = SB_BLOCK_COUNT(s) - 1; - - /* allocation itself; fill new_blocknrs and preallocation arrays */ - ret = blocknrs_and_prealloc_arrays_from_search_start - (hint, new_blocknrs, amount_needed); - - /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we - * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second - * variant) */ - - if (ret != CARRY_ON) { - while (amount_needed ++ < initial_amount_needed) { - reiserfs_free_block(hint->th, hint->inode, *(--new_blocknrs), 1); + + /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we + * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second + * variant) */ + + if (ret != CARRY_ON) { + while (amount_needed++ < initial_amount_needed) { + reiserfs_free_block(hint->th, hint->inode, + *(--new_blocknrs), 1); + } } - } - return ret; + return ret; } /* These 2 functions are here to provide blocks reservation to the rest of kernel */ /* Reserve @blocks amount of blocks in fs pointed by @sb. Caller must make sure there are actually this much blocks on the FS available */ -void reiserfs_claim_blocks_to_be_allocated( - struct super_block *sb, /* super block of - filesystem where - blocks should be - reserved */ - int blocks /* How much to reserve */ - ) +void reiserfs_claim_blocks_to_be_allocated(struct super_block *sb, /* super block of + filesystem where + blocks should be + reserved */ + int blocks /* How much to reserve */ + ) { - /* Fast case, if reservation is zero - exit immediately. */ - if ( !blocks ) - return; + /* Fast case, if reservation is zero - exit immediately. 
*/ + if (!blocks) + return; - spin_lock(&REISERFS_SB(sb)->bitmap_lock); - REISERFS_SB(sb)->reserved_blocks += blocks; - spin_unlock(&REISERFS_SB(sb)->bitmap_lock); + spin_lock(&REISERFS_SB(sb)->bitmap_lock); + REISERFS_SB(sb)->reserved_blocks += blocks; + spin_unlock(&REISERFS_SB(sb)->bitmap_lock); } /* Unreserve @blocks amount of blocks in fs pointed by @sb */ -void reiserfs_release_claimed_blocks( - struct super_block *sb, /* super block of - filesystem where - blocks should be - reserved */ - int blocks /* How much to unreserve */ - ) +void reiserfs_release_claimed_blocks(struct super_block *sb, /* super block of + filesystem where + blocks should be + reserved */ + int blocks /* How much to unreserve */ + ) { - /* Fast case, if unreservation is zero - exit immediately. */ - if ( !blocks ) - return; + /* Fast case, if unreservation is zero - exit immediately. */ + if (!blocks) + return; - spin_lock(&REISERFS_SB(sb)->bitmap_lock); - REISERFS_SB(sb)->reserved_blocks -= blocks; - spin_unlock(&REISERFS_SB(sb)->bitmap_lock); - RFALSE( REISERFS_SB(sb)->reserved_blocks < 0, "amount of blocks reserved became zero?"); + spin_lock(&REISERFS_SB(sb)->bitmap_lock); + REISERFS_SB(sb)->reserved_blocks -= blocks; + spin_unlock(&REISERFS_SB(sb)->bitmap_lock); + RFALSE(REISERFS_SB(sb)->reserved_blocks < 0, + "amount of blocks reserved became zero?"); } /* This function estimates how much pages we will be able to write to FS used for reiserfs_file_write() purposes for now. 
*/ -int reiserfs_can_fit_pages ( struct super_block *sb /* superblock of filesystem - to estimate space */ ) +int reiserfs_can_fit_pages(struct super_block *sb /* superblock of filesystem + to estimate space */ ) { int space; spin_lock(&REISERFS_SB(sb)->bitmap_lock); - space = (SB_FREE_BLOCKS(sb) - REISERFS_SB(sb)->reserved_blocks) >> ( PAGE_CACHE_SHIFT - sb->s_blocksize_bits); + space = + (SB_FREE_BLOCKS(sb) - + REISERFS_SB(sb)->reserved_blocks) >> (PAGE_CACHE_SHIFT - + sb->s_blocksize_bits); spin_unlock(&REISERFS_SB(sb)->bitmap_lock); - return space>0?space:0; + return space > 0 ? space : 0; } diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index fbde4b01a32..9dd71e80703 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -12,264 +12,286 @@ #include #include -extern struct reiserfs_key MIN_KEY; +extern struct reiserfs_key MIN_KEY; -static int reiserfs_readdir (struct file *, void *, filldir_t); -static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, int datasync) ; +static int reiserfs_readdir(struct file *, void *, filldir_t); +static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, + int datasync); struct file_operations reiserfs_dir_operations = { - .read = generic_read_dir, - .readdir = reiserfs_readdir, - .fsync = reiserfs_dir_fsync, - .ioctl = reiserfs_ioctl, + .read = generic_read_dir, + .readdir = reiserfs_readdir, + .fsync = reiserfs_dir_fsync, + .ioctl = reiserfs_ioctl, }; -static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, int datasync) { - struct inode *inode = dentry->d_inode; - int err; - reiserfs_write_lock(inode->i_sb); - err = reiserfs_commit_for_inode(inode) ; - reiserfs_write_unlock(inode->i_sb) ; - if (err < 0) - return err; - return 0; +static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, + int datasync) +{ + struct inode *inode = dentry->d_inode; + int err; + reiserfs_write_lock(inode->i_sb); + err = reiserfs_commit_for_inode(inode); + 
reiserfs_write_unlock(inode->i_sb); + if (err < 0) + return err; + return 0; } - #define store_ih(where,what) copy_item_head (where, what) // -static int reiserfs_readdir (struct file * filp, void * dirent, filldir_t filldir) +static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_dentry->d_inode; - struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ - INITIALIZE_PATH (path_to_entry); - struct buffer_head * bh; - int item_num, entry_num; - const struct reiserfs_key * rkey; - struct item_head * ih, tmp_ih; - int search_res; - char * local_buf; - loff_t next_pos; - char small_buf[32] ; /* avoid kmalloc if we can */ - struct reiserfs_dir_entry de; - int ret = 0; - - reiserfs_write_lock(inode->i_sb); - - reiserfs_check_lock_depth(inode->i_sb, "readdir") ; - - /* form key for search the next directory entry using f_pos field of - file structure */ - make_cpu_key (&pos_key, inode, (filp->f_pos) ? 
(filp->f_pos) : DOT_OFFSET, - TYPE_DIRENTRY, 3); - next_pos = cpu_key_k_offset (&pos_key); - - /* reiserfs_warning (inode->i_sb, "reiserfs_readdir 1: f_pos = %Ld", filp->f_pos);*/ - - path_to_entry.reada = PATH_READA; - while (1) { - research: - /* search the directory item, containing entry with specified key */ - search_res = search_by_entry_key (inode->i_sb, &pos_key, &path_to_entry, &de); - if (search_res == IO_ERROR) { - // FIXME: we could just skip part of directory which could - // not be read - ret = -EIO; - goto out; - } - entry_num = de.de_entry_num; - bh = de.de_bh; - item_num = de.de_item_num; - ih = de.de_ih; - store_ih (&tmp_ih, ih); - - /* we must have found item, that is item of this directory, */ - RFALSE( COMP_SHORT_KEYS (&(ih->ih_key), &pos_key), - "vs-9000: found item %h does not match to dir we readdir %K", - ih, &pos_key); - RFALSE( item_num > B_NR_ITEMS (bh) - 1, - "vs-9005 item_num == %d, item amount == %d", - item_num, B_NR_ITEMS (bh)); - - /* and entry must be not more than number of entries in the item */ - RFALSE( I_ENTRY_COUNT (ih) < entry_num, - "vs-9010: entry number is too big %d (%d)", - entry_num, I_ENTRY_COUNT (ih)); - - if (search_res == POSITION_FOUND || entry_num < I_ENTRY_COUNT (ih)) { - /* go through all entries in the directory item beginning from the entry, that has been found */ - struct reiserfs_de_head * deh = B_I_DEH (bh, ih) + entry_num; - - for (; entry_num < I_ENTRY_COUNT (ih); entry_num ++, deh ++) { - int d_reclen; - char * d_name; - off_t d_off; - ino_t d_ino; - - if (!de_visible (deh)) - /* it is hidden entry */ - continue; - d_reclen = entry_length (bh, ih, entry_num); - d_name = B_I_DEH_ENTRY_FILE_NAME (bh, ih, deh); - if (!d_name[d_reclen - 1]) - d_reclen = strlen (d_name); - - if (d_reclen > REISERFS_MAX_NAME(inode->i_sb->s_blocksize)){ - /* too big to send back to VFS */ - continue ; - } - - /* Ignore the .reiserfs_priv entry */ - if (reiserfs_xattrs (inode->i_sb) && - !old_format_only(inode->i_sb) && - 
filp->f_dentry == inode->i_sb->s_root && - REISERFS_SB(inode->i_sb)->priv_root && - REISERFS_SB(inode->i_sb)->priv_root->d_inode && - deh_objectid(deh) == le32_to_cpu (INODE_PKEY(REISERFS_SB(inode->i_sb)->priv_root->d_inode)->k_objectid)) { - continue; - } - - d_off = deh_offset (deh); - filp->f_pos = d_off ; - d_ino = deh_objectid (deh); - if (d_reclen <= 32) { - local_buf = small_buf ; - } else { - local_buf = reiserfs_kmalloc(d_reclen, GFP_NOFS, inode->i_sb) ; - if (!local_buf) { - pathrelse (&path_to_entry); - ret = -ENOMEM ; + struct inode *inode = filp->f_dentry->d_inode; + struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ + INITIALIZE_PATH(path_to_entry); + struct buffer_head *bh; + int item_num, entry_num; + const struct reiserfs_key *rkey; + struct item_head *ih, tmp_ih; + int search_res; + char *local_buf; + loff_t next_pos; + char small_buf[32]; /* avoid kmalloc if we can */ + struct reiserfs_dir_entry de; + int ret = 0; + + reiserfs_write_lock(inode->i_sb); + + reiserfs_check_lock_depth(inode->i_sb, "readdir"); + + /* form key for search the next directory entry using f_pos field of + file structure */ + make_cpu_key(&pos_key, inode, + (filp->f_pos) ? 
(filp->f_pos) : DOT_OFFSET, TYPE_DIRENTRY, + 3); + next_pos = cpu_key_k_offset(&pos_key); + + /* reiserfs_warning (inode->i_sb, "reiserfs_readdir 1: f_pos = %Ld", filp->f_pos); */ + + path_to_entry.reada = PATH_READA; + while (1) { + research: + /* search the directory item, containing entry with specified key */ + search_res = + search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry, + &de); + if (search_res == IO_ERROR) { + // FIXME: we could just skip part of directory which could + // not be read + ret = -EIO; goto out; - } - if (item_moved (&tmp_ih, &path_to_entry)) { - reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ; - goto research; - } - } - // Note, that we copy name to user space via temporary - // buffer (local_buf) because filldir will block if - // user space buffer is swapped out. At that time - // entry can move to somewhere else - memcpy (local_buf, d_name, d_reclen); - if (filldir (dirent, local_buf, d_reclen, d_off, d_ino, - DT_UNKNOWN) < 0) { - if (local_buf != small_buf) { - reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ; - } - goto end; } - if (local_buf != small_buf) { - reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ; + entry_num = de.de_entry_num; + bh = de.de_bh; + item_num = de.de_item_num; + ih = de.de_ih; + store_ih(&tmp_ih, ih); + + /* we must have found item, that is item of this directory, */ + RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key), + "vs-9000: found item %h does not match to dir we readdir %K", + ih, &pos_key); + RFALSE(item_num > B_NR_ITEMS(bh) - 1, + "vs-9005 item_num == %d, item amount == %d", + item_num, B_NR_ITEMS(bh)); + + /* and entry must be not more than number of entries in the item */ + RFALSE(I_ENTRY_COUNT(ih) < entry_num, + "vs-9010: entry number is too big %d (%d)", + entry_num, I_ENTRY_COUNT(ih)); + + if (search_res == POSITION_FOUND + || entry_num < I_ENTRY_COUNT(ih)) { + /* go through all entries in the directory item beginning from the entry, that has been found */ + struct reiserfs_de_head *deh 
= + B_I_DEH(bh, ih) + entry_num; + + for (; entry_num < I_ENTRY_COUNT(ih); + entry_num++, deh++) { + int d_reclen; + char *d_name; + off_t d_off; + ino_t d_ino; + + if (!de_visible(deh)) + /* it is hidden entry */ + continue; + d_reclen = entry_length(bh, ih, entry_num); + d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh); + if (!d_name[d_reclen - 1]) + d_reclen = strlen(d_name); + + if (d_reclen > + REISERFS_MAX_NAME(inode->i_sb-> + s_blocksize)) { + /* too big to send back to VFS */ + continue; + } + + /* Ignore the .reiserfs_priv entry */ + if (reiserfs_xattrs(inode->i_sb) && + !old_format_only(inode->i_sb) && + filp->f_dentry == inode->i_sb->s_root && + REISERFS_SB(inode->i_sb)->priv_root && + REISERFS_SB(inode->i_sb)->priv_root->d_inode + && deh_objectid(deh) == + le32_to_cpu(INODE_PKEY + (REISERFS_SB(inode->i_sb)-> + priv_root->d_inode)-> + k_objectid)) { + continue; + } + + d_off = deh_offset(deh); + filp->f_pos = d_off; + d_ino = deh_objectid(deh); + if (d_reclen <= 32) { + local_buf = small_buf; + } else { + local_buf = + reiserfs_kmalloc(d_reclen, GFP_NOFS, + inode->i_sb); + if (!local_buf) { + pathrelse(&path_to_entry); + ret = -ENOMEM; + goto out; + } + if (item_moved(&tmp_ih, &path_to_entry)) { + reiserfs_kfree(local_buf, + d_reclen, + inode->i_sb); + goto research; + } + } + // Note, that we copy name to user space via temporary + // buffer (local_buf) because filldir will block if + // user space buffer is swapped out. 
At that time + // entry can move to somewhere else + memcpy(local_buf, d_name, d_reclen); + if (filldir + (dirent, local_buf, d_reclen, d_off, d_ino, + DT_UNKNOWN) < 0) { + if (local_buf != small_buf) { + reiserfs_kfree(local_buf, + d_reclen, + inode->i_sb); + } + goto end; + } + if (local_buf != small_buf) { + reiserfs_kfree(local_buf, d_reclen, + inode->i_sb); + } + // next entry should be looked for with such offset + next_pos = deh_offset(deh) + 1; + + if (item_moved(&tmp_ih, &path_to_entry)) { + goto research; + } + } /* for */ } - // next entry should be looked for with such offset - next_pos = deh_offset (deh) + 1; + if (item_num != B_NR_ITEMS(bh) - 1) + // end of directory has been reached + goto end; + + /* item we went through is last item of node. Using right + delimiting key check is it directory end */ + rkey = get_rkey(&path_to_entry, inode->i_sb); + if (!comp_le_keys(rkey, &MIN_KEY)) { + /* set pos_key to key, that is the smallest and greater + that key of the last entry in the item */ + set_cpu_key_k_offset(&pos_key, next_pos); + continue; + } - if (item_moved (&tmp_ih, &path_to_entry)) { - goto research; + if (COMP_SHORT_KEYS(rkey, &pos_key)) { + // end of directory has been reached + goto end; } - } /* for */ - } - - if (item_num != B_NR_ITEMS (bh) - 1) - // end of directory has been reached - goto end; - - /* item we went through is last item of node. Using right - delimiting key check is it directory end */ - rkey = get_rkey (&path_to_entry, inode->i_sb); - if (! 
comp_le_keys (rkey, &MIN_KEY)) { - /* set pos_key to key, that is the smallest and greater - that key of the last entry in the item */ - set_cpu_key_k_offset (&pos_key, next_pos); - continue; - } - - if ( COMP_SHORT_KEYS (rkey, &pos_key)) { - // end of directory has been reached - goto end; - } - - /* directory continues in the right neighboring block */ - set_cpu_key_k_offset (&pos_key, le_key_k_offset (KEY_FORMAT_3_5, rkey)); - - } /* while */ - - - end: - filp->f_pos = next_pos; - pathrelse (&path_to_entry); - reiserfs_check_path(&path_to_entry) ; - out: - reiserfs_write_unlock(inode->i_sb); - return ret; + + /* directory continues in the right neighboring block */ + set_cpu_key_k_offset(&pos_key, + le_key_k_offset(KEY_FORMAT_3_5, rkey)); + + } /* while */ + + end: + filp->f_pos = next_pos; + pathrelse(&path_to_entry); + reiserfs_check_path(&path_to_entry); + out: + reiserfs_write_unlock(inode->i_sb); + return ret; } /* compose directory item containing "." and ".." entries (entries are not aligned to 4 byte boundary) */ /* the last four params are LE */ -void make_empty_dir_item_v1 (char * body, __le32 dirid, __le32 objid, - __le32 par_dirid, __le32 par_objid) +void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, + __le32 par_dirid, __le32 par_objid) { - struct reiserfs_de_head * deh; - - memset (body, 0, EMPTY_DIR_SIZE_V1); - deh = (struct reiserfs_de_head *)body; - - /* direntry header of "." */ - put_deh_offset( &(deh[0]), DOT_OFFSET ); - /* these two are from make_le_item_head, and are are LE */ - deh[0].deh_dir_id = dirid; - deh[0].deh_objectid = objid; - deh[0].deh_state = 0; /* Endian safe if 0 */ - put_deh_location( &(deh[0]), EMPTY_DIR_SIZE_V1 - strlen( "." )); - mark_de_visible(&(deh[0])); - - /* direntry header of ".." */ - put_deh_offset( &(deh[1]), DOT_DOT_OFFSET); - /* key of ".." 
for the root directory */ - /* these two are from the inode, and are are LE */ - deh[1].deh_dir_id = par_dirid; - deh[1].deh_objectid = par_objid; - deh[1].deh_state = 0; /* Endian safe if 0 */ - put_deh_location( &(deh[1]), deh_location( &(deh[0]) ) - strlen( ".." ) ); - mark_de_visible(&(deh[1])); - - /* copy ".." and "." */ - memcpy (body + deh_location( &(deh[0]) ), ".", 1); - memcpy (body + deh_location( &(deh[1]) ), "..", 2); + struct reiserfs_de_head *deh; + + memset(body, 0, EMPTY_DIR_SIZE_V1); + deh = (struct reiserfs_de_head *)body; + + /* direntry header of "." */ + put_deh_offset(&(deh[0]), DOT_OFFSET); + /* these two are from make_le_item_head, and are are LE */ + deh[0].deh_dir_id = dirid; + deh[0].deh_objectid = objid; + deh[0].deh_state = 0; /* Endian safe if 0 */ + put_deh_location(&(deh[0]), EMPTY_DIR_SIZE_V1 - strlen(".")); + mark_de_visible(&(deh[0])); + + /* direntry header of ".." */ + put_deh_offset(&(deh[1]), DOT_DOT_OFFSET); + /* key of ".." for the root directory */ + /* these two are from the inode, and are are LE */ + deh[1].deh_dir_id = par_dirid; + deh[1].deh_objectid = par_objid; + deh[1].deh_state = 0; /* Endian safe if 0 */ + put_deh_location(&(deh[1]), deh_location(&(deh[0])) - strlen("..")); + mark_de_visible(&(deh[1])); + + /* copy ".." and "." */ + memcpy(body + deh_location(&(deh[0])), ".", 1); + memcpy(body + deh_location(&(deh[1])), "..", 2); } /* compose directory item containing "." and ".." entries */ -void make_empty_dir_item (char * body, __le32 dirid, __le32 objid, - __le32 par_dirid, __le32 par_objid) +void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, + __le32 par_dirid, __le32 par_objid) { - struct reiserfs_de_head * deh; - - memset (body, 0, EMPTY_DIR_SIZE); - deh = (struct reiserfs_de_head *)body; - - /* direntry header of "." 
*/ - put_deh_offset( &(deh[0]), DOT_OFFSET ); - /* these two are from make_le_item_head, and are are LE */ - deh[0].deh_dir_id = dirid; - deh[0].deh_objectid = objid; - deh[0].deh_state = 0; /* Endian safe if 0 */ - put_deh_location( &(deh[0]), EMPTY_DIR_SIZE - ROUND_UP( strlen( "." ) ) ); - mark_de_visible(&(deh[0])); - - /* direntry header of ".." */ - put_deh_offset( &(deh[1]), DOT_DOT_OFFSET ); - /* key of ".." for the root directory */ - /* these two are from the inode, and are are LE */ - deh[1].deh_dir_id = par_dirid; - deh[1].deh_objectid = par_objid; - deh[1].deh_state = 0; /* Endian safe if 0 */ - put_deh_location( &(deh[1]), deh_location( &(deh[0])) - ROUND_UP( strlen( ".." ) ) ); - mark_de_visible(&(deh[1])); - - /* copy ".." and "." */ - memcpy (body + deh_location( &(deh[0]) ), ".", 1); - memcpy (body + deh_location( &(deh[1]) ), "..", 2); + struct reiserfs_de_head *deh; + + memset(body, 0, EMPTY_DIR_SIZE); + deh = (struct reiserfs_de_head *)body; + + /* direntry header of "." */ + put_deh_offset(&(deh[0]), DOT_OFFSET); + /* these two are from make_le_item_head, and are are LE */ + deh[0].deh_dir_id = dirid; + deh[0].deh_objectid = objid; + deh[0].deh_state = 0; /* Endian safe if 0 */ + put_deh_location(&(deh[0]), EMPTY_DIR_SIZE - ROUND_UP(strlen("."))); + mark_de_visible(&(deh[0])); + + /* direntry header of ".." */ + put_deh_offset(&(deh[1]), DOT_DOT_OFFSET); + /* key of ".." for the root directory */ + /* these two are from the inode, and are are LE */ + deh[1].deh_dir_id = par_dirid; + deh[1].deh_objectid = par_objid; + deh[1].deh_state = 0; /* Endian safe if 0 */ + put_deh_location(&(deh[1]), + deh_location(&(deh[0])) - ROUND_UP(strlen(".."))); + mark_de_visible(&(deh[1])); + + /* copy ".." and "." 
*/ + memcpy(body + deh_location(&(deh[0])), ".", 1); + memcpy(body + deh_location(&(deh[1])), "..", 2); } diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c index 2118db2896c..b2264ba3cc5 100644 --- a/fs/reiserfs/do_balan.c +++ b/fs/reiserfs/do_balan.c @@ -8,7 +8,6 @@ /* balance the tree according to the analysis made before, */ /* and using buffers obtained after all above. */ - /** ** balance_leaf_when_delete ** balance_leaf @@ -24,23 +23,22 @@ #ifdef CONFIG_REISERFS_CHECK -struct tree_balance * cur_tb = NULL; /* detects whether more than one - copy of tb exists as a means - of checking whether schedule - is interrupting do_balance */ +struct tree_balance *cur_tb = NULL; /* detects whether more than one + copy of tb exists as a means + of checking whether schedule + is interrupting do_balance */ #endif -inline void do_balance_mark_leaf_dirty (struct tree_balance * tb, - struct buffer_head * bh, int flag) +inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, + struct buffer_head *bh, int flag) { - journal_mark_dirty(tb->transaction_handle, - tb->transaction_handle->t_super, bh) ; + journal_mark_dirty(tb->transaction_handle, + tb->transaction_handle->t_super, bh); } #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty - /* summary: if deleting something ( tb->insert_size[0] < 0 ) return(balance_leaf_when_delete()); (flag d handled here) @@ -64,8 +62,6 @@ be performed by do_balance. -Hans */ - - /* Balance leaf node in case of delete or cut: insert_size[0] < 0 * * lnum, rnum can have values >= -1 @@ -73,1384 +69,1933 @@ be performed by do_balance. 
* 0 means that nothing should be done with the neighbor * >0 means to shift entirely or partly the specified number of items to the neighbor */ -static int balance_leaf_when_delete (struct tree_balance * tb, int flag) +static int balance_leaf_when_delete(struct tree_balance *tb, int flag) { - struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path); - int item_pos = PATH_LAST_POSITION (tb->tb_path); - int pos_in_item = tb->tb_path->pos_in_item; - struct buffer_info bi; - int n; - struct item_head * ih; + struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); + int item_pos = PATH_LAST_POSITION(tb->tb_path); + int pos_in_item = tb->tb_path->pos_in_item; + struct buffer_info bi; + int n; + struct item_head *ih; - RFALSE( tb->FR[0] && B_LEVEL (tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1, - "vs- 12000: level: wrong FR %z", tb->FR[0]); - RFALSE( tb->blknum[0] > 1, - "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]); - RFALSE( ! tb->blknum[0] && ! PATH_H_PPARENT(tb->tb_path, 0), - "PAP-12010: tree can not be empty"); + RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1, + "vs- 12000: level: wrong FR %z", tb->FR[0]); + RFALSE(tb->blknum[0] > 1, + "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]); + RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0), + "PAP-12010: tree can not be empty"); - ih = B_N_PITEM_HEAD (tbS0, item_pos); + ih = B_N_PITEM_HEAD(tbS0, item_pos); - /* Delete or truncate the item */ + /* Delete or truncate the item */ - switch (flag) { - case M_DELETE: /* delete item in S[0] */ + switch (flag) { + case M_DELETE: /* delete item in S[0] */ + + RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0], + "vs-12013: mode Delete, insert size %d, ih to be deleted %h", + -tb->insert_size[0], ih); + + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0); + bi.bi_position = PATH_H_POSITION(tb->tb_path, 1); + leaf_delete_items(&bi, 0, item_pos, 1, -1); + + if (!item_pos && tb->CFL[0]) { + if 
(B_NR_ITEMS(tbS0)) { + replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, + 0); + } else { + if (!PATH_H_POSITION(tb->tb_path, 1)) + replace_key(tb, tb->CFL[0], tb->lkey[0], + PATH_H_PPARENT(tb->tb_path, + 0), 0); + } + } - RFALSE( ih_item_len(ih) + IH_SIZE != -tb->insert_size[0], - "vs-12013: mode Delete, insert size %d, ih to be deleted %h", - -tb->insert_size [0], ih); + RFALSE(!item_pos && !tb->CFL[0], + "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], + tb->L[0]); - bi.tb = tb; - bi.bi_bh = tbS0; - bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); - bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); - leaf_delete_items (&bi, 0, item_pos, 1, -1); - - if ( ! item_pos && tb->CFL[0] ) { - if ( B_NR_ITEMS(tbS0) ) { - replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0); - } - else { - if ( ! PATH_H_POSITION (tb->tb_path, 1) ) - replace_key(tb, tb->CFL[0],tb->lkey[0],PATH_H_PPARENT(tb->tb_path, 0),0); - } - } - - RFALSE( ! item_pos && !tb->CFL[0], - "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], tb->L[0]); - - break; - - case M_CUT: { /* cut item in S[0] */ - bi.tb = tb; - bi.bi_bh = tbS0; - bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); - bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); - if (is_direntry_le_ih (ih)) { - - /* UFS unlink semantics are such that you can only delete one directory entry at a time. */ - /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */ - tb->insert_size[0] = -1; - leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]); - - RFALSE( ! item_pos && ! pos_in_item && ! tb->CFL[0], - "PAP-12030: can not change delimiting key. CFL[0]=%p", - tb->CFL[0]); - - if ( ! item_pos && ! pos_in_item && tb->CFL[0] ) { - replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0); - } - } else { - leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]); - - RFALSE( ! 
ih_item_len(ih), - "PAP-12035: cut must leave non-zero dynamic length of item"); - } - break; - } - - default: - print_cur_tb ("12040"); - reiserfs_panic (tb->tb_sb, "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)", - (flag == M_PASTE) ? "PASTE" : ((flag == M_INSERT) ? "INSERT" : "UNKNOWN"), flag); - } - - /* the rule is that no shifting occurs unless by shifting a node can be freed */ - n = B_NR_ITEMS(tbS0); - if ( tb->lnum[0] ) /* L[0] takes part in balancing */ - { - if ( tb->lnum[0] == -1 ) /* L[0] must be joined with S[0] */ - { - if ( tb->rnum[0] == -1 ) /* R[0] must be also joined with S[0] */ - { - if ( tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0) ) - { - /* all contents of all the 3 buffers will be in L[0] */ - if ( PATH_H_POSITION (tb->tb_path, 1) == 0 && 1 < B_NR_ITEMS(tb->FR[0]) ) - replace_key(tb, tb->CFL[0],tb->lkey[0],tb->FR[0],1); - - leaf_move_items (LEAF_FROM_S_TO_L, tb, n, -1, NULL); - leaf_move_items (LEAF_FROM_R_TO_L, tb, B_NR_ITEMS(tb->R[0]), -1, NULL); - - reiserfs_invalidate_buffer (tb, tbS0); - reiserfs_invalidate_buffer (tb, tb->R[0]); - - return 0; + break; + + case M_CUT:{ /* cut item in S[0] */ + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0); + bi.bi_position = PATH_H_POSITION(tb->tb_path, 1); + if (is_direntry_le_ih(ih)) { + + /* UFS unlink semantics are such that you can only delete one directory entry at a time. */ + /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */ + tb->insert_size[0] = -1; + leaf_cut_from_buffer(&bi, item_pos, pos_in_item, + -tb->insert_size[0]); + + RFALSE(!item_pos && !pos_in_item && !tb->CFL[0], + "PAP-12030: can not change delimiting key. 
CFL[0]=%p", + tb->CFL[0]); + + if (!item_pos && !pos_in_item && tb->CFL[0]) { + replace_key(tb, tb->CFL[0], tb->lkey[0], + tbS0, 0); + } + } else { + leaf_cut_from_buffer(&bi, item_pos, pos_in_item, + -tb->insert_size[0]); + + RFALSE(!ih_item_len(ih), + "PAP-12035: cut must leave non-zero dynamic length of item"); + } + break; } - /* all contents of all the 3 buffers will be in R[0] */ - leaf_move_items (LEAF_FROM_S_TO_R, tb, n, -1, NULL); - leaf_move_items (LEAF_FROM_L_TO_R, tb, B_NR_ITEMS(tb->L[0]), -1, NULL); - /* right_delimiting_key is correct in R[0] */ - replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); + default: + print_cur_tb("12040"); + reiserfs_panic(tb->tb_sb, + "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)", + (flag == + M_PASTE) ? "PASTE" : ((flag == + M_INSERT) ? "INSERT" : + "UNKNOWN"), flag); + } - reiserfs_invalidate_buffer (tb, tbS0); - reiserfs_invalidate_buffer (tb, tb->L[0]); + /* the rule is that no shifting occurs unless by shifting a node can be freed */ + n = B_NR_ITEMS(tbS0); + if (tb->lnum[0]) { /* L[0] takes part in balancing */ + if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */ + if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */ + if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) { + /* all contents of all the 3 buffers will be in L[0] */ + if (PATH_H_POSITION(tb->tb_path, 1) == 0 + && 1 < B_NR_ITEMS(tb->FR[0])) + replace_key(tb, tb->CFL[0], + tb->lkey[0], + tb->FR[0], 1); + + leaf_move_items(LEAF_FROM_S_TO_L, tb, n, + -1, NULL); + leaf_move_items(LEAF_FROM_R_TO_L, tb, + B_NR_ITEMS(tb->R[0]), + -1, NULL); + + reiserfs_invalidate_buffer(tb, tbS0); + reiserfs_invalidate_buffer(tb, + tb->R[0]); + + return 0; + } + /* all contents of all the 3 buffers will be in R[0] */ + leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1, + NULL); + leaf_move_items(LEAF_FROM_L_TO_R, tb, + B_NR_ITEMS(tb->L[0]), -1, NULL); + + /* right_delimiting_key is correct in R[0] */ + replace_key(tb, tb->CFR[0], tb->rkey[0], + 
tb->R[0], 0); - return -1; - } + reiserfs_invalidate_buffer(tb, tbS0); + reiserfs_invalidate_buffer(tb, tb->L[0]); - RFALSE( tb->rnum[0] != 0, - "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]); - /* all contents of L[0] and S[0] will be in L[0] */ - leaf_shift_left(tb, n, -1); + return -1; + } - reiserfs_invalidate_buffer (tb, tbS0); + RFALSE(tb->rnum[0] != 0, + "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]); + /* all contents of L[0] and S[0] will be in L[0] */ + leaf_shift_left(tb, n, -1); - return 0; + reiserfs_invalidate_buffer(tb, tbS0); + + return 0; + } + /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */ + + RFALSE((tb->lnum[0] + tb->rnum[0] < n) || + (tb->lnum[0] + tb->rnum[0] > n + 1), + "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent", + tb->rnum[0], tb->lnum[0], n); + RFALSE((tb->lnum[0] + tb->rnum[0] == n) && + (tb->lbytes != -1 || tb->rbytes != -1), + "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split", + tb->rbytes, tb->lbytes); + RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) && + (tb->lbytes < 1 || tb->rbytes != -1), + "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split", + tb->rbytes, tb->lbytes); + + leaf_shift_left(tb, tb->lnum[0], tb->lbytes); + leaf_shift_right(tb, tb->rnum[0], tb->rbytes); + + reiserfs_invalidate_buffer(tb, tbS0); + + return 0; } - /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */ - - RFALSE( ( tb->lnum[0] + tb->rnum[0] < n ) || - ( tb->lnum[0] + tb->rnum[0] > n+1 ), - "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent", - tb->rnum[0], tb->lnum[0], n); - RFALSE( ( tb->lnum[0] + tb->rnum[0] == n ) && - (tb->lbytes != -1 || tb->rbytes != -1), - "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split", - tb->rbytes, tb->lbytes); - RFALSE( ( tb->lnum[0] + tb->rnum[0] == n + 1 ) && - (tb->lbytes < 1 || tb->rbytes != -1), - 
"PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split", - tb->rbytes, tb->lbytes); - - leaf_shift_left (tb, tb->lnum[0], tb->lbytes); - leaf_shift_right(tb, tb->rnum[0], tb->rbytes); - - reiserfs_invalidate_buffer (tb, tbS0); - return 0; - } + if (tb->rnum[0] == -1) { + /* all contents of R[0] and S[0] will be in R[0] */ + leaf_shift_right(tb, n, -1); + reiserfs_invalidate_buffer(tb, tbS0); + return 0; + } - if ( tb->rnum[0] == -1 ) { - /* all contents of R[0] and S[0] will be in R[0] */ - leaf_shift_right(tb, n, -1); - reiserfs_invalidate_buffer (tb, tbS0); + RFALSE(tb->rnum[0], + "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]); return 0; - } - - RFALSE( tb->rnum[0], - "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]); - return 0; } - -static int balance_leaf (struct tree_balance * tb, - struct item_head * ih, /* item header of inserted item (this is on little endian) */ - const char * body, /* body of inserted item or bytes to paste */ - int flag, /* i - insert, d - delete, c - cut, p - paste - (see comment to do_balance) */ - struct item_head * insert_key, /* in our processing of one level we sometimes determine what - must be inserted into the next higher level. This insertion - consists of a key or two keys and their corresponding - pointers */ - struct buffer_head ** insert_ptr /* inserted node-ptrs for the next level */ +static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */ + const char *body, /* body of inserted item or bytes to paste */ + int flag, /* i - insert, d - delete, c - cut, p - paste + (see comment to do_balance) */ + struct item_head *insert_key, /* in our processing of one level we sometimes determine what + must be inserted into the next higher level. 
This insertion + consists of a key or two keys and their corresponding + pointers */ + struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */ ) { - struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path); - int item_pos = PATH_LAST_POSITION (tb->tb_path); /* index into the array of item headers in S[0] - of the affected item */ - struct buffer_info bi; - struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */ - int snum[2]; /* number of items that will be placed - into S_new (includes partially shifted - items) */ - int sbytes[2]; /* if an item is partially shifted into S_new then - if it is a directory item - it is the number of entries from the item that are shifted into S_new - else - it is the number of bytes from the item that are shifted into S_new - */ - int n, i; - int ret_val; - int pos_in_item; - int zeros_num; - - PROC_INFO_INC( tb -> tb_sb, balance_at[ 0 ] ); - - /* Make balance in case insert_size[0] < 0 */ - if ( tb->insert_size[0] < 0 ) - return balance_leaf_when_delete (tb, flag); - - zeros_num = 0; - if (flag == M_INSERT && body == 0) - zeros_num = ih_item_len( ih ); - - pos_in_item = tb->tb_path->pos_in_item; - /* for indirect item pos_in_item is measured in unformatted node - pointers. 
Recalculate to bytes */ - if (flag != M_INSERT && is_indirect_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) - pos_in_item *= UNFM_P_SIZE; - - if ( tb->lnum[0] > 0 ) { - /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ - if ( item_pos < tb->lnum[0] ) { - /* new item or it part falls to L[0], shift it too */ - n = B_NR_ITEMS(tb->L[0]); - - switch (flag) { - case M_INSERT: /* insert item into L[0] */ - - if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) { - /* part of new item falls into L[0] */ - int new_item_len; - int version; - - ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1); - - /* Calculate item length to insert to S[0] */ - new_item_len = ih_item_len(ih) - tb->lbytes; - /* Calculate and check item length to insert to L[0] */ - put_ih_item_len(ih, ih_item_len(ih) - new_item_len ); - - RFALSE( ih_item_len(ih) <= 0, - "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d", - ih_item_len(ih)); - - /* Insert new item into L[0] */ - bi.tb = tb; - bi.bi_bh = tb->L[0]; - bi.bi_parent = tb->FL[0]; - bi.bi_position = get_left_neighbor_position (tb, 0); - leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, - zeros_num > ih_item_len(ih) ? 
ih_item_len(ih) : zeros_num); - - version = ih_version (ih); - - /* Calculate key component, item length and body to insert into S[0] */ - set_le_ih_k_offset( ih, le_ih_k_offset( ih ) + (tb->lbytes << (is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) ); - - put_ih_item_len( ih, new_item_len ); - if ( tb->lbytes > zeros_num ) { - body += (tb->lbytes - zeros_num); - zeros_num = 0; - } - else - zeros_num -= tb->lbytes; - - RFALSE( ih_item_len(ih) <= 0, - "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d", - ih_item_len(ih)); - } else { - /* new item in whole falls into L[0] */ - /* Shift lnum[0]-1 items to L[0] */ - ret_val = leaf_shift_left(tb, tb->lnum[0]-1, tb->lbytes); - /* Insert new item into L[0] */ - bi.tb = tb; - bi.bi_bh = tb->L[0]; - bi.bi_parent = tb->FL[0]; - bi.bi_position = get_left_neighbor_position (tb, 0); - leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, zeros_num); - tb->insert_size[0] = 0; - zeros_num = 0; - } - break; - - case M_PASTE: /* append item in L[0] */ - - if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) { - /* we must shift the part of the appended item */ - if ( is_direntry_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) { - - RFALSE( zeros_num, - "PAP-12090: invalid parameter in case of a directory"); - /* directory item */ - if ( tb->lbytes > pos_in_item ) { - /* new directory entry falls into L[0] */ - struct item_head * pasted; - int l_pos_in_item = pos_in_item; - - /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */ - ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1); - if ( ret_val && ! 
item_pos ) { - pasted = B_N_PITEM_HEAD(tb->L[0],B_NR_ITEMS(tb->L[0])-1); - l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes-1); - } - - /* Append given directory entry to directory item */ - bi.tb = tb; - bi.bi_bh = tb->L[0]; - bi.bi_parent = tb->FL[0]; - bi.bi_position = get_left_neighbor_position (tb, 0); - leaf_paste_in_buffer (&bi, n + item_pos - ret_val, l_pos_in_item, - tb->insert_size[0], body, zeros_num); - - /* previous string prepared space for pasting new entry, following string pastes this entry */ - - /* when we have merge directory item, pos_in_item has been changed too */ - - /* paste new directory entry. 1 is entry number */ - leaf_paste_entries (bi.bi_bh, n + item_pos - ret_val, l_pos_in_item, 1, - (struct reiserfs_de_head *)body, - body + DEH_SIZE, tb->insert_size[0] - ); - tb->insert_size[0] = 0; - } else { - /* new directory item doesn't fall into L[0] */ - /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */ - leaf_shift_left (tb, tb->lnum[0], tb->lbytes); - } - /* Calculate new position to append in item body */ - pos_in_item -= tb->lbytes; - } - else { - /* regular object */ - RFALSE( tb->lbytes <= 0, - "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", - tb->lbytes); - RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)), - "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d", - ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)), pos_in_item); - - if ( tb->lbytes >= pos_in_item ) { - /* appended item will be in L[0] in whole */ - int l_n; - - /* this bytes number must be appended to the last item of L[h] */ - l_n = tb->lbytes - pos_in_item; - - /* Calculate new insert_size[0] */ - tb->insert_size[0] -= l_n; - - RFALSE( tb->insert_size[0] <= 0, - "PAP-12105: there is nothing to paste into L[0]. 
insert_size=%d", - tb->insert_size[0]); - ret_val = leaf_shift_left(tb,tb->lnum[0], - ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos))); - /* Append to body of item in L[0] */ - bi.tb = tb; - bi.bi_bh = tb->L[0]; - bi.bi_parent = tb->FL[0]; - bi.bi_position = get_left_neighbor_position (tb, 0); - leaf_paste_in_buffer( - &bi,n + item_pos - ret_val, - ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)), - l_n,body, zeros_num > l_n ? l_n : zeros_num - ); - /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/ - { - int version; - int temp_l = l_n; - - RFALSE (ih_item_len (B_N_PITEM_HEAD (tbS0, 0)), - "PAP-12106: item length must be 0"); - RFALSE (comp_short_le_keys (B_N_PKEY (tbS0, 0), - B_N_PKEY (tb->L[0], - n + item_pos - ret_val)), - "PAP-12107: items must be of the same file"); - if (is_indirect_le_ih(B_N_PITEM_HEAD (tb->L[0], - n + item_pos - ret_val))) { - temp_l = l_n << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT); + struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); + int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0] + of the affected item */ + struct buffer_info bi; + struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */ + int snum[2]; /* number of items that will be placed + into S_new (includes partially shifted + items) */ + int sbytes[2]; /* if an item is partially shifted into S_new then + if it is a directory item + it is the number of entries from the item that are shifted into S_new + else + it is the number of bytes from the item that are shifted into S_new + */ + int n, i; + int ret_val; + int pos_in_item; + int zeros_num; + + PROC_INFO_INC(tb->tb_sb, balance_at[0]); + + /* Make balance in case insert_size[0] < 0 */ + if (tb->insert_size[0] < 0) + return balance_leaf_when_delete(tb, flag); + + zeros_num = 0; + if (flag == M_INSERT && body == 0) + zeros_num = ih_item_len(ih); + + pos_in_item = tb->tb_path->pos_in_item; + /* for indirect item pos_in_item 
is measured in unformatted node + pointers. Recalculate to bytes */ + if (flag != M_INSERT + && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) + pos_in_item *= UNFM_P_SIZE; + + if (tb->lnum[0] > 0) { + /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ + if (item_pos < tb->lnum[0]) { + /* new item or it part falls to L[0], shift it too */ + n = B_NR_ITEMS(tb->L[0]); + + switch (flag) { + case M_INSERT: /* insert item into L[0] */ + + if (item_pos == tb->lnum[0] - 1 + && tb->lbytes != -1) { + /* part of new item falls into L[0] */ + int new_item_len; + int version; + + ret_val = + leaf_shift_left(tb, tb->lnum[0] - 1, + -1); + + /* Calculate item length to insert to S[0] */ + new_item_len = + ih_item_len(ih) - tb->lbytes; + /* Calculate and check item length to insert to L[0] */ + put_ih_item_len(ih, + ih_item_len(ih) - + new_item_len); + + RFALSE(ih_item_len(ih) <= 0, + "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d", + ih_item_len(ih)); + + /* Insert new item into L[0] */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = tb->FL[0]; + bi.bi_position = + get_left_neighbor_position(tb, 0); + leaf_insert_into_buf(&bi, + n + item_pos - + ret_val, ih, body, + zeros_num > + ih_item_len(ih) ? + ih_item_len(ih) : + zeros_num); + + version = ih_version(ih); + + /* Calculate key component, item length and body to insert into S[0] */ + set_le_ih_k_offset(ih, + le_ih_k_offset(ih) + + (tb-> + lbytes << + (is_indirect_le_ih + (ih) ? 
tb->tb_sb-> + s_blocksize_bits - + UNFM_P_SHIFT : + 0))); + + put_ih_item_len(ih, new_item_len); + if (tb->lbytes > zeros_num) { + body += + (tb->lbytes - zeros_num); + zeros_num = 0; + } else + zeros_num -= tb->lbytes; + + RFALSE(ih_item_len(ih) <= 0, + "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d", + ih_item_len(ih)); + } else { + /* new item in whole falls into L[0] */ + /* Shift lnum[0]-1 items to L[0] */ + ret_val = + leaf_shift_left(tb, tb->lnum[0] - 1, + tb->lbytes); + /* Insert new item into L[0] */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = tb->FL[0]; + bi.bi_position = + get_left_neighbor_position(tb, 0); + leaf_insert_into_buf(&bi, + n + item_pos - + ret_val, ih, body, + zeros_num); + tb->insert_size[0] = 0; + zeros_num = 0; } - /* update key of first item in S0 */ - version = ih_version (B_N_PITEM_HEAD (tbS0, 0)); - set_le_key_k_offset (version, B_N_PKEY (tbS0, 0), - le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + temp_l); - /* update left delimiting key */ - set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), - le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + temp_l); - } - - /* Calculate new body, position in item and insert_size[0] */ - if ( l_n > zeros_num ) { - body += (l_n - zeros_num); - zeros_num = 0; - } - else - zeros_num -= l_n; - pos_in_item = 0; - - RFALSE( comp_short_le_keys - (B_N_PKEY(tbS0,0), - B_N_PKEY(tb->L[0],B_NR_ITEMS(tb->L[0])-1)) || - - !op_is_left_mergeable - (B_N_PKEY (tbS0, 0), tbS0->b_size) || - !op_is_left_mergeable - (B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), - tbS0->b_size), - "PAP-12120: item must be merge-able with left neighboring item"); - } - else /* only part of the appended item will be in L[0] */ - { - /* Calculate position in item for append in S[0] */ - pos_in_item -= tb->lbytes; - - RFALSE( pos_in_item <= 0, - "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item); - - /* Shift lnum[0] - 1 items in whole. 
Shift lbytes - 1 byte from item number lnum[0] */ - leaf_shift_left(tb,tb->lnum[0],tb->lbytes); - } - } - } - else /* appended item will be in L[0] in whole */ - { - struct item_head * pasted; - - if ( ! item_pos && op_is_left_mergeable (B_N_PKEY (tbS0, 0), tbS0->b_size) ) - { /* if we paste into first item of S[0] and it is left mergable */ - /* then increment pos_in_item by the size of the last item in L[0] */ - pasted = B_N_PITEM_HEAD(tb->L[0],n-1); - if ( is_direntry_le_ih (pasted) ) - pos_in_item += ih_entry_count(pasted); - else - pos_in_item += ih_item_len(pasted); + break; + + case M_PASTE: /* append item in L[0] */ + + if (item_pos == tb->lnum[0] - 1 + && tb->lbytes != -1) { + /* we must shift the part of the appended item */ + if (is_direntry_le_ih + (B_N_PITEM_HEAD(tbS0, item_pos))) { + + RFALSE(zeros_num, + "PAP-12090: invalid parameter in case of a directory"); + /* directory item */ + if (tb->lbytes > pos_in_item) { + /* new directory entry falls into L[0] */ + struct item_head + *pasted; + int l_pos_in_item = + pos_in_item; + + /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */ + ret_val = + leaf_shift_left(tb, + tb-> + lnum + [0], + tb-> + lbytes + - + 1); + if (ret_val + && !item_pos) { + pasted = + B_N_PITEM_HEAD + (tb->L[0], + B_NR_ITEMS + (tb-> + L[0]) - + 1); + l_pos_in_item += + I_ENTRY_COUNT + (pasted) - + (tb-> + lbytes - + 1); + } + + /* Append given directory entry to directory item */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = + tb->FL[0]; + bi.bi_position = + get_left_neighbor_position + (tb, 0); + leaf_paste_in_buffer + (&bi, + n + item_pos - + ret_val, + l_pos_in_item, + tb->insert_size[0], + body, zeros_num); + + /* previous string prepared space for pasting new entry, following string pastes this entry */ + + /* when we have merge directory item, pos_in_item has been changed too */ + + /* paste new directory entry. 1 is entry number */ + leaf_paste_entries(bi. 
+ bi_bh, + n + + item_pos + - + ret_val, + l_pos_in_item, + 1, + (struct + reiserfs_de_head + *) + body, + body + + + DEH_SIZE, + tb-> + insert_size + [0] + ); + tb->insert_size[0] = 0; + } else { + /* new directory item doesn't fall into L[0] */ + /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */ + leaf_shift_left(tb, + tb-> + lnum[0], + tb-> + lbytes); + } + /* Calculate new position to append in item body */ + pos_in_item -= tb->lbytes; + } else { + /* regular object */ + RFALSE(tb->lbytes <= 0, + "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", + tb->lbytes); + RFALSE(pos_in_item != + ih_item_len + (B_N_PITEM_HEAD + (tbS0, item_pos)), + "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d", + ih_item_len + (B_N_PITEM_HEAD + (tbS0, item_pos)), + pos_in_item); + + if (tb->lbytes >= pos_in_item) { + /* appended item will be in L[0] in whole */ + int l_n; + + /* this bytes number must be appended to the last item of L[h] */ + l_n = + tb->lbytes - + pos_in_item; + + /* Calculate new insert_size[0] */ + tb->insert_size[0] -= + l_n; + + RFALSE(tb-> + insert_size[0] <= + 0, + "PAP-12105: there is nothing to paste into L[0]. insert_size=%d", + tb-> + insert_size[0]); + ret_val = + leaf_shift_left(tb, + tb-> + lnum + [0], + ih_item_len + (B_N_PITEM_HEAD + (tbS0, + item_pos))); + /* Append to body of item in L[0] */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = + tb->FL[0]; + bi.bi_position = + get_left_neighbor_position + (tb, 0); + leaf_paste_in_buffer + (&bi, + n + item_pos - + ret_val, + ih_item_len + (B_N_PITEM_HEAD + (tb->L[0], + n + item_pos - + ret_val)), l_n, + body, + zeros_num > + l_n ? 
l_n : + zeros_num); + /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */ + { + int version; + int temp_l = + l_n; + + RFALSE + (ih_item_len + (B_N_PITEM_HEAD + (tbS0, + 0)), + "PAP-12106: item length must be 0"); + RFALSE + (comp_short_le_keys + (B_N_PKEY + (tbS0, 0), + B_N_PKEY + (tb->L[0], + n + + item_pos + - + ret_val)), + "PAP-12107: items must be of the same file"); + if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) { + temp_l = + l_n + << + (tb-> + tb_sb-> + s_blocksize_bits + - + UNFM_P_SHIFT); + } + /* update key of first item in S0 */ + version = + ih_version + (B_N_PITEM_HEAD + (tbS0, 0)); + set_le_key_k_offset + (version, + B_N_PKEY + (tbS0, 0), + le_key_k_offset + (version, + B_N_PKEY + (tbS0, + 0)) + + temp_l); + /* update left delimiting key */ + set_le_key_k_offset + (version, + B_N_PDELIM_KEY + (tb-> + CFL[0], + tb-> + lkey[0]), + le_key_k_offset + (version, + B_N_PDELIM_KEY + (tb-> + CFL[0], + tb-> + lkey[0])) + + temp_l); + } + + /* Calculate new body, position in item and insert_size[0] */ + if (l_n > zeros_num) { + body += + (l_n - + zeros_num); + zeros_num = 0; + } else + zeros_num -= + l_n; + pos_in_item = 0; + + RFALSE + (comp_short_le_keys + (B_N_PKEY(tbS0, 0), + B_N_PKEY(tb->L[0], + B_NR_ITEMS + (tb-> + L[0]) - + 1)) + || + !op_is_left_mergeable + (B_N_PKEY(tbS0, 0), + tbS0->b_size) + || + !op_is_left_mergeable + (B_N_PDELIM_KEY + (tb->CFL[0], + tb->lkey[0]), + tbS0->b_size), + "PAP-12120: item must be merge-able with left neighboring item"); + } else { /* only part of the appended item will be in L[0] */ + + /* Calculate position in item for append in S[0] */ + pos_in_item -= + tb->lbytes; + + RFALSE(pos_in_item <= 0, + "PAP-12125: no place for paste. pos_in_item=%d", + pos_in_item); + + /* Shift lnum[0] - 1 items in whole. 
Shift lbytes - 1 byte from item number lnum[0] */ + leaf_shift_left(tb, + tb-> + lnum[0], + tb-> + lbytes); + } + } + } else { /* appended item will be in L[0] in whole */ + + struct item_head *pasted; + + if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */ + /* then increment pos_in_item by the size of the last item in L[0] */ + pasted = + B_N_PITEM_HEAD(tb->L[0], + n - 1); + if (is_direntry_le_ih(pasted)) + pos_in_item += + ih_entry_count + (pasted); + else + pos_in_item += + ih_item_len(pasted); + } + + /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ + ret_val = + leaf_shift_left(tb, tb->lnum[0], + tb->lbytes); + /* Append to body of item in L[0] */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = tb->FL[0]; + bi.bi_position = + get_left_neighbor_position(tb, 0); + leaf_paste_in_buffer(&bi, + n + item_pos - + ret_val, + pos_in_item, + tb->insert_size[0], + body, zeros_num); + + /* if appended item is directory, paste entry */ + pasted = + B_N_PITEM_HEAD(tb->L[0], + n + item_pos - + ret_val); + if (is_direntry_le_ih(pasted)) + leaf_paste_entries(bi.bi_bh, + n + + item_pos - + ret_val, + pos_in_item, + 1, + (struct + reiserfs_de_head + *)body, + body + + DEH_SIZE, + tb-> + insert_size + [0] + ); + /* if appended item is indirect item, put unformatted node into un list */ + if (is_indirect_le_ih(pasted)) + set_ih_free_space(pasted, 0); + tb->insert_size[0] = 0; + zeros_num = 0; + } + break; + default: /* cases d and t */ + reiserfs_panic(tb->tb_sb, + "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)", + (flag == + M_DELETE) ? "DELETE" : ((flag == + M_CUT) + ? "CUT" + : + "UNKNOWN"), + flag); } - - /* Shift lnum[0] - 1 items in whole. 
Shift lbytes - 1 byte from item number lnum[0] */ - ret_val = leaf_shift_left(tb,tb->lnum[0],tb->lbytes); - /* Append to body of item in L[0] */ - bi.tb = tb; - bi.bi_bh = tb->L[0]; - bi.bi_parent = tb->FL[0]; - bi.bi_position = get_left_neighbor_position (tb, 0); - leaf_paste_in_buffer (&bi, n + item_pos - ret_val, pos_in_item, tb->insert_size[0], - body, zeros_num); - - /* if appended item is directory, paste entry */ - pasted = B_N_PITEM_HEAD (tb->L[0], n + item_pos - ret_val); - if (is_direntry_le_ih (pasted)) - leaf_paste_entries ( - bi.bi_bh, n + item_pos - ret_val, pos_in_item, 1, - (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0] - ); - /* if appended item is indirect item, put unformatted node into un list */ - if (is_indirect_le_ih (pasted)) - set_ih_free_space (pasted, 0); - tb->insert_size[0] = 0; - zeros_num = 0; + } else { + /* new item doesn't fall into L[0] */ + leaf_shift_left(tb, tb->lnum[0], tb->lbytes); } - break; - default: /* cases d and t */ - reiserfs_panic (tb->tb_sb, "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)", - (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); - } - } else { - /* new item doesn't fall into L[0] */ - leaf_shift_left(tb,tb->lnum[0],tb->lbytes); } - } /* tb->lnum[0] > 0 */ - /* Calculate new item position */ - item_pos -= ( tb->lnum[0] - (( tb->lbytes != -1 ) ? 
1 : 0)); - - if ( tb->rnum[0] > 0 ) { - /* shift rnum[0] items from S[0] to the right neighbor R[0] */ - n = B_NR_ITEMS(tbS0); - switch ( flag ) { - - case M_INSERT: /* insert item */ - if ( n - tb->rnum[0] < item_pos ) - { /* new item or its part falls to R[0] */ - if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 ) - { /* part of new item falls into R[0] */ - loff_t old_key_comp, old_len, r_zeros_number; - const char * r_body; - int version; - loff_t offset; - - leaf_shift_right(tb,tb->rnum[0]-1,-1); - - version = ih_version(ih); - /* Remember key component and item length */ - old_key_comp = le_ih_k_offset( ih ); - old_len = ih_item_len(ih); - - /* Calculate key component and item length to insert into R[0] */ - offset = le_ih_k_offset( ih ) + ((old_len - tb->rbytes )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)); - set_le_ih_k_offset( ih, offset ); - put_ih_item_len( ih, tb->rbytes); - /* Insert part of the item into R[0] */ - bi.tb = tb; - bi.bi_bh = tb->R[0]; - bi.bi_parent = tb->FR[0]; - bi.bi_position = get_right_neighbor_position (tb, 0); - if ( (old_len - tb->rbytes) > zeros_num ) { - r_zeros_number = 0; - r_body = body + (old_len - tb->rbytes) - zeros_num; - } - else { - r_body = body; - r_zeros_number = zeros_num - (old_len - tb->rbytes); - zeros_num -= r_zeros_number; - } - - leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number); - - /* Replace right delimiting key by first key in R[0] */ - replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); - - /* Calculate key component and item length to insert into S[0] */ - set_le_ih_k_offset( ih, old_key_comp ); - put_ih_item_len( ih, old_len - tb->rbytes ); - - tb->insert_size[0] -= tb->rbytes; + /* tb->lnum[0] > 0 */ + /* Calculate new item position */ + item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 
1 : 0)); + + if (tb->rnum[0] > 0) { + /* shift rnum[0] items from S[0] to the right neighbor R[0] */ + n = B_NR_ITEMS(tbS0); + switch (flag) { + + case M_INSERT: /* insert item */ + if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */ + if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */ + loff_t old_key_comp, old_len, + r_zeros_number; + const char *r_body; + int version; + loff_t offset; + + leaf_shift_right(tb, tb->rnum[0] - 1, + -1); + + version = ih_version(ih); + /* Remember key component and item length */ + old_key_comp = le_ih_k_offset(ih); + old_len = ih_item_len(ih); + + /* Calculate key component and item length to insert into R[0] */ + offset = + le_ih_k_offset(ih) + + ((old_len - + tb-> + rbytes) << (is_indirect_le_ih(ih) + ? tb->tb_sb-> + s_blocksize_bits - + UNFM_P_SHIFT : 0)); + set_le_ih_k_offset(ih, offset); + put_ih_item_len(ih, tb->rbytes); + /* Insert part of the item into R[0] */ + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = tb->FR[0]; + bi.bi_position = + get_right_neighbor_position(tb, 0); + if ((old_len - tb->rbytes) > zeros_num) { + r_zeros_number = 0; + r_body = + body + (old_len - + tb->rbytes) - + zeros_num; + } else { + r_body = body; + r_zeros_number = + zeros_num - (old_len - + tb->rbytes); + zeros_num -= r_zeros_number; + } + + leaf_insert_into_buf(&bi, 0, ih, r_body, + r_zeros_number); + + /* Replace right delimiting key by first key in R[0] */ + replace_key(tb, tb->CFR[0], tb->rkey[0], + tb->R[0], 0); + + /* Calculate key component and item length to insert into S[0] */ + set_le_ih_k_offset(ih, old_key_comp); + put_ih_item_len(ih, + old_len - tb->rbytes); + + tb->insert_size[0] -= tb->rbytes; + + } else { /* whole new item falls into R[0] */ + + /* Shift rnum[0]-1 items to R[0] */ + ret_val = + leaf_shift_right(tb, + tb->rnum[0] - 1, + tb->rbytes); + /* Insert new item into R[0] */ + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = tb->FR[0]; + 
bi.bi_position = + get_right_neighbor_position(tb, 0); + leaf_insert_into_buf(&bi, + item_pos - n + + tb->rnum[0] - 1, + ih, body, + zeros_num); + + if (item_pos - n + tb->rnum[0] - 1 == 0) { + replace_key(tb, tb->CFR[0], + tb->rkey[0], + tb->R[0], 0); + + } + zeros_num = tb->insert_size[0] = 0; + } + } else { /* new item or part of it doesn't fall into R[0] */ - } - else /* whole new item falls into R[0] */ - { - /* Shift rnum[0]-1 items to R[0] */ - ret_val = leaf_shift_right(tb,tb->rnum[0]-1,tb->rbytes); - /* Insert new item into R[0] */ - bi.tb = tb; - bi.bi_bh = tb->R[0]; - bi.bi_parent = tb->FR[0]; - bi.bi_position = get_right_neighbor_position (tb, 0); - leaf_insert_into_buf (&bi, item_pos - n + tb->rnum[0] - 1, ih, body, zeros_num); - - if ( item_pos - n + tb->rnum[0] - 1 == 0 ) { - replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); - - } - zeros_num = tb->insert_size[0] = 0; - } - } - else /* new item or part of it doesn't fall into R[0] */ - { - leaf_shift_right(tb,tb->rnum[0],tb->rbytes); - } - break; - - case M_PASTE: /* append item */ - - if ( n - tb->rnum[0] <= item_pos ) /* pasted item or part of it falls to R[0] */ - { - if ( item_pos == n - tb->rnum[0] && tb->rbytes != -1 ) - { /* we must shift the part of the appended item */ - if ( is_direntry_le_ih (B_N_PITEM_HEAD(tbS0, item_pos))) - { /* we append to directory item */ - int entry_count; - - RFALSE( zeros_num, - "PAP-12145: invalid parameter in case of a directory"); - entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD(tbS0, item_pos)); - if ( entry_count - tb->rbytes < pos_in_item ) - /* new directory entry falls into R[0] */ - { - int paste_entry_position; - - RFALSE( tb->rbytes - 1 >= entry_count || - ! tb->insert_size[0], - "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d", - tb->rbytes, entry_count); - /* Shift rnum[0]-1 items in whole. 
Shift rbytes-1 directory entries from directory item number rnum[0] */ - leaf_shift_right(tb,tb->rnum[0],tb->rbytes - 1); - /* Paste given directory entry to directory item */ - paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1; - bi.tb = tb; - bi.bi_bh = tb->R[0]; - bi.bi_parent = tb->FR[0]; - bi.bi_position = get_right_neighbor_position (tb, 0); - leaf_paste_in_buffer (&bi, 0, paste_entry_position, - tb->insert_size[0],body,zeros_num); - /* paste entry */ - leaf_paste_entries ( - bi.bi_bh, 0, paste_entry_position, 1, (struct reiserfs_de_head *)body, - body + DEH_SIZE, tb->insert_size[0] - ); - - if ( paste_entry_position == 0 ) { - /* change delimiting keys */ - replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); - } - - tb->insert_size[0] = 0; - pos_in_item++; - } - else /* new directory entry doesn't fall into R[0] */ - { - leaf_shift_right(tb,tb->rnum[0],tb->rbytes); - } - } - else /* regular object */ - { - int n_shift, n_rem, r_zeros_number; - const char * r_body; - - /* Calculate number of bytes which must be shifted from appended item */ - if ( (n_shift = tb->rbytes - tb->insert_size[0]) < 0 ) - n_shift = 0; - - RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD (tbS0, item_pos)), - "PAP-12155: invalid position to paste. 
ih_item_len=%d, pos_in_item=%d", - pos_in_item, ih_item_len( B_N_PITEM_HEAD(tbS0,item_pos))); - - leaf_shift_right(tb,tb->rnum[0],n_shift); - /* Calculate number of bytes which must remain in body after appending to R[0] */ - if ( (n_rem = tb->insert_size[0] - tb->rbytes) < 0 ) - n_rem = 0; - - { - int version; - unsigned long temp_rem = n_rem; - - version = ih_version (B_N_PITEM_HEAD (tb->R[0],0)); - if (is_indirect_le_key(version,B_N_PKEY(tb->R[0],0))){ - temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - - UNFM_P_SHIFT); - } - set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0), - le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + temp_rem); - set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]), - le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + temp_rem); + leaf_shift_right(tb, tb->rnum[0], tb->rbytes); } + break; + + case M_PASTE: /* append item */ + + if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */ + if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */ + if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */ + int entry_count; + + RFALSE(zeros_num, + "PAP-12145: invalid parameter in case of a directory"); + entry_count = + I_ENTRY_COUNT(B_N_PITEM_HEAD + (tbS0, + item_pos)); + if (entry_count - tb->rbytes < + pos_in_item) + /* new directory entry falls into R[0] */ + { + int paste_entry_position; + + RFALSE(tb->rbytes - 1 >= + entry_count + || !tb-> + insert_size[0], + "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d", + tb->rbytes, + entry_count); + /* Shift rnum[0]-1 items in whole. 
Shift rbytes-1 directory entries from directory item number rnum[0] */ + leaf_shift_right(tb, + tb-> + rnum + [0], + tb-> + rbytes + - 1); + /* Paste given directory entry to directory item */ + paste_entry_position = + pos_in_item - + entry_count + + tb->rbytes - 1; + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = + tb->FR[0]; + bi.bi_position = + get_right_neighbor_position + (tb, 0); + leaf_paste_in_buffer + (&bi, 0, + paste_entry_position, + tb->insert_size[0], + body, zeros_num); + /* paste entry */ + leaf_paste_entries(bi. + bi_bh, + 0, + paste_entry_position, + 1, + (struct + reiserfs_de_head + *) + body, + body + + + DEH_SIZE, + tb-> + insert_size + [0] + ); + + if (paste_entry_position + == 0) { + /* change delimiting keys */ + replace_key(tb, + tb-> + CFR + [0], + tb-> + rkey + [0], + tb-> + R + [0], + 0); + } + + tb->insert_size[0] = 0; + pos_in_item++; + } else { /* new directory entry doesn't fall into R[0] */ + + leaf_shift_right(tb, + tb-> + rnum + [0], + tb-> + rbytes); + } + } else { /* regular object */ + + int n_shift, n_rem, + r_zeros_number; + const char *r_body; + + /* Calculate number of bytes which must be shifted from appended item */ + if ((n_shift = + tb->rbytes - + tb->insert_size[0]) < 0) + n_shift = 0; + + RFALSE(pos_in_item != + ih_item_len + (B_N_PITEM_HEAD + (tbS0, item_pos)), + "PAP-12155: invalid position to paste. 
ih_item_len=%d, pos_in_item=%d", + pos_in_item, + ih_item_len + (B_N_PITEM_HEAD + (tbS0, item_pos))); + + leaf_shift_right(tb, + tb->rnum[0], + n_shift); + /* Calculate number of bytes which must remain in body after appending to R[0] */ + if ((n_rem = + tb->insert_size[0] - + tb->rbytes) < 0) + n_rem = 0; + + { + int version; + unsigned long temp_rem = + n_rem; + + version = + ih_version + (B_N_PITEM_HEAD + (tb->R[0], 0)); + if (is_indirect_le_key + (version, + B_N_PKEY(tb->R[0], + 0))) { + temp_rem = + n_rem << + (tb->tb_sb-> + s_blocksize_bits + - + UNFM_P_SHIFT); + } + set_le_key_k_offset + (version, + B_N_PKEY(tb->R[0], + 0), + le_key_k_offset + (version, + B_N_PKEY(tb->R[0], + 0)) + + temp_rem); + set_le_key_k_offset + (version, + B_N_PDELIM_KEY(tb-> + CFR + [0], + tb-> + rkey + [0]), + le_key_k_offset + (version, + B_N_PDELIM_KEY + (tb->CFR[0], + tb->rkey[0])) + + temp_rem); + } /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ - do_balance_mark_internal_dirty (tb, tb->CFR[0], 0); - - /* Append part of body into R[0] */ - bi.tb = tb; - bi.bi_bh = tb->R[0]; - bi.bi_parent = tb->FR[0]; - bi.bi_position = get_right_neighbor_position (tb, 0); - if ( n_rem > zeros_num ) { - r_zeros_number = 0; - r_body = body + n_rem - zeros_num; - } - else { - r_body = body; - r_zeros_number = zeros_num - n_rem; - zeros_num -= r_zeros_number; - } - - leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number); - - if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) { + do_balance_mark_internal_dirty + (tb, tb->CFR[0], 0); + + /* Append part of body into R[0] */ + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = tb->FR[0]; + bi.bi_position = + get_right_neighbor_position + (tb, 0); + if (n_rem > zeros_num) { + r_zeros_number = 0; + r_body = + body + n_rem - + zeros_num; + } else { + r_body = body; + r_zeros_number = + zeros_num - n_rem; + zeros_num -= + r_zeros_number; + } + + 
leaf_paste_in_buffer(&bi, 0, + n_shift, + tb-> + insert_size + [0] - + n_rem, + r_body, + r_zeros_number); + + if (is_indirect_le_ih + (B_N_PITEM_HEAD + (tb->R[0], 0))) { #if 0 - RFALSE( n_rem, - "PAP-12160: paste more than one unformatted node pointer"); + RFALSE(n_rem, + "PAP-12160: paste more than one unformatted node pointer"); #endif - set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), 0); - } - tb->insert_size[0] = n_rem; - if ( ! n_rem ) - pos_in_item ++; - } - } - else /* pasted item in whole falls into R[0] */ - { - struct item_head * pasted; + set_ih_free_space + (B_N_PITEM_HEAD + (tb->R[0], 0), 0); + } + tb->insert_size[0] = n_rem; + if (!n_rem) + pos_in_item++; + } + } else { /* pasted item in whole falls into R[0] */ + + struct item_head *pasted; + + ret_val = + leaf_shift_right(tb, tb->rnum[0], + tb->rbytes); + /* append item in R[0] */ + if (pos_in_item >= 0) { + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = tb->FR[0]; + bi.bi_position = + get_right_neighbor_position + (tb, 0); + leaf_paste_in_buffer(&bi, + item_pos - + n + + tb-> + rnum[0], + pos_in_item, + tb-> + insert_size + [0], body, + zeros_num); + } + + /* paste new entry, if item is directory item */ + pasted = + B_N_PITEM_HEAD(tb->R[0], + item_pos - n + + tb->rnum[0]); + if (is_direntry_le_ih(pasted) + && pos_in_item >= 0) { + leaf_paste_entries(bi.bi_bh, + item_pos - + n + + tb->rnum[0], + pos_in_item, + 1, + (struct + reiserfs_de_head + *)body, + body + + DEH_SIZE, + tb-> + insert_size + [0] + ); + if (!pos_in_item) { + + RFALSE(item_pos - n + + tb->rnum[0], + "PAP-12165: directory item must be first item of node when pasting is in 0th position"); + + /* update delimiting keys */ + replace_key(tb, + tb->CFR[0], + tb->rkey[0], + tb->R[0], + 0); + } + } + + if (is_indirect_le_ih(pasted)) + set_ih_free_space(pasted, 0); + zeros_num = tb->insert_size[0] = 0; + } + } else { /* new item doesn't fall into R[0] */ - ret_val = leaf_shift_right(tb,tb->rnum[0],tb->rbytes); - /* append item in R[0] 
*/ - if ( pos_in_item >= 0 ) { - bi.tb = tb; - bi.bi_bh = tb->R[0]; - bi.bi_parent = tb->FR[0]; - bi.bi_position = get_right_neighbor_position (tb, 0); - leaf_paste_in_buffer(&bi,item_pos - n + tb->rnum[0], pos_in_item, - tb->insert_size[0],body, zeros_num); - } - - /* paste new entry, if item is directory item */ - pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]); - if (is_direntry_le_ih (pasted) && pos_in_item >= 0 ) { - leaf_paste_entries ( - bi.bi_bh, item_pos - n + tb->rnum[0], pos_in_item, 1, - (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0] - ); - if ( ! pos_in_item ) { - - RFALSE( item_pos - n + tb->rnum[0], - "PAP-12165: directory item must be first item of node when pasting is in 0th position"); - - /* update delimiting keys */ - replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); + leaf_shift_right(tb, tb->rnum[0], tb->rbytes); } - } - - if (is_indirect_le_ih (pasted)) - set_ih_free_space (pasted, 0); - zeros_num = tb->insert_size[0] = 0; + break; + default: /* cases d and t */ + reiserfs_panic(tb->tb_sb, + "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)", + (flag == + M_DELETE) ? "DELETE" : ((flag == + M_CUT) ? "CUT" + : "UNKNOWN"), + flag); } - } - else /* new item doesn't fall into R[0] */ - { - leaf_shift_right(tb,tb->rnum[0],tb->rbytes); - } - break; - default: /* cases d and t */ - reiserfs_panic (tb->tb_sb, "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)", - (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); - } - - } /* tb->rnum[0] > 0 */ - - - RFALSE( tb->blknum[0] > 3, - "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]); - RFALSE( tb->blknum[0] < 0, - "PAP-12185: blknum can not be %d. 
It must be >= 0", tb->blknum[0]); - - /* if while adding to a node we discover that it is possible to split - it in two, and merge the left part into the left neighbor and the - right part into the right neighbor, eliminating the node */ - if ( tb->blknum[0] == 0 ) { /* node S[0] is empty now */ - - RFALSE( ! tb->lnum[0] || ! tb->rnum[0], - "PAP-12190: lnum and rnum must not be zero"); - /* if insertion was done before 0-th position in R[0], right - delimiting key of the tb->L[0]'s and left delimiting key are - not set correctly */ - if (tb->CFL[0]) { - if (!tb->CFR[0]) - reiserfs_panic (tb->tb_sb, "vs-12195: balance_leaf: CFR not initialized"); - copy_key (B_N_PDELIM_KEY (tb->CFL[0], tb->lkey[0]), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0])); - do_balance_mark_internal_dirty (tb, tb->CFL[0], 0); - } - - reiserfs_invalidate_buffer(tb,tbS0); - return 0; - } - - - /* Fill new nodes that appear in place of S[0] */ - /* I am told that this copying is because we need an array to enable - the looping code. -Hans */ - snum[0] = tb->s1num, - snum[1] = tb->s2num; - sbytes[0] = tb->s1bytes; - sbytes[1] = tb->s2bytes; - for( i = tb->blknum[0] - 2; i >= 0; i-- ) { - - RFALSE( !snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i, snum[i]); + } - /* here we shift from S to S_new nodes */ + /* tb->rnum[0] > 0 */ + RFALSE(tb->blknum[0] > 3, + "PAP-12180: blknum can not be %d. It must be <= 3", + tb->blknum[0]); + RFALSE(tb->blknum[0] < 0, + "PAP-12185: blknum can not be %d. 
It must be >= 0", + tb->blknum[0]); + + /* if while adding to a node we discover that it is possible to split + it in two, and merge the left part into the left neighbor and the + right part into the right neighbor, eliminating the node */ + if (tb->blknum[0] == 0) { /* node S[0] is empty now */ + + RFALSE(!tb->lnum[0] || !tb->rnum[0], + "PAP-12190: lnum and rnum must not be zero"); + /* if insertion was done before 0-th position in R[0], right + delimiting key of the tb->L[0]'s and left delimiting key are + not set correctly */ + if (tb->CFL[0]) { + if (!tb->CFR[0]) + reiserfs_panic(tb->tb_sb, + "vs-12195: balance_leaf: CFR not initialized"); + copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), + B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])); + do_balance_mark_internal_dirty(tb, tb->CFL[0], 0); + } - S_new[i] = get_FEB(tb); + reiserfs_invalidate_buffer(tb, tbS0); + return 0; + } - /* initialized block type and tree level */ - set_blkh_level( B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL ); + /* Fill new nodes that appear in place of S[0] */ + + /* I am told that this copying is because we need an array to enable + the looping code. -Hans */ + snum[0] = tb->s1num, snum[1] = tb->s2num; + sbytes[0] = tb->s1bytes; + sbytes[1] = tb->s2bytes; + for (i = tb->blknum[0] - 2; i >= 0; i--) { + + RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. 
Must be > 0", i, + snum[i]); + + /* here we shift from S to S_new nodes */ + + S_new[i] = get_FEB(tb); + + /* initialized block type and tree level */ + set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL); + + n = B_NR_ITEMS(tbS0); + + switch (flag) { + case M_INSERT: /* insert item */ + + if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */ + if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */ + int old_key_comp, old_len, + r_zeros_number; + const char *r_body; + int version; + + /* Move snum[i]-1 items from S[0] to S_new[i] */ + leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, + snum[i] - 1, -1, + S_new[i]); + /* Remember key component and item length */ + version = ih_version(ih); + old_key_comp = le_ih_k_offset(ih); + old_len = ih_item_len(ih); + + /* Calculate key component and item length to insert into S_new[i] */ + set_le_ih_k_offset(ih, + le_ih_k_offset(ih) + + ((old_len - + sbytes[i]) << + (is_indirect_le_ih + (ih) ? 
tb->tb_sb-> + s_blocksize_bits - + UNFM_P_SHIFT : + 0))); + + put_ih_item_len(ih, sbytes[i]); + + /* Insert part of the item into S_new[i] before 0-th item */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = NULL; + bi.bi_position = 0; + + if ((old_len - sbytes[i]) > zeros_num) { + r_zeros_number = 0; + r_body = + body + (old_len - + sbytes[i]) - + zeros_num; + } else { + r_body = body; + r_zeros_number = + zeros_num - (old_len - + sbytes[i]); + zeros_num -= r_zeros_number; + } + + leaf_insert_into_buf(&bi, 0, ih, r_body, + r_zeros_number); + + /* Calculate key component and item length to insert into S[i] */ + set_le_ih_k_offset(ih, old_key_comp); + put_ih_item_len(ih, + old_len - sbytes[i]); + tb->insert_size[0] -= sbytes[i]; + } else { /* whole new item falls into S_new[i] */ + + /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */ + leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, + snum[i] - 1, sbytes[i], + S_new[i]); + + /* Insert new item into S_new[i] */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = NULL; + bi.bi_position = 0; + leaf_insert_into_buf(&bi, + item_pos - n + + snum[i] - 1, ih, + body, zeros_num); + + zeros_num = tb->insert_size[0] = 0; + } + } + else { /* new item or it part don't falls into S_new[i] */ - n = B_NR_ITEMS(tbS0); - - switch (flag) { - case M_INSERT: /* insert item */ - - if ( n - snum[i] < item_pos ) - { /* new item or it's part falls to first new node S_new[i]*/ - if ( item_pos == n - snum[i] + 1 && sbytes[i] != -1 ) - { /* part of new item falls into S_new[i] */ - int old_key_comp, old_len, r_zeros_number; - const char * r_body; - int version; - - /* Move snum[i]-1 items from S[0] to S_new[i] */ - leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]); - /* Remember key component and item length */ - version = ih_version (ih); - old_key_comp = le_ih_k_offset( ih ); - old_len = ih_item_len(ih); - - /* Calculate key component and item length to insert into S_new[i] */ - set_le_ih_k_offset( ih, - 
le_ih_k_offset(ih) + ((old_len - sbytes[i] )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) ); - - put_ih_item_len( ih, sbytes[i] ); - - /* Insert part of the item into S_new[i] before 0-th item */ - bi.tb = tb; - bi.bi_bh = S_new[i]; - bi.bi_parent = NULL; - bi.bi_position = 0; - - if ( (old_len - sbytes[i]) > zeros_num ) { - r_zeros_number = 0; - r_body = body + (old_len - sbytes[i]) - zeros_num; - } - else { - r_body = body; - r_zeros_number = zeros_num - (old_len - sbytes[i]); - zeros_num -= r_zeros_number; - } - - leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number); - - /* Calculate key component and item length to insert into S[i] */ - set_le_ih_k_offset( ih, old_key_comp ); - put_ih_item_len( ih, old_len - sbytes[i] ); - tb->insert_size[0] -= sbytes[i]; - } - else /* whole new item falls into S_new[i] */ - { - /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */ - leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, sbytes[i], S_new[i]); - - /* Insert new item into S_new[i] */ - bi.tb = tb; - bi.bi_bh = S_new[i]; - bi.bi_parent = NULL; - bi.bi_position = 0; - leaf_insert_into_buf (&bi, item_pos - n + snum[i] - 1, ih, body, zeros_num); - - zeros_num = tb->insert_size[0] = 0; - } - } - - else /* new item or it part don't falls into S_new[i] */ - { - leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]); - } - break; - - case M_PASTE: /* append item */ - - if ( n - snum[i] <= item_pos ) /* pasted item or part if it falls to S_new[i] */ - { - if ( item_pos == n - snum[i] && sbytes[i] != -1 ) - { /* we must shift part of the appended item */ - struct item_head * aux_ih; - - RFALSE( ih, "PAP-12210: ih must be 0"); - - if ( is_direntry_le_ih (aux_ih = B_N_PITEM_HEAD(tbS0,item_pos))) { - /* we append to directory item */ - - int entry_count; - - entry_count = ih_entry_count(aux_ih); - - if ( entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count ) { - /* new directory entry falls into 
S_new[i] */ - - RFALSE( ! tb->insert_size[0], - "PAP-12215: insert_size is already 0"); - RFALSE( sbytes[i] - 1 >= entry_count, - "PAP-12220: there are no so much entries (%d), only %d", - sbytes[i] - 1, entry_count); - - /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */ - leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i]-1, S_new[i]); - /* Paste given directory entry to directory item */ - bi.tb = tb; - bi.bi_bh = S_new[i]; - bi.bi_parent = NULL; - bi.bi_position = 0; - leaf_paste_in_buffer (&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, - tb->insert_size[0], body,zeros_num); - /* paste new directory entry */ - leaf_paste_entries ( - bi.bi_bh, 0, pos_in_item - entry_count + sbytes[i] - 1, - 1, (struct reiserfs_de_head *)body, body + DEH_SIZE, - tb->insert_size[0] - ); - tb->insert_size[0] = 0; - pos_in_item++; - } else { /* new directory entry doesn't fall into S_new[i] */ - leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]); + leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, + snum[i], sbytes[i], S_new[i]); } - } - else /* regular object */ - { - int n_shift, n_rem, r_zeros_number; - const char * r_body; - - RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)) || - tb->insert_size[0] <= 0, - "PAP-12225: item too short or insert_size <= 0"); - - /* Calculate number of bytes which must be shifted from appended item */ - n_shift = sbytes[i] - tb->insert_size[0]; - if ( n_shift < 0 ) - n_shift = 0; - leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]); - - /* Calculate number of bytes which must remain in body after append to S_new[i] */ - n_rem = tb->insert_size[0] - sbytes[i]; - if ( n_rem < 0 ) - n_rem = 0; - /* Append part of body into S_new[0] */ - bi.tb = tb; - bi.bi_bh = S_new[i]; - bi.bi_parent = NULL; - bi.bi_position = 0; + break; + + case M_PASTE: /* append item */ + + if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to 
S_new[i] */ + if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */ + struct item_head *aux_ih; + + RFALSE(ih, "PAP-12210: ih must be 0"); + + if (is_direntry_le_ih + (aux_ih = + B_N_PITEM_HEAD(tbS0, item_pos))) { + /* we append to directory item */ + + int entry_count; + + entry_count = + ih_entry_count(aux_ih); + + if (entry_count - sbytes[i] < + pos_in_item + && pos_in_item <= + entry_count) { + /* new directory entry falls into S_new[i] */ + + RFALSE(!tb-> + insert_size[0], + "PAP-12215: insert_size is already 0"); + RFALSE(sbytes[i] - 1 >= + entry_count, + "PAP-12220: there are no so much entries (%d), only %d", + sbytes[i] - 1, + entry_count); + + /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */ + leaf_move_items + (LEAF_FROM_S_TO_SNEW, + tb, snum[i], + sbytes[i] - 1, + S_new[i]); + /* Paste given directory entry to directory item */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = NULL; + bi.bi_position = 0; + leaf_paste_in_buffer + (&bi, 0, + pos_in_item - + entry_count + + sbytes[i] - 1, + tb->insert_size[0], + body, zeros_num); + /* paste new directory entry */ + leaf_paste_entries(bi. 
+ bi_bh, + 0, + pos_in_item + - + entry_count + + + sbytes + [i] - + 1, 1, + (struct + reiserfs_de_head + *) + body, + body + + + DEH_SIZE, + tb-> + insert_size + [0] + ); + tb->insert_size[0] = 0; + pos_in_item++; + } else { /* new directory entry doesn't fall into S_new[i] */ + leaf_move_items + (LEAF_FROM_S_TO_SNEW, + tb, snum[i], + sbytes[i], + S_new[i]); + } + } else { /* regular object */ + + int n_shift, n_rem, + r_zeros_number; + const char *r_body; + + RFALSE(pos_in_item != + ih_item_len + (B_N_PITEM_HEAD + (tbS0, item_pos)) + || tb->insert_size[0] <= + 0, + "PAP-12225: item too short or insert_size <= 0"); + + /* Calculate number of bytes which must be shifted from appended item */ + n_shift = + sbytes[i] - + tb->insert_size[0]; + if (n_shift < 0) + n_shift = 0; + leaf_move_items + (LEAF_FROM_S_TO_SNEW, tb, + snum[i], n_shift, + S_new[i]); + + /* Calculate number of bytes which must remain in body after append to S_new[i] */ + n_rem = + tb->insert_size[0] - + sbytes[i]; + if (n_rem < 0) + n_rem = 0; + /* Append part of body into S_new[0] */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = NULL; + bi.bi_position = 0; + + if (n_rem > zeros_num) { + r_zeros_number = 0; + r_body = + body + n_rem - + zeros_num; + } else { + r_body = body; + r_zeros_number = + zeros_num - n_rem; + zeros_num -= + r_zeros_number; + } + + leaf_paste_in_buffer(&bi, 0, + n_shift, + tb-> + insert_size + [0] - + n_rem, + r_body, + r_zeros_number); + { + struct item_head *tmp; + + tmp = + B_N_PITEM_HEAD(S_new + [i], + 0); + if (is_indirect_le_ih + (tmp)) { + set_ih_free_space + (tmp, 0); + set_le_ih_k_offset + (tmp, + le_ih_k_offset + (tmp) + + (n_rem << + (tb-> + tb_sb-> + s_blocksize_bits + - + UNFM_P_SHIFT))); + } else { + set_le_ih_k_offset + (tmp, + le_ih_k_offset + (tmp) + + n_rem); + } + } + + tb->insert_size[0] = n_rem; + if (!n_rem) + pos_in_item++; + } + } else + /* item falls wholly into S_new[i] */ + { + int ret_val; + struct item_head *pasted; - if ( n_rem > zeros_num 
) { - r_zeros_number = 0; - r_body = body + n_rem - zeros_num; - } - else { - r_body = body; - r_zeros_number = zeros_num - n_rem; - zeros_num -= r_zeros_number; +#ifdef CONFIG_REISERFS_CHECK + struct item_head *ih = + B_N_PITEM_HEAD(tbS0, item_pos); + + if (!is_direntry_le_ih(ih) + && (pos_in_item != ih_item_len(ih) + || tb->insert_size[0] <= 0)) + reiserfs_panic(tb->tb_sb, + "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len"); +#endif /* CONFIG_REISERFS_CHECK */ + + ret_val = + leaf_move_items(LEAF_FROM_S_TO_SNEW, + tb, snum[i], + sbytes[i], + S_new[i]); + + RFALSE(ret_val, + "PAP-12240: unexpected value returned by leaf_move_items (%d)", + ret_val); + + /* paste into item */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = NULL; + bi.bi_position = 0; + leaf_paste_in_buffer(&bi, + item_pos - n + + snum[i], + pos_in_item, + tb->insert_size[0], + body, zeros_num); + + pasted = + B_N_PITEM_HEAD(S_new[i], + item_pos - n + + snum[i]); + if (is_direntry_le_ih(pasted)) { + leaf_paste_entries(bi.bi_bh, + item_pos - + n + snum[i], + pos_in_item, + 1, + (struct + reiserfs_de_head + *)body, + body + + DEH_SIZE, + tb-> + insert_size + [0] + ); + } + + /* if we paste to indirect item update ih_free_space */ + if (is_indirect_le_ih(pasted)) + set_ih_free_space(pasted, 0); + zeros_num = tb->insert_size[0] = 0; + } } - leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0]-n_rem, r_body,r_zeros_number); - { - struct item_head * tmp; - - tmp = B_N_PITEM_HEAD(S_new[i],0); - if (is_indirect_le_ih (tmp)) { - set_ih_free_space (tmp, 0); - set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) + - (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT))); - } else { - set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) + - n_rem ); - } - } + else { /* pasted item doesn't fall into S_new[i] */ - tb->insert_size[0] = n_rem; - if ( ! 
n_rem ) - pos_in_item++; - } + leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, + snum[i], sbytes[i], S_new[i]); + } + break; + default: /* cases d and t */ + reiserfs_panic(tb->tb_sb, + "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)", + (flag == + M_DELETE) ? "DELETE" : ((flag == + M_CUT) ? "CUT" + : "UNKNOWN"), + flag); } - else - /* item falls wholly into S_new[i] */ - { - int ret_val; - struct item_head * pasted; -#ifdef CONFIG_REISERFS_CHECK - struct item_head * ih = B_N_PITEM_HEAD(tbS0,item_pos); - - if ( ! is_direntry_le_ih(ih) && (pos_in_item != ih_item_len(ih) || - tb->insert_size[0] <= 0) ) - reiserfs_panic (tb->tb_sb, "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len"); -#endif /* CONFIG_REISERFS_CHECK */ - - ret_val = leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]); - - RFALSE( ret_val, - "PAP-12240: unexpected value returned by leaf_move_items (%d)", - ret_val); - - /* paste into item */ - bi.tb = tb; - bi.bi_bh = S_new[i]; - bi.bi_parent = NULL; - bi.bi_position = 0; - leaf_paste_in_buffer(&bi, item_pos - n + snum[i], pos_in_item, tb->insert_size[0], body, zeros_num); - - pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]); - if (is_direntry_le_ih (pasted)) - { - leaf_paste_entries ( - bi.bi_bh, item_pos - n + snum[i], pos_in_item, 1, - (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0] - ); - } - - /* if we paste to indirect item update ih_free_space */ - if (is_indirect_le_ih (pasted)) - set_ih_free_space (pasted, 0); - zeros_num = tb->insert_size[0] = 0; - } - } - - else /* pasted item doesn't fall into S_new[i] */ - { - leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]); - } - break; - default: /* cases d and t */ - reiserfs_panic (tb->tb_sb, "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)", - (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? 
"CUT" : "UNKNOWN"), flag); + memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE); + insert_ptr[i] = S_new[i]; + + RFALSE(!buffer_journaled(S_new[i]) + || buffer_journal_dirty(S_new[i]) + || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)", + i, S_new[i]); } - memcpy (insert_key + i,B_N_PKEY(S_new[i],0),KEY_SIZE); - insert_ptr[i] = S_new[i]; - - RFALSE (!buffer_journaled (S_new [i]) || buffer_journal_dirty (S_new [i]) || - buffer_dirty (S_new [i]), - "PAP-12247: S_new[%d] : (%b)", i, S_new[i]); - } - - /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the - affected item which remains in S */ - if ( 0 <= item_pos && item_pos < tb->s0num ) - { /* if we must insert or append into buffer S[0] */ - - switch (flag) - { - case M_INSERT: /* insert item into S[0] */ - bi.tb = tb; - bi.bi_bh = tbS0; - bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); - bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); - leaf_insert_into_buf (&bi, item_pos, ih, body, zeros_num); - - /* If we insert the first key change the delimiting key */ - if( item_pos == 0 ) { - if (tb->CFL[0]) /* can be 0 in reiserfsck */ - replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0); - - } - break; - - case M_PASTE: { /* append item in S[0] */ - struct item_head * pasted; - - pasted = B_N_PITEM_HEAD (tbS0, item_pos); - /* when directory, may be new entry already pasted */ - if (is_direntry_le_ih (pasted)) { - if ( pos_in_item >= 0 && - pos_in_item <= ih_entry_count(pasted) ) { - - RFALSE( ! 
tb->insert_size[0], - "PAP-12260: insert_size is 0 already"); - - /* prepare space */ - bi.tb = tb; - bi.bi_bh = tbS0; - bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); - bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); - leaf_paste_in_buffer(&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num); - - /* paste entry */ - leaf_paste_entries ( - bi.bi_bh, item_pos, pos_in_item, 1, (struct reiserfs_de_head *)body, - body + DEH_SIZE, tb->insert_size[0] - ); - if ( ! item_pos && ! pos_in_item ) { - RFALSE( !tb->CFL[0] || !tb->L[0], - "PAP-12270: CFL[0]/L[0] must be specified"); - if (tb->CFL[0]) { - replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0); + /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the + affected item which remains in S */ + if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */ + + switch (flag) { + case M_INSERT: /* insert item into S[0] */ + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0); + bi.bi_position = PATH_H_POSITION(tb->tb_path, 1); + leaf_insert_into_buf(&bi, item_pos, ih, body, + zeros_num); + + /* If we insert the first key change the delimiting key */ + if (item_pos == 0) { + if (tb->CFL[0]) /* can be 0 in reiserfsck */ + replace_key(tb, tb->CFL[0], tb->lkey[0], + tbS0, 0); } - } - tb->insert_size[0] = 0; - } - } else { /* regular object */ - if ( pos_in_item == ih_item_len(pasted) ) { - - RFALSE( tb->insert_size[0] <= 0, - "PAP-12275: insert size must not be %d", - tb->insert_size[0]); - bi.tb = tb; - bi.bi_bh = tbS0; - bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); - bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); - leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num); - - if (is_indirect_le_ih (pasted)) { + break; + + case M_PASTE:{ /* append item in S[0] */ + struct item_head *pasted; + + pasted = B_N_PITEM_HEAD(tbS0, item_pos); + /* when directory, may 
be new entry already pasted */ + if (is_direntry_le_ih(pasted)) { + if (pos_in_item >= 0 && + pos_in_item <= + ih_entry_count(pasted)) { + + RFALSE(!tb->insert_size[0], + "PAP-12260: insert_size is 0 already"); + + /* prepare space */ + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = + PATH_H_PPARENT(tb->tb_path, + 0); + bi.bi_position = + PATH_H_POSITION(tb->tb_path, + 1); + leaf_paste_in_buffer(&bi, + item_pos, + pos_in_item, + tb-> + insert_size + [0], body, + zeros_num); + + /* paste entry */ + leaf_paste_entries(bi.bi_bh, + item_pos, + pos_in_item, + 1, + (struct + reiserfs_de_head + *)body, + body + + DEH_SIZE, + tb-> + insert_size + [0] + ); + if (!item_pos && !pos_in_item) { + RFALSE(!tb->CFL[0] + || !tb->L[0], + "PAP-12270: CFL[0]/L[0] must be specified"); + if (tb->CFL[0]) { + replace_key(tb, + tb-> + CFL + [0], + tb-> + lkey + [0], + tbS0, + 0); + + } + } + tb->insert_size[0] = 0; + } + } else { /* regular object */ + if (pos_in_item == ih_item_len(pasted)) { + + RFALSE(tb->insert_size[0] <= 0, + "PAP-12275: insert size must not be %d", + tb->insert_size[0]); + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = + PATH_H_PPARENT(tb->tb_path, + 0); + bi.bi_position = + PATH_H_POSITION(tb->tb_path, + 1); + leaf_paste_in_buffer(&bi, + item_pos, + pos_in_item, + tb-> + insert_size + [0], body, + zeros_num); + + if (is_indirect_le_ih(pasted)) { #if 0 - RFALSE( tb->insert_size[0] != UNFM_P_SIZE, - "PAP-12280: insert_size for indirect item must be %d, not %d", - UNFM_P_SIZE, tb->insert_size[0]); + RFALSE(tb-> + insert_size[0] != + UNFM_P_SIZE, + "PAP-12280: insert_size for indirect item must be %d, not %d", + UNFM_P_SIZE, + tb-> + insert_size[0]); #endif - set_ih_free_space (pasted, 0); - } - tb->insert_size[0] = 0; - } - + set_ih_free_space + (pasted, 0); + } + tb->insert_size[0] = 0; + } #ifdef CONFIG_REISERFS_CHECK - else { - if ( tb->insert_size[0] ) { - print_cur_tb ("12285"); - reiserfs_panic (tb->tb_sb, "PAP-12285: balance_leaf: insert_size must be 0 (%d)", 
tb->insert_size[0]); - } + else { + if (tb->insert_size[0]) { + print_cur_tb("12285"); + reiserfs_panic(tb-> + tb_sb, + "PAP-12285: balance_leaf: insert_size must be 0 (%d)", + tb-> + insert_size + [0]); + } + } +#endif /* CONFIG_REISERFS_CHECK */ + + } + } /* case M_PASTE: */ } -#endif /* CONFIG_REISERFS_CHECK */ - - } - } /* case M_PASTE: */ } - } - #ifdef CONFIG_REISERFS_CHECK - if ( flag == M_PASTE && tb->insert_size[0] ) { - print_cur_tb ("12290"); - reiserfs_panic (tb->tb_sb, "PAP-12290: balance_leaf: insert_size is still not 0 (%d)", tb->insert_size[0]); - } -#endif /* CONFIG_REISERFS_CHECK */ - - return 0; -} /* Leaf level of the tree is balanced (end of balance_leaf) */ - + if (flag == M_PASTE && tb->insert_size[0]) { + print_cur_tb("12290"); + reiserfs_panic(tb->tb_sb, + "PAP-12290: balance_leaf: insert_size is still not 0 (%d)", + tb->insert_size[0]); + } +#endif /* CONFIG_REISERFS_CHECK */ + return 0; +} /* Leaf level of the tree is balanced (end of balance_leaf) */ /* Make empty node */ -void make_empty_node (struct buffer_info * bi) +void make_empty_node(struct buffer_info *bi) { - struct block_head * blkh; + struct block_head *blkh; - RFALSE( bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL"); + RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL"); - blkh = B_BLK_HEAD(bi->bi_bh); - set_blkh_nr_item( blkh, 0 ); - set_blkh_free_space( blkh, MAX_CHILD_SIZE(bi->bi_bh) ); + blkh = B_BLK_HEAD(bi->bi_bh); + set_blkh_nr_item(blkh, 0); + set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh)); - if (bi->bi_parent) - B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */ + if (bi->bi_parent) + B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */ } - /* Get first empty buffer */ -struct buffer_head * get_FEB (struct tree_balance * tb) +struct buffer_head *get_FEB(struct tree_balance *tb) { - int i; - struct buffer_head * first_b; - struct buffer_info bi; - - for (i = 0; i < 
MAX_FEB_SIZE; i ++) - if (tb->FEB[i] != 0) - break; - - if (i == MAX_FEB_SIZE) - reiserfs_panic(tb->tb_sb, "vs-12300: get_FEB: FEB list is empty"); - - bi.tb = tb; - bi.bi_bh = first_b = tb->FEB[i]; - bi.bi_parent = NULL; - bi.bi_position = 0; - make_empty_node (&bi); - set_buffer_uptodate(first_b); - tb->FEB[i] = NULL; - tb->used[i] = first_b; - - return(first_b); -} + int i; + struct buffer_head *first_b; + struct buffer_info bi; + for (i = 0; i < MAX_FEB_SIZE; i++) + if (tb->FEB[i] != 0) + break; + + if (i == MAX_FEB_SIZE) + reiserfs_panic(tb->tb_sb, + "vs-12300: get_FEB: FEB list is empty"); + + bi.tb = tb; + bi.bi_bh = first_b = tb->FEB[i]; + bi.bi_parent = NULL; + bi.bi_position = 0; + make_empty_node(&bi); + set_buffer_uptodate(first_b); + tb->FEB[i] = NULL; + tb->used[i] = first_b; + + return (first_b); +} /* This is now used because reiserfs_free_block has to be able to ** schedule. */ -static void store_thrown (struct tree_balance * tb, struct buffer_head * bh) +static void store_thrown(struct tree_balance *tb, struct buffer_head *bh) { - int i; - - if (buffer_dirty (bh)) - reiserfs_warning (tb->tb_sb, "store_thrown deals with dirty buffer"); - for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i ++) - if (!tb->thrown[i]) { - tb->thrown[i] = bh; - get_bh(bh) ; /* free_thrown puts this */ - return; - } - reiserfs_warning (tb->tb_sb, "store_thrown: too many thrown buffers"); + int i; + + if (buffer_dirty(bh)) + reiserfs_warning(tb->tb_sb, + "store_thrown deals with dirty buffer"); + for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++) + if (!tb->thrown[i]) { + tb->thrown[i] = bh; + get_bh(bh); /* free_thrown puts this */ + return; + } + reiserfs_warning(tb->tb_sb, "store_thrown: too many thrown buffers"); } -static void free_thrown(struct tree_balance *tb) { - int i ; - b_blocknr_t blocknr ; - for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i++) { - if (tb->thrown[i]) { - blocknr = tb->thrown[i]->b_blocknr ; - if 
(buffer_dirty (tb->thrown[i])) - reiserfs_warning (tb->tb_sb, - "free_thrown deals with dirty buffer %d", - blocknr); - brelse(tb->thrown[i]) ; /* incremented in store_thrown */ - reiserfs_free_block (tb->transaction_handle, NULL, blocknr, 0); +static void free_thrown(struct tree_balance *tb) +{ + int i; + b_blocknr_t blocknr; + for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++) { + if (tb->thrown[i]) { + blocknr = tb->thrown[i]->b_blocknr; + if (buffer_dirty(tb->thrown[i])) + reiserfs_warning(tb->tb_sb, + "free_thrown deals with dirty buffer %d", + blocknr); + brelse(tb->thrown[i]); /* incremented in store_thrown */ + reiserfs_free_block(tb->transaction_handle, NULL, + blocknr, 0); + } } - } } -void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh) +void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh) { - struct block_head *blkh; - blkh = B_BLK_HEAD(bh); - set_blkh_level( blkh, FREE_LEVEL ); - set_blkh_nr_item( blkh, 0 ); - - clear_buffer_dirty(bh); - store_thrown (tb, bh); + struct block_head *blkh; + blkh = B_BLK_HEAD(bh); + set_blkh_level(blkh, FREE_LEVEL); + set_blkh_nr_item(blkh, 0); + + clear_buffer_dirty(bh); + store_thrown(tb, bh); } /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/ -void replace_key (struct tree_balance * tb, struct buffer_head * dest, int n_dest, - struct buffer_head * src, int n_src) +void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest, + struct buffer_head *src, int n_src) { - RFALSE( dest == NULL || src == NULL, - "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)", - src, dest); - RFALSE( ! B_IS_KEYS_LEVEL (dest), - "vs-12310: invalid level (%z) for destination buffer. 
dest must be leaf", - dest); - RFALSE( n_dest < 0 || n_src < 0, - "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest); - RFALSE( n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src), - "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big", - n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest)); - - if (B_IS_ITEMS_LEVEL (src)) - /* source buffer contains leaf node */ - memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PITEM_HEAD(src,n_src), KEY_SIZE); - else - memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PDELIM_KEY(src,n_src), KEY_SIZE); - - do_balance_mark_internal_dirty (tb, dest, 0); + RFALSE(dest == NULL || src == NULL, + "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)", + src, dest); + RFALSE(!B_IS_KEYS_LEVEL(dest), + "vs-12310: invalid level (%z) for destination buffer. dest must be leaf", + dest); + RFALSE(n_dest < 0 || n_src < 0, + "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest); + RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src), + "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big", + n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest)); + + if (B_IS_ITEMS_LEVEL(src)) + /* source buffer contains leaf node */ + memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src), + KEY_SIZE); + else + memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src), + KEY_SIZE); + + do_balance_mark_internal_dirty(tb, dest, 0); } - -int get_left_neighbor_position ( - struct tree_balance * tb, - int h - ) +int get_left_neighbor_position(struct tree_balance *tb, int h) { - int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1); + int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1); - RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FL[h] == 0, - "vs-12325: FL[%d](%p) or F[%d](%p) does not exist", - h, tb->FL[h], h, PATH_H_PPARENT (tb->tb_path, h)); + RFALSE(PATH_H_PPARENT(tb->tb_path, h) == 0 || tb->FL[h] == 0, + "vs-12325: FL[%d](%p) or F[%d](%p) does not exist", + h, tb->FL[h], h, 
PATH_H_PPARENT(tb->tb_path, h)); - if (Sh_position == 0) - return B_NR_ITEMS (tb->FL[h]); - else - return Sh_position - 1; + if (Sh_position == 0) + return B_NR_ITEMS(tb->FL[h]); + else + return Sh_position - 1; } - -int get_right_neighbor_position (struct tree_balance * tb, int h) +int get_right_neighbor_position(struct tree_balance *tb, int h) { - int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1); + int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1); - RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FR[h] == 0, - "vs-12330: F[%d](%p) or FR[%d](%p) does not exist", - h, PATH_H_PPARENT (tb->tb_path, h), h, tb->FR[h]); + RFALSE(PATH_H_PPARENT(tb->tb_path, h) == 0 || tb->FR[h] == 0, + "vs-12330: F[%d](%p) or FR[%d](%p) does not exist", + h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]); - if (Sh_position == B_NR_ITEMS (PATH_H_PPARENT (tb->tb_path, h))) - return 0; - else - return Sh_position + 1; + if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h))) + return 0; + else + return Sh_position + 1; } - #ifdef CONFIG_REISERFS_CHECK -int is_reusable (struct super_block * s, b_blocknr_t block, int bit_value); -static void check_internal_node (struct super_block * s, struct buffer_head * bh, char * mes) +int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value); +static void check_internal_node(struct super_block *s, struct buffer_head *bh, + char *mes) { - struct disk_child * dc; - int i; - - RFALSE( !bh, "PAP-12336: bh == 0"); - - if (!bh || !B_IS_IN_TREE (bh)) - return; - - RFALSE( !buffer_dirty (bh) && - !(buffer_journaled(bh) || buffer_journal_dirty(bh)), - "PAP-12337: buffer (%b) must be dirty", bh); - dc = B_N_CHILD (bh, 0); - - for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) { - if (!is_reusable (s, dc_block_number(dc), 1) ) { - print_cur_tb (mes); - reiserfs_panic (s, "PAP-12338: check_internal_node: invalid child pointer %y in %b", dc, bh); - } - } -} + struct disk_child *dc; + int i; + RFALSE(!bh, "PAP-12336: bh == 0"); -static 
int locked_or_not_in_tree (struct buffer_head * bh, char * which) -{ - if ( (!buffer_journal_prepared (bh) && buffer_locked (bh)) || - !B_IS_IN_TREE (bh) ) { - reiserfs_warning (NULL, "vs-12339: locked_or_not_in_tree: %s (%b)", - which, bh); - return 1; - } - return 0; -} + if (!bh || !B_IS_IN_TREE(bh)) + return; + RFALSE(!buffer_dirty(bh) && + !(buffer_journaled(bh) || buffer_journal_dirty(bh)), + "PAP-12337: buffer (%b) must be dirty", bh); + dc = B_N_CHILD(bh, 0); -static int check_before_balancing (struct tree_balance * tb) -{ - int retval = 0; - - if ( cur_tb ) { - reiserfs_panic (tb->tb_sb, "vs-12335: check_before_balancing: " - "suspect that schedule occurred based on cur_tb not being null at this point in code. " - "do_balance cannot properly handle schedule occurring while it runs."); - } - - /* double check that buffers that we will modify are unlocked. (fix_nodes should already have - prepped all of these for us). */ - if ( tb->lnum[0] ) { - retval |= locked_or_not_in_tree (tb->L[0], "L[0]"); - retval |= locked_or_not_in_tree (tb->FL[0], "FL[0]"); - retval |= locked_or_not_in_tree (tb->CFL[0], "CFL[0]"); - check_leaf (tb->L[0]); - } - if ( tb->rnum[0] ) { - retval |= locked_or_not_in_tree (tb->R[0], "R[0]"); - retval |= locked_or_not_in_tree (tb->FR[0], "FR[0]"); - retval |= locked_or_not_in_tree (tb->CFR[0], "CFR[0]"); - check_leaf (tb->R[0]); - } - retval |= locked_or_not_in_tree (PATH_PLAST_BUFFER (tb->tb_path), "S[0]"); - check_leaf (PATH_PLAST_BUFFER (tb->tb_path)); - - return retval; + for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) { + if (!is_reusable(s, dc_block_number(dc), 1)) { + print_cur_tb(mes); + reiserfs_panic(s, + "PAP-12338: check_internal_node: invalid child pointer %y in %b", + dc, bh); + } + } } +static int locked_or_not_in_tree(struct buffer_head *bh, char *which) +{ + if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) || + !B_IS_IN_TREE(bh)) { + reiserfs_warning(NULL, + "vs-12339: locked_or_not_in_tree: %s (%b)", + which, bh); + 
return 1; + } + return 0; +} -static void check_after_balance_leaf (struct tree_balance * tb) +static int check_before_balancing(struct tree_balance *tb) { - if (tb->lnum[0]) { - if (B_FREE_SPACE (tb->L[0]) != - MAX_CHILD_SIZE (tb->L[0]) - dc_size(B_N_CHILD (tb->FL[0], get_left_neighbor_position (tb, 0)))) { - print_cur_tb ("12221"); - reiserfs_panic (tb->tb_sb, "PAP-12355: check_after_balance_leaf: shift to left was incorrect"); + int retval = 0; + + if (cur_tb) { + reiserfs_panic(tb->tb_sb, "vs-12335: check_before_balancing: " + "suspect that schedule occurred based on cur_tb not being null at this point in code. " + "do_balance cannot properly handle schedule occurring while it runs."); } - } - if (tb->rnum[0]) { - if (B_FREE_SPACE (tb->R[0]) != - MAX_CHILD_SIZE (tb->R[0]) - dc_size(B_N_CHILD (tb->FR[0], get_right_neighbor_position (tb, 0)))) { - print_cur_tb ("12222"); - reiserfs_panic (tb->tb_sb, "PAP-12360: check_after_balance_leaf: shift to right was incorrect"); + + /* double check that buffers that we will modify are unlocked. (fix_nodes should already have + prepped all of these for us). 
*/ + if (tb->lnum[0]) { + retval |= locked_or_not_in_tree(tb->L[0], "L[0]"); + retval |= locked_or_not_in_tree(tb->FL[0], "FL[0]"); + retval |= locked_or_not_in_tree(tb->CFL[0], "CFL[0]"); + check_leaf(tb->L[0]); } - } - if (PATH_H_PBUFFER(tb->tb_path,1) && - (B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) != - (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) - - dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), - PATH_H_POSITION (tb->tb_path, 1)))) )) { - int left = B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)); - int right = (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) - - dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), - PATH_H_POSITION (tb->tb_path, 1)))); - print_cur_tb ("12223"); - reiserfs_warning (tb->tb_sb, - "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; " - "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d", - left, - MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)), - PATH_H_PBUFFER(tb->tb_path,1), - PATH_H_POSITION (tb->tb_path, 1), - dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), PATH_H_POSITION (tb->tb_path, 1 )) ), - right ); - reiserfs_panic (tb->tb_sb, "PAP-12365: check_after_balance_leaf: S is incorrect"); - } -} + if (tb->rnum[0]) { + retval |= locked_or_not_in_tree(tb->R[0], "R[0]"); + retval |= locked_or_not_in_tree(tb->FR[0], "FR[0]"); + retval |= locked_or_not_in_tree(tb->CFR[0], "CFR[0]"); + check_leaf(tb->R[0]); + } + retval |= locked_or_not_in_tree(PATH_PLAST_BUFFER(tb->tb_path), "S[0]"); + check_leaf(PATH_PLAST_BUFFER(tb->tb_path)); + return retval; +} -static void check_leaf_level (struct tree_balance * tb) +static void check_after_balance_leaf(struct tree_balance *tb) { - check_leaf (tb->L[0]); - check_leaf (tb->R[0]); - check_leaf (PATH_PLAST_BUFFER (tb->tb_path)); + if (tb->lnum[0]) { + if (B_FREE_SPACE(tb->L[0]) != + MAX_CHILD_SIZE(tb->L[0]) - + dc_size(B_N_CHILD + (tb->FL[0], get_left_neighbor_position(tb, 0)))) { + print_cur_tb("12221"); + reiserfs_panic(tb->tb_sb, + "PAP-12355: check_after_balance_leaf: shift to left was 
incorrect"); + } + } + if (tb->rnum[0]) { + if (B_FREE_SPACE(tb->R[0]) != + MAX_CHILD_SIZE(tb->R[0]) - + dc_size(B_N_CHILD + (tb->FR[0], get_right_neighbor_position(tb, 0)))) { + print_cur_tb("12222"); + reiserfs_panic(tb->tb_sb, + "PAP-12360: check_after_balance_leaf: shift to right was incorrect"); + } + } + if (PATH_H_PBUFFER(tb->tb_path, 1) && + (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) != + (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) - + dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1), + PATH_H_POSITION(tb->tb_path, 1)))))) { + int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)); + int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) - + dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1), + PATH_H_POSITION(tb->tb_path, + 1)))); + print_cur_tb("12223"); + reiserfs_warning(tb->tb_sb, + "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; " + "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d", + left, + MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)), + PATH_H_PBUFFER(tb->tb_path, 1), + PATH_H_POSITION(tb->tb_path, 1), + dc_size(B_N_CHILD + (PATH_H_PBUFFER(tb->tb_path, 1), + PATH_H_POSITION(tb->tb_path, 1))), + right); + reiserfs_panic(tb->tb_sb, + "PAP-12365: check_after_balance_leaf: S is incorrect"); + } } -static void check_internal_levels (struct tree_balance * tb) +static void check_leaf_level(struct tree_balance *tb) { - int h; + check_leaf(tb->L[0]); + check_leaf(tb->R[0]); + check_leaf(PATH_PLAST_BUFFER(tb->tb_path)); +} - /* check all internal nodes */ - for (h = 1; tb->insert_size[h]; h ++) { - check_internal_node (tb->tb_sb, PATH_H_PBUFFER (tb->tb_path, h), "BAD BUFFER ON PATH"); - if (tb->lnum[h]) - check_internal_node (tb->tb_sb, tb->L[h], "BAD L"); - if (tb->rnum[h]) - check_internal_node (tb->tb_sb, tb->R[h], "BAD R"); - } +static void check_internal_levels(struct tree_balance *tb) +{ + int h; + + /* check all internal nodes */ + for (h = 1; tb->insert_size[h]; h++) { + check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, 
h), + "BAD BUFFER ON PATH"); + if (tb->lnum[h]) + check_internal_node(tb->tb_sb, tb->L[h], "BAD L"); + if (tb->rnum[h]) + check_internal_node(tb->tb_sb, tb->R[h], "BAD R"); + } } #endif - - - - - /* Now we have all of the buffers that must be used in balancing of the tree. We rely on the assumption that schedule() will not occur while do_balance works. ( Only interrupt handlers are acceptable.) @@ -1484,114 +2029,109 @@ static void check_internal_levels (struct tree_balance * tb) */ -static inline void do_balance_starts (struct tree_balance *tb) +static inline void do_balance_starts(struct tree_balance *tb) { - /* use print_cur_tb() to see initial state of struct - tree_balance */ + /* use print_cur_tb() to see initial state of struct + tree_balance */ - /* store_print_tb (tb); */ + /* store_print_tb (tb); */ - /* do not delete, just comment it out */ + /* do not delete, just comment it out */ /* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb, "check");*/ - RFALSE( check_before_balancing (tb), "PAP-12340: locked buffers in TB"); + RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); #ifdef CONFIG_REISERFS_CHECK - cur_tb = tb; + cur_tb = tb; #endif } - -static inline void do_balance_completed (struct tree_balance * tb) +static inline void do_balance_completed(struct tree_balance *tb) { - + #ifdef CONFIG_REISERFS_CHECK - check_leaf_level (tb); - check_internal_levels (tb); - cur_tb = NULL; + check_leaf_level(tb); + check_internal_levels(tb); + cur_tb = NULL; #endif - /* reiserfs_free_block is no longer schedule safe. So, we need to - ** put the buffers we want freed on the thrown list during do_balance, - ** and then free them now - */ - - REISERFS_SB(tb->tb_sb)->s_do_balance ++; + /* reiserfs_free_block is no longer schedule safe. 
So, we need to + ** put the buffers we want freed on the thrown list during do_balance, + ** and then free them now + */ + REISERFS_SB(tb->tb_sb)->s_do_balance++; - /* release all nodes hold to perform the balancing */ - unfix_nodes(tb); + /* release all nodes hold to perform the balancing */ + unfix_nodes(tb); - free_thrown(tb) ; + free_thrown(tb); } +void do_balance(struct tree_balance *tb, /* tree_balance structure */ + struct item_head *ih, /* item header of inserted item */ + const char *body, /* body of inserted item or bytes to paste */ + int flag) +{ /* i - insert, d - delete + c - cut, p - paste + + Cut means delete part of an item + (includes removing an entry from a + directory). + + Delete means delete whole item. + + Insert means add a new item into the + tree. + + Paste means to append to the end of an + existing file or to insert a directory + entry. */ + int child_pos, /* position of a child node in its parent */ + h; /* level of the tree being processed */ + struct item_head insert_key[2]; /* in our processing of one level + we sometimes determine what + must be inserted into the next + higher level. 
This insertion + consists of a key or two keys + and their corresponding + pointers */ + struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next + level */ + + tb->tb_mode = flag; + tb->need_balance_dirty = 0; + + if (FILESYSTEM_CHANGED_TB(tb)) { + reiserfs_panic(tb->tb_sb, + "clm-6000: do_balance, fs generation has changed\n"); + } + /* if we have no real work to do */ + if (!tb->insert_size[0]) { + reiserfs_warning(tb->tb_sb, + "PAP-12350: do_balance: insert_size == 0, mode == %c", + flag); + unfix_nodes(tb); + return; + } + atomic_inc(&(fs_generation(tb->tb_sb))); + do_balance_starts(tb); - - -void do_balance (struct tree_balance * tb, /* tree_balance structure */ - struct item_head * ih, /* item header of inserted item */ - const char * body, /* body of inserted item or bytes to paste */ - int flag) /* i - insert, d - delete - c - cut, p - paste - - Cut means delete part of an item - (includes removing an entry from a - directory). - - Delete means delete whole item. - - Insert means add a new item into the - tree. - - Paste means to append to the end of an - existing file or to insert a directory - entry. */ -{ - int child_pos, /* position of a child node in its parent */ - h; /* level of the tree being processed */ - struct item_head insert_key[2]; /* in our processing of one level - we sometimes determine what - must be inserted into the next - higher level. This insertion - consists of a key or two keys - and their corresponding - pointers */ - struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next - level */ - - tb->tb_mode = flag; - tb->need_balance_dirty = 0; - - if (FILESYSTEM_CHANGED_TB(tb)) { - reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n") ; - } - /* if we have no real work to do */ - if ( ! 
tb->insert_size[0] ) { - reiserfs_warning (tb->tb_sb, - "PAP-12350: do_balance: insert_size == 0, mode == %c", - flag); - unfix_nodes(tb); - return; - } - - atomic_inc (&(fs_generation (tb->tb_sb))); - do_balance_starts (tb); - /* balance leaf returns 0 except if combining L R and S into one node. see balance_internal() for explanation of this - line of code.*/ - child_pos = PATH_H_B_ITEM_ORDER (tb->tb_path, 0) + - balance_leaf (tb, ih, body, flag, insert_key, insert_ptr); + line of code. */ + child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) + + balance_leaf(tb, ih, body, flag, insert_key, insert_ptr); #ifdef CONFIG_REISERFS_CHECK - check_after_balance_leaf (tb); + check_after_balance_leaf(tb); #endif - /* Balance internal level of the tree. */ - for ( h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++ ) - child_pos = balance_internal (tb, h, child_pos, insert_key, insert_ptr); - + /* Balance internal level of the tree. */ + for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++) + child_pos = + balance_internal(tb, h, child_pos, insert_key, insert_ptr); - do_balance_completed (tb); + do_balance_completed(tb); } diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 12e91209544..c9f178fb494 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -2,7 +2,6 @@ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ - #include #include #include @@ -31,82 +30,84 @@ ** We use reiserfs_truncate_file to pack the tail, since it already has ** all the conditions coded. 
*/ -static int reiserfs_file_release (struct inode * inode, struct file * filp) +static int reiserfs_file_release(struct inode *inode, struct file *filp) { - struct reiserfs_transaction_handle th ; - int err; - int jbegin_failure = 0; + struct reiserfs_transaction_handle th; + int err; + int jbegin_failure = 0; - if (!S_ISREG (inode->i_mode)) - BUG (); + if (!S_ISREG(inode->i_mode)) + BUG(); - /* fast out for when nothing needs to be done */ - if ((atomic_read(&inode->i_count) > 1 || - !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || - !tail_has_to_be_packed(inode)) && - REISERFS_I(inode)->i_prealloc_count <= 0) { - return 0; - } - - reiserfs_write_lock(inode->i_sb); - down (&inode->i_sem); - /* freeing preallocation only involves relogging blocks that - * are already in the current transaction. preallocation gets - * freed at the end of each transaction, so it is impossible for - * us to log any additional blocks (including quota blocks) - */ - err = journal_begin(&th, inode->i_sb, 1); - if (err) { - /* uh oh, we can't allow the inode to go away while there - * is still preallocation blocks pending. Try to join the - * aborted transaction - */ - jbegin_failure = err; - err = journal_join_abort(&th, inode->i_sb, 1); + /* fast out for when nothing needs to be done */ + if ((atomic_read(&inode->i_count) > 1 || + !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || + !tail_has_to_be_packed(inode)) && + REISERFS_I(inode)->i_prealloc_count <= 0) { + return 0; + } + reiserfs_write_lock(inode->i_sb); + down(&inode->i_sem); + /* freeing preallocation only involves relogging blocks that + * are already in the current transaction. preallocation gets + * freed at the end of each transaction, so it is impossible for + * us to log any additional blocks (including quota blocks) + */ + err = journal_begin(&th, inode->i_sb, 1); if (err) { - /* hmpf, our choices here aren't good. 
We can pin the inode - * which will disallow unmount from every happening, we can - * do nothing, which will corrupt random memory on unmount, - * or we can forcibly remove the file from the preallocation - * list, which will leak blocks on disk. Lets pin the inode - * and let the admin know what is going on. - */ - igrab(inode); - reiserfs_warning(inode->i_sb, "pinning inode %lu because the " - "preallocation can't be freed"); - goto out; + /* uh oh, we can't allow the inode to go away while there + * is still preallocation blocks pending. Try to join the + * aborted transaction + */ + jbegin_failure = err; + err = journal_join_abort(&th, inode->i_sb, 1); + + if (err) { + /* hmpf, our choices here aren't good. We can pin the inode + * which will disallow unmount from every happening, we can + * do nothing, which will corrupt random memory on unmount, + * or we can forcibly remove the file from the preallocation + * list, which will leak blocks on disk. Lets pin the inode + * and let the admin know what is going on. 
+ */ + igrab(inode); + reiserfs_warning(inode->i_sb, + "pinning inode %lu because the " + "preallocation can't be freed"); + goto out; + } } - } - reiserfs_update_inode_transaction(inode) ; + reiserfs_update_inode_transaction(inode); #ifdef REISERFS_PREALLOCATE - reiserfs_discard_prealloc (&th, inode); + reiserfs_discard_prealloc(&th, inode); #endif - err = journal_end(&th, inode->i_sb, 1); - - /* copy back the error code from journal_begin */ - if (!err) - err = jbegin_failure; - - if (!err && atomic_read(&inode->i_count) <= 1 && - (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && - tail_has_to_be_packed (inode)) { - /* if regular file is released by last holder and it has been - appended (we append by unformatted node only) or its direct - item(s) had to be converted, then it may have to be - indirect2direct converted */ - err = reiserfs_truncate_file(inode, 0) ; - } -out: - up (&inode->i_sem); - reiserfs_write_unlock(inode->i_sb); - return err; + err = journal_end(&th, inode->i_sb, 1); + + /* copy back the error code from journal_begin */ + if (!err) + err = jbegin_failure; + + if (!err && atomic_read(&inode->i_count) <= 1 && + (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && + tail_has_to_be_packed(inode)) { + /* if regular file is released by last holder and it has been + appended (we append by unformatted node only) or its direct + item(s) had to be converted, then it may have to be + indirect2direct converted */ + err = reiserfs_truncate_file(inode, 0); + } + out: + up(&inode->i_sem); + reiserfs_write_unlock(inode->i_sb); + return err; } -static void reiserfs_vfs_truncate_file(struct inode *inode) { - reiserfs_truncate_file(inode, 1) ; +static void reiserfs_vfs_truncate_file(struct inode *inode) +{ + reiserfs_truncate_file(inode, 1); } /* Sync a reiserfs file. */ @@ -116,26 +117,24 @@ static void reiserfs_vfs_truncate_file(struct inode *inode) { * be removed... 
*/ -static int reiserfs_sync_file( - struct file * p_s_filp, - struct dentry * p_s_dentry, - int datasync - ) { - struct inode * p_s_inode = p_s_dentry->d_inode; - int n_err; - int barrier_done; - - if (!S_ISREG(p_s_inode->i_mode)) - BUG (); - n_err = sync_mapping_buffers(p_s_inode->i_mapping) ; - reiserfs_write_lock(p_s_inode->i_sb); - barrier_done = reiserfs_commit_for_inode(p_s_inode); - reiserfs_write_unlock(p_s_inode->i_sb); - if (barrier_done != 1) - blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL); - if (barrier_done < 0) - return barrier_done; - return ( n_err < 0 ) ? -EIO : 0; +static int reiserfs_sync_file(struct file *p_s_filp, + struct dentry *p_s_dentry, int datasync) +{ + struct inode *p_s_inode = p_s_dentry->d_inode; + int n_err; + int barrier_done; + + if (!S_ISREG(p_s_inode->i_mode)) + BUG(); + n_err = sync_mapping_buffers(p_s_inode->i_mapping); + reiserfs_write_lock(p_s_inode->i_sb); + barrier_done = reiserfs_commit_for_inode(p_s_inode); + reiserfs_write_unlock(p_s_inode->i_sb); + if (barrier_done != 1) + blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL); + if (barrier_done < 0) + return barrier_done; + return (n_err < 0) ? -EIO : 0; } /* I really do not want to play with memory shortage right now, so @@ -147,700 +146,797 @@ static int reiserfs_sync_file( /* Allocates blocks for a file to fulfil write request. Maps all unmapped but prepared pages from the list. 
Updates metadata with newly allocated blocknumbers as needed */ -static int reiserfs_allocate_blocks_for_region( - struct reiserfs_transaction_handle *th, - struct inode *inode, /* Inode we work with */ - loff_t pos, /* Writing position */ - int num_pages, /* number of pages write going - to touch */ - int write_bytes, /* amount of bytes to write */ - struct page **prepared_pages, /* array of - prepared pages - */ - int blocks_to_allocate /* Amount of blocks we - need to allocate to - fit the data into file - */ - ) +static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode we work with */ + loff_t pos, /* Writing position */ + int num_pages, /* number of pages write going + to touch */ + int write_bytes, /* amount of bytes to write */ + struct page **prepared_pages, /* array of + prepared pages + */ + int blocks_to_allocate /* Amount of blocks we + need to allocate to + fit the data into file + */ + ) { - struct cpu_key key; // cpu key of item that we are going to deal with - struct item_head *ih; // pointer to item head that we are going to deal with - struct buffer_head *bh; // Buffer head that contains items that we are going to deal with - __le32 * item; // pointer to item we are going to deal with - INITIALIZE_PATH(path); // path to item, that we are going to deal with. - b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored. - reiserfs_blocknr_hint_t hint; // hint structure for block allocator. - size_t res; // return value of various functions that we call. - int curr_block; // current block used to keep track of unmapped blocks. 
- int i; // loop counter - int itempos; // position in item - unsigned int from = (pos & (PAGE_CACHE_SIZE - 1)); // writing position in - // first page - unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; /* last modified byte offset in last page */ - __u64 hole_size ; // amount of blocks for a file hole, if it needed to be created. - int modifying_this_item = 0; // Flag for items traversal code to keep track - // of the fact that we already prepared - // current block for journal - int will_prealloc = 0; - RFALSE(!blocks_to_allocate, "green-9004: tried to allocate zero blocks?"); - - /* only preallocate if this is a small write */ - if (REISERFS_I(inode)->i_prealloc_count || - (!(write_bytes & (inode->i_sb->s_blocksize -1)) && - blocks_to_allocate < - REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize)) - will_prealloc = REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize; - - allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) * - sizeof(b_blocknr_t), GFP_NOFS); - - /* First we compose a key to point at the writing position, we want to do - that outside of any locking region. */ - make_cpu_key (&key, inode, pos+1, TYPE_ANY, 3/*key length*/); - - /* If we came here, it means we absolutely need to open a transaction, - since we need to allocate some blocks */ - reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that. - res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough - if (res) - goto error_exit; - reiserfs_update_inode_transaction(inode) ; - - /* Look for the in-tree position of our write, need path for block allocator */ - res = search_for_position_by_key(inode->i_sb, &key, &path); - if ( res == IO_ERROR ) { - res = -EIO; - goto error_exit; - } - - /* Allocate blocks */ - /* First fill in "hint" structure for block allocator */ - hint.th = th; // transaction handle. 
- hint.path = &path; // Path, so that block allocator can determine packing locality or whatever it needs to determine. - hint.inode = inode; // Inode is needed by block allocator too. - hint.search_start = 0; // We have no hint on where to search free blocks for block allocator. - hint.key = key.on_disk_key; // on disk key of file. - hint.block = inode->i_blocks>>(inode->i_sb->s_blocksize_bits-9); // Number of disk blocks this file occupies already. - hint.formatted_node = 0; // We are allocating blocks for unformatted node. - hint.preallocate = will_prealloc; - - /* Call block allocator to allocate blocks */ - res = reiserfs_allocate_blocknrs(&hint, allocated_blocks, blocks_to_allocate, blocks_to_allocate); - if ( res != CARRY_ON ) { - if ( res == NO_DISK_SPACE ) { - /* We flush the transaction in case of no space. This way some - blocks might become free */ - SB_JOURNAL(inode->i_sb)->j_must_wait = 1; - res = restart_transaction(th, inode, &path); - if (res) - goto error_exit; - - /* We might have scheduled, so search again */ - res = search_for_position_by_key(inode->i_sb, &key, &path); - if ( res == IO_ERROR ) { - res = -EIO; + struct cpu_key key; // cpu key of item that we are going to deal with + struct item_head *ih; // pointer to item head that we are going to deal with + struct buffer_head *bh; // Buffer head that contains items that we are going to deal with + __le32 *item; // pointer to item we are going to deal with + INITIALIZE_PATH(path); // path to item, that we are going to deal with. + b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored. + reiserfs_blocknr_hint_t hint; // hint structure for block allocator. + size_t res; // return value of various functions that we call. + int curr_block; // current block used to keep track of unmapped blocks. 
+ int i; // loop counter + int itempos; // position in item + unsigned int from = (pos & (PAGE_CACHE_SIZE - 1)); // writing position in + // first page + unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; /* last modified byte offset in last page */ + __u64 hole_size; // amount of blocks for a file hole, if it needed to be created. + int modifying_this_item = 0; // Flag for items traversal code to keep track + // of the fact that we already prepared + // current block for journal + int will_prealloc = 0; + RFALSE(!blocks_to_allocate, + "green-9004: tried to allocate zero blocks?"); + + /* only preallocate if this is a small write */ + if (REISERFS_I(inode)->i_prealloc_count || + (!(write_bytes & (inode->i_sb->s_blocksize - 1)) && + blocks_to_allocate < + REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize)) + will_prealloc = + REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize; + + allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) * + sizeof(b_blocknr_t), GFP_NOFS); + + /* First we compose a key to point at the writing position, we want to do + that outside of any locking region. */ + make_cpu_key(&key, inode, pos + 1, TYPE_ANY, 3 /*key length */ ); + + /* If we came here, it means we absolutely need to open a transaction, + since we need to allocate some blocks */ + reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that. + res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough + if (res) goto error_exit; - } + reiserfs_update_inode_transaction(inode); - /* update changed info for hint structure. 
*/ - res = reiserfs_allocate_blocknrs(&hint, allocated_blocks, blocks_to_allocate, blocks_to_allocate); - if ( res != CARRY_ON ) { - res = -ENOSPC; - pathrelse(&path); + /* Look for the in-tree position of our write, need path for block allocator */ + res = search_for_position_by_key(inode->i_sb, &key, &path); + if (res == IO_ERROR) { + res = -EIO; goto error_exit; - } - } else { - res = -ENOSPC; - pathrelse(&path); - goto error_exit; } - } -#ifdef __BIG_ENDIAN - // Too bad, I have not found any way to convert a given region from - // cpu format to little endian format - { - int i; - for ( i = 0; i < blocks_to_allocate ; i++) - allocated_blocks[i]=cpu_to_le32(allocated_blocks[i]); - } -#endif - - /* Blocks allocating well might have scheduled and tree might have changed, - let's search the tree again */ - /* find where in the tree our write should go */ - res = search_for_position_by_key(inode->i_sb, &key, &path); - if ( res == IO_ERROR ) { - res = -EIO; - goto error_exit_free_blocks; - } - - bh = get_last_bh( &path ); // Get a bufferhead for last element in path. - ih = get_ih( &path ); // Get a pointer to last item head in path. - item = get_item( &path ); // Get a pointer to last item in path - - /* Let's see what we have found */ - if ( res != POSITION_FOUND ) { /* position not found, this means that we - might need to append file with holes - first */ - // Since we are writing past the file's end, we need to find out if - // there is a hole that needs to be inserted before our writing - // position, and how many blocks it is going to cover (we need to - // populate pointers to file blocks representing the hole with zeros) + /* Allocate blocks */ + /* First fill in "hint" structure for block allocator */ + hint.th = th; // transaction handle. + hint.path = &path; // Path, so that block allocator can determine packing locality or whatever it needs to determine. + hint.inode = inode; // Inode is needed by block allocator too. 
+ hint.search_start = 0; // We have no hint on where to search free blocks for block allocator. + hint.key = key.on_disk_key; // on disk key of file. + hint.block = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); // Number of disk blocks this file occupies already. + hint.formatted_node = 0; // We are allocating blocks for unformatted node. + hint.preallocate = will_prealloc; + + /* Call block allocator to allocate blocks */ + res = + reiserfs_allocate_blocknrs(&hint, allocated_blocks, + blocks_to_allocate, blocks_to_allocate); + if (res != CARRY_ON) { + if (res == NO_DISK_SPACE) { + /* We flush the transaction in case of no space. This way some + blocks might become free */ + SB_JOURNAL(inode->i_sb)->j_must_wait = 1; + res = restart_transaction(th, inode, &path); + if (res) + goto error_exit; + + /* We might have scheduled, so search again */ + res = + search_for_position_by_key(inode->i_sb, &key, + &path); + if (res == IO_ERROR) { + res = -EIO; + goto error_exit; + } + /* update changed info for hint structure. 
*/ + res = + reiserfs_allocate_blocknrs(&hint, allocated_blocks, + blocks_to_allocate, + blocks_to_allocate); + if (res != CARRY_ON) { + res = -ENOSPC; + pathrelse(&path); + goto error_exit; + } + } else { + res = -ENOSPC; + pathrelse(&path); + goto error_exit; + } + } +#ifdef __BIG_ENDIAN + // Too bad, I have not found any way to convert a given region from + // cpu format to little endian format { - int item_offset = 1; - /* - * if ih is stat data, its offset is 0 and we don't want to - * add 1 to pos in the hole_size calculation - */ - if (is_statdata_le_ih(ih)) - item_offset = 0; - hole_size = (pos + item_offset - - (le_key_k_offset( get_inode_item_key_version(inode), - &(ih->ih_key)) + - op_bytes_number(ih, inode->i_sb->s_blocksize))) >> - inode->i_sb->s_blocksize_bits; + int i; + for (i = 0; i < blocks_to_allocate; i++) + allocated_blocks[i] = cpu_to_le32(allocated_blocks[i]); } +#endif - if ( hole_size > 0 ) { - int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE ); // How much data to insert first time. - /* area filled with zeroes, to supply as list of zero blocknumbers - We allocate it outside of loop just in case loop would spin for - several iterations. */ - char *zeros = kmalloc(to_paste*UNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway. - if ( !zeros ) { - res = -ENOMEM; + /* Blocks allocating well might have scheduled and tree might have changed, + let's search the tree again */ + /* find where in the tree our write should go */ + res = search_for_position_by_key(inode->i_sb, &key, &path); + if (res == IO_ERROR) { + res = -EIO; goto error_exit_free_blocks; - } - memset ( zeros, 0, to_paste*UNFM_P_SIZE); - do { - to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE ); - if ( is_indirect_le_ih(ih) ) { - /* Ok, there is existing indirect item already. 
Need to append it */ - /* Calculate position past inserted item */ - make_cpu_key( &key, inode, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize), TYPE_INDIRECT, 3); - res = reiserfs_paste_into_item( th, &path, &key, inode, (char *)zeros, UNFM_P_SIZE*to_paste); - if ( res ) { - kfree(zeros); - goto error_exit_free_blocks; - } - } else if ( is_statdata_le_ih(ih) ) { - /* No existing item, create it */ - /* item head for new item */ - struct item_head ins_ih; - - /* create a key for our new item */ - make_cpu_key( &key, inode, 1, TYPE_INDIRECT, 3); - - /* Create new item head for our new item */ - make_le_item_head (&ins_ih, &key, key.version, 1, - TYPE_INDIRECT, to_paste*UNFM_P_SIZE, - 0 /* free space */); - - /* Find where such item should live in the tree */ - res = search_item (inode->i_sb, &key, &path); - if ( res != ITEM_NOT_FOUND ) { - /* item should not exist, otherwise we have error */ - if ( res != -ENOSPC ) { - reiserfs_warning (inode->i_sb, - "green-9008: search_by_key (%K) returned %d", - &key, res); + } + + bh = get_last_bh(&path); // Get a bufferhead for last element in path. + ih = get_ih(&path); // Get a pointer to last item head in path. 
+ item = get_item(&path); // Get a pointer to last item in path + + /* Let's see what we have found */ + if (res != POSITION_FOUND) { /* position not found, this means that we + might need to append file with holes + first */ + // Since we are writing past the file's end, we need to find out if + // there is a hole that needs to be inserted before our writing + // position, and how many blocks it is going to cover (we need to + // populate pointers to file blocks representing the hole with zeros) + + { + int item_offset = 1; + /* + * if ih is stat data, its offset is 0 and we don't want to + * add 1 to pos in the hole_size calculation + */ + if (is_statdata_le_ih(ih)) + item_offset = 0; + hole_size = (pos + item_offset - + (le_key_k_offset + (get_inode_item_key_version(inode), + &(ih->ih_key)) + op_bytes_number(ih, + inode-> + i_sb-> + s_blocksize))) + >> inode->i_sb->s_blocksize_bits; + } + + if (hole_size > 0) { + int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize) / UNFM_P_SIZE); // How much data to insert first time. + /* area filled with zeroes, to supply as list of zero blocknumbers + We allocate it outside of loop just in case loop would spin for + several iterations. */ + char *zeros = kmalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway. + if (!zeros) { + res = -ENOMEM; + goto error_exit_free_blocks; } - res = -EIO; - kfree(zeros); - goto error_exit_free_blocks; - } - res = reiserfs_insert_item( th, &path, &key, &ins_ih, inode, (char *)zeros); - } else { - reiserfs_panic(inode->i_sb, "green-9011: Unexpected key type %K\n", &key); + memset(zeros, 0, to_paste * UNFM_P_SIZE); + do { + to_paste = + min_t(__u64, hole_size, + MAX_ITEM_LEN(inode->i_sb-> + s_blocksize) / + UNFM_P_SIZE); + if (is_indirect_le_ih(ih)) { + /* Ok, there is existing indirect item already. 
Need to append it */ + /* Calculate position past inserted item */ + make_cpu_key(&key, inode, + le_key_k_offset + (get_inode_item_key_version + (inode), + &(ih->ih_key)) + + op_bytes_number(ih, + inode-> + i_sb-> + s_blocksize), + TYPE_INDIRECT, 3); + res = + reiserfs_paste_into_item(th, &path, + &key, + inode, + (char *) + zeros, + UNFM_P_SIZE + * + to_paste); + if (res) { + kfree(zeros); + goto error_exit_free_blocks; + } + } else if (is_statdata_le_ih(ih)) { + /* No existing item, create it */ + /* item head for new item */ + struct item_head ins_ih; + + /* create a key for our new item */ + make_cpu_key(&key, inode, 1, + TYPE_INDIRECT, 3); + + /* Create new item head for our new item */ + make_le_item_head(&ins_ih, &key, + key.version, 1, + TYPE_INDIRECT, + to_paste * + UNFM_P_SIZE, + 0 /* free space */ ); + + /* Find where such item should live in the tree */ + res = + search_item(inode->i_sb, &key, + &path); + if (res != ITEM_NOT_FOUND) { + /* item should not exist, otherwise we have error */ + if (res != -ENOSPC) { + reiserfs_warning(inode-> + i_sb, + "green-9008: search_by_key (%K) returned %d", + &key, + res); + } + res = -EIO; + kfree(zeros); + goto error_exit_free_blocks; + } + res = + reiserfs_insert_item(th, &path, + &key, &ins_ih, + inode, + (char *)zeros); + } else { + reiserfs_panic(inode->i_sb, + "green-9011: Unexpected key type %K\n", + &key); + } + if (res) { + kfree(zeros); + goto error_exit_free_blocks; + } + /* Now we want to check if transaction is too full, and if it is + we restart it. This will also free the path. 
*/ + if (journal_transaction_should_end + (th, th->t_blocks_allocated)) { + res = + restart_transaction(th, inode, + &path); + if (res) { + pathrelse(&path); + kfree(zeros); + goto error_exit; + } + } + + /* Well, need to recalculate path and stuff */ + set_cpu_key_k_offset(&key, + cpu_key_k_offset(&key) + + (to_paste << inode-> + i_blkbits)); + res = + search_for_position_by_key(inode->i_sb, + &key, &path); + if (res == IO_ERROR) { + res = -EIO; + kfree(zeros); + goto error_exit_free_blocks; + } + bh = get_last_bh(&path); + ih = get_ih(&path); + item = get_item(&path); + hole_size -= to_paste; + } while (hole_size); + kfree(zeros); } - if ( res ) { - kfree(zeros); - goto error_exit_free_blocks; + } + // Go through existing indirect items first + // replace all zeroes with blocknumbers from list + // Note that if no corresponding item was found, by previous search, + // it means there are no existing in-tree representation for file area + // we are going to overwrite, so there is nothing to scan through for holes. + for (curr_block = 0, itempos = path.pos_in_item; + curr_block < blocks_to_allocate && res == POSITION_FOUND;) { + retry: + + if (itempos >= ih_item_len(ih) / UNFM_P_SIZE) { + /* We run out of data in this indirect item, let's look for another + one. */ + /* First if we are already modifying current item, log it */ + if (modifying_this_item) { + journal_mark_dirty(th, inode->i_sb, bh); + modifying_this_item = 0; + } + /* Then set the key to look for a new indirect item (offset of old + item is added to old item length */ + set_cpu_key_k_offset(&key, + le_key_k_offset + (get_inode_item_key_version(inode), + &(ih->ih_key)) + + op_bytes_number(ih, + inode->i_sb-> + s_blocksize)); + /* Search ofor position of new key in the tree. 
*/ + res = + search_for_position_by_key(inode->i_sb, &key, + &path); + if (res == IO_ERROR) { + res = -EIO; + goto error_exit_free_blocks; + } + bh = get_last_bh(&path); + ih = get_ih(&path); + item = get_item(&path); + itempos = path.pos_in_item; + continue; // loop to check all kinds of conditions and so on. } - /* Now we want to check if transaction is too full, and if it is - we restart it. This will also free the path. */ - if (journal_transaction_should_end(th, th->t_blocks_allocated)) { - res = restart_transaction(th, inode, &path); - if (res) { - pathrelse (&path); - kfree(zeros); - goto error_exit; - } - } - - /* Well, need to recalculate path and stuff */ - set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + (to_paste << inode->i_blkbits)); - res = search_for_position_by_key(inode->i_sb, &key, &path); - if ( res == IO_ERROR ) { - res = -EIO; - kfree(zeros); - goto error_exit_free_blocks; + /* Ok, we have correct position in item now, so let's see if it is + representing file hole (blocknumber is zero) and fill it if needed */ + if (!item[itempos]) { + /* Ok, a hole. Now we need to check if we already prepared this + block to be journaled */ + while (!modifying_this_item) { // loop until succeed + /* Well, this item is not journaled yet, so we must prepare + it for journal first, before we can change it */ + struct item_head tmp_ih; // We copy item head of found item, + // here to detect if fs changed under + // us while we were preparing for + // journal. + int fs_gen; // We store fs generation here to find if someone + // changes fs under our feet + + copy_item_head(&tmp_ih, ih); // Remember itemhead + fs_gen = get_generation(inode->i_sb); // remember fs generation + reiserfs_prepare_for_journal(inode->i_sb, bh, 1); // Prepare a buffer within which indirect item is stored for changing. 
+ if (fs_changed(fs_gen, inode->i_sb) + && item_moved(&tmp_ih, &path)) { + // Sigh, fs was changed under us, we need to look for new + // location of item we are working with + + /* unmark prepaerd area as journaled and search for it's + new position */ + reiserfs_restore_prepared_buffer(inode-> + i_sb, + bh); + res = + search_for_position_by_key(inode-> + i_sb, + &key, + &path); + if (res == IO_ERROR) { + res = -EIO; + goto error_exit_free_blocks; + } + bh = get_last_bh(&path); + ih = get_ih(&path); + item = get_item(&path); + itempos = path.pos_in_item; + goto retry; + } + modifying_this_item = 1; + } + item[itempos] = allocated_blocks[curr_block]; // Assign new block + curr_block++; } - bh=get_last_bh(&path); - ih=get_ih(&path); - item = get_item(&path); - hole_size -= to_paste; - } while ( hole_size ); - kfree(zeros); + itempos++; } - } - - // Go through existing indirect items first - // replace all zeroes with blocknumbers from list - // Note that if no corresponding item was found, by previous search, - // it means there are no existing in-tree representation for file area - // we are going to overwrite, so there is nothing to scan through for holes. - for ( curr_block = 0, itempos = path.pos_in_item ; curr_block < blocks_to_allocate && res == POSITION_FOUND ; ) { -retry: - - if ( itempos >= ih_item_len(ih)/UNFM_P_SIZE ) { - /* We run out of data in this indirect item, let's look for another - one. */ - /* First if we are already modifying current item, log it */ - if ( modifying_this_item ) { - journal_mark_dirty (th, inode->i_sb, bh); - modifying_this_item = 0; - } - /* Then set the key to look for a new indirect item (offset of old - item is added to old item length */ - set_cpu_key_k_offset( &key, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize)); - /* Search ofor position of new key in the tree. 
*/ - res = search_for_position_by_key(inode->i_sb, &key, &path); - if ( res == IO_ERROR) { - res = -EIO; - goto error_exit_free_blocks; - } - bh=get_last_bh(&path); - ih=get_ih(&path); - item = get_item(&path); - itempos = path.pos_in_item; - continue; // loop to check all kinds of conditions and so on. + + if (modifying_this_item) { // We need to log last-accessed block, if it + // was modified, but not logged yet. + journal_mark_dirty(th, inode->i_sb, bh); } - /* Ok, we have correct position in item now, so let's see if it is - representing file hole (blocknumber is zero) and fill it if needed */ - if ( !item[itempos] ) { - /* Ok, a hole. Now we need to check if we already prepared this - block to be journaled */ - while ( !modifying_this_item ) { // loop until succeed - /* Well, this item is not journaled yet, so we must prepare - it for journal first, before we can change it */ - struct item_head tmp_ih; // We copy item head of found item, - // here to detect if fs changed under - // us while we were preparing for - // journal. - int fs_gen; // We store fs generation here to find if someone - // changes fs under our feet - - copy_item_head (&tmp_ih, ih); // Remember itemhead - fs_gen = get_generation (inode->i_sb); // remember fs generation - reiserfs_prepare_for_journal(inode->i_sb, bh, 1); // Prepare a buffer within which indirect item is stored for changing. 
- if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { - // Sigh, fs was changed under us, we need to look for new - // location of item we are working with - - /* unmark prepaerd area as journaled and search for it's - new position */ - reiserfs_restore_prepared_buffer(inode->i_sb, bh); - res = search_for_position_by_key(inode->i_sb, &key, &path); - if ( res == IO_ERROR) { - res = -EIO; - goto error_exit_free_blocks; - } - bh=get_last_bh(&path); - ih=get_ih(&path); - item = get_item(&path); - itempos = path.pos_in_item; - goto retry; + + if (curr_block < blocks_to_allocate) { + // Oh, well need to append to indirect item, or to create indirect item + // if there weren't any + if (is_indirect_le_ih(ih)) { + // Existing indirect item - append. First calculate key for append + // position. We do not need to recalculate path as it should + // already point to correct place. + make_cpu_key(&key, inode, + le_key_k_offset(get_inode_item_key_version + (inode), + &(ih->ih_key)) + + op_bytes_number(ih, + inode->i_sb->s_blocksize), + TYPE_INDIRECT, 3); + res = + reiserfs_paste_into_item(th, &path, &key, inode, + (char *)(allocated_blocks + + curr_block), + UNFM_P_SIZE * + (blocks_to_allocate - + curr_block)); + if (res) { + goto error_exit_free_blocks; + } + } else if (is_statdata_le_ih(ih)) { + // Last found item was statdata. That means we need to create indirect item. 
+ struct item_head ins_ih; /* itemhead for new item */ + + /* create a key for our new item */ + make_cpu_key(&key, inode, 1, TYPE_INDIRECT, 3); // Position one, + // because that's + // where first + // indirect item + // begins + /* Create new item head for our new item */ + make_le_item_head(&ins_ih, &key, key.version, 1, + TYPE_INDIRECT, + (blocks_to_allocate - + curr_block) * UNFM_P_SIZE, + 0 /* free space */ ); + /* Find where such item should live in the tree */ + res = search_item(inode->i_sb, &key, &path); + if (res != ITEM_NOT_FOUND) { + /* Well, if we have found such item already, or some error + occured, we need to warn user and return error */ + if (res != -ENOSPC) { + reiserfs_warning(inode->i_sb, + "green-9009: search_by_key (%K) " + "returned %d", &key, + res); + } + res = -EIO; + goto error_exit_free_blocks; + } + /* Insert item into the tree with the data as its body */ + res = + reiserfs_insert_item(th, &path, &key, &ins_ih, + inode, + (char *)(allocated_blocks + + curr_block)); + } else { + reiserfs_panic(inode->i_sb, + "green-9010: unexpected item type for key %K\n", + &key); } - modifying_this_item = 1; - } - item[itempos] = allocated_blocks[curr_block]; // Assign new block - curr_block++; } - itempos++; - } - - if ( modifying_this_item ) { // We need to log last-accessed block, if it - // was modified, but not logged yet. - journal_mark_dirty (th, inode->i_sb, bh); - } - - if ( curr_block < blocks_to_allocate ) { - // Oh, well need to append to indirect item, or to create indirect item - // if there weren't any - if ( is_indirect_le_ih(ih) ) { - // Existing indirect item - append. First calculate key for append - // position. We do not need to recalculate path as it should - // already point to correct place. 
- make_cpu_key( &key, inode, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize), TYPE_INDIRECT, 3); - res = reiserfs_paste_into_item( th, &path, &key, inode, (char *)(allocated_blocks+curr_block), UNFM_P_SIZE*(blocks_to_allocate-curr_block)); - if ( res ) { - goto error_exit_free_blocks; - } - } else if (is_statdata_le_ih(ih) ) { - // Last found item was statdata. That means we need to create indirect item. - struct item_head ins_ih; /* itemhead for new item */ - - /* create a key for our new item */ - make_cpu_key( &key, inode, 1, TYPE_INDIRECT, 3); // Position one, - // because that's - // where first - // indirect item - // begins - /* Create new item head for our new item */ - make_le_item_head (&ins_ih, &key, key.version, 1, TYPE_INDIRECT, - (blocks_to_allocate-curr_block)*UNFM_P_SIZE, - 0 /* free space */); - /* Find where such item should live in the tree */ - res = search_item (inode->i_sb, &key, &path); - if ( res != ITEM_NOT_FOUND ) { - /* Well, if we have found such item already, or some error - occured, we need to warn user and return error */ - if ( res != -ENOSPC ) { - reiserfs_warning (inode->i_sb, - "green-9009: search_by_key (%K) " - "returned %d", &key, res); + // the caller is responsible for closing the transaction + // unless we return an error, they are also responsible for logging + // the inode. + // + pathrelse(&path); + /* + * cleanup prellocation from previous writes + * if this is a partial block write + */ + if (write_bytes & (inode->i_sb->s_blocksize - 1)) + reiserfs_discard_prealloc(th, inode); + reiserfs_write_unlock(inode->i_sb); + + // go through all the pages/buffers and map the buffers to newly allocated + // blocks (so that system knows where to write these pages later). 
+ curr_block = 0; + for (i = 0; i < num_pages; i++) { + struct page *page = prepared_pages[i]; //current page + struct buffer_head *head = page_buffers(page); // first buffer for a page + int block_start, block_end; // in-page offsets for buffers. + + if (!page_buffers(page)) + reiserfs_panic(inode->i_sb, + "green-9005: No buffers for prepared page???"); + + /* For each buffer in page */ + for (bh = head, block_start = 0; bh != head || !block_start; + block_start = block_end, bh = bh->b_this_page) { + if (!bh) + reiserfs_panic(inode->i_sb, + "green-9006: Allocated but absent buffer for a page?"); + block_end = block_start + inode->i_sb->s_blocksize; + if (i == 0 && block_end <= from) + /* if this buffer is before requested data to map, skip it */ + continue; + if (i == num_pages - 1 && block_start >= to) + /* If this buffer is after requested data to map, abort + processing of current page */ + break; + + if (!buffer_mapped(bh)) { // Ok, unmapped buffer, need to map it + map_bh(bh, inode->i_sb, + le32_to_cpu(allocated_blocks + [curr_block])); + curr_block++; + set_buffer_new(bh); + } } - res = -EIO; - goto error_exit_free_blocks; - } - /* Insert item into the tree with the data as its body */ - res = reiserfs_insert_item( th, &path, &key, &ins_ih, inode, (char *)(allocated_blocks+curr_block)); - } else { - reiserfs_panic(inode->i_sb, "green-9010: unexpected item type for key %K\n",&key); - } - } - - // the caller is responsible for closing the transaction - // unless we return an error, they are also responsible for logging - // the inode. - // - pathrelse(&path); - /* - * cleanup prellocation from previous writes - * if this is a partial block write - */ - if (write_bytes & (inode->i_sb->s_blocksize -1)) - reiserfs_discard_prealloc(th, inode); - reiserfs_write_unlock(inode->i_sb); - - // go through all the pages/buffers and map the buffers to newly allocated - // blocks (so that system knows where to write these pages later). 
- curr_block = 0; - for ( i = 0; i < num_pages ; i++ ) { - struct page *page=prepared_pages[i]; //current page - struct buffer_head *head = page_buffers(page);// first buffer for a page - int block_start, block_end; // in-page offsets for buffers. - - if (!page_buffers(page)) - reiserfs_panic(inode->i_sb, "green-9005: No buffers for prepared page???"); - - /* For each buffer in page */ - for(bh = head, block_start = 0; bh != head || !block_start; - block_start=block_end, bh = bh->b_this_page) { - if (!bh) - reiserfs_panic(inode->i_sb, "green-9006: Allocated but absent buffer for a page?"); - block_end = block_start+inode->i_sb->s_blocksize; - if (i == 0 && block_end <= from ) - /* if this buffer is before requested data to map, skip it */ - continue; - if (i == num_pages - 1 && block_start >= to) - /* If this buffer is after requested data to map, abort - processing of current page */ - break; - - if ( !buffer_mapped(bh) ) { // Ok, unmapped buffer, need to map it - map_bh( bh, inode->i_sb, le32_to_cpu(allocated_blocks[curr_block])); - curr_block++; - set_buffer_new(bh); - } } - } - RFALSE( curr_block > blocks_to_allocate, "green-9007: Used too many blocks? weird"); + RFALSE(curr_block > blocks_to_allocate, + "green-9007: Used too many blocks? weird"); - kfree(allocated_blocks); - return 0; + kfree(allocated_blocks); + return 0; // Need to deal with transaction here. 
-error_exit_free_blocks: - pathrelse(&path); - // free blocks - for( i = 0; i < blocks_to_allocate; i++ ) - reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]), 1); - -error_exit: - if (th->t_trans_id) { - int err; - // update any changes we made to blk count - reiserfs_update_sd(th, inode); - err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); - if (err) - res = err; - } - reiserfs_write_unlock(inode->i_sb); - kfree(allocated_blocks); - - return res; + error_exit_free_blocks: + pathrelse(&path); + // free blocks + for (i = 0; i < blocks_to_allocate; i++) + reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]), + 1); + + error_exit: + if (th->t_trans_id) { + int err; + // update any changes we made to blk count + reiserfs_update_sd(th, inode); + err = + journal_end(th, inode->i_sb, + JOURNAL_PER_BALANCE_CNT * 3 + 1 + + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); + if (err) + res = err; + } + reiserfs_write_unlock(inode->i_sb); + kfree(allocated_blocks); + + return res; } /* Unlock pages prepared by reiserfs_prepare_file_region_for_write */ -static void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */ - size_t num_pages /* amount of pages */) { - int i; // loop counter +static void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */ + size_t num_pages /* amount of pages */ ) +{ + int i; // loop counter - for (i=0; i < num_pages ; i++) { - struct page *page = prepared_pages[i]; + for (i = 0; i < num_pages; i++) { + struct page *page = prepared_pages[i]; - try_to_free_buffers(page); - unlock_page(page); - page_cache_release(page); - } + try_to_free_buffers(page); + unlock_page(page); + page_cache_release(page); + } } /* This function will copy data from userspace to specified pages within supplied byte range */ -static int reiserfs_copy_from_user_to_file_region( - loff_t pos, /* In-file position */ - int num_pages, /* 
Number of pages affected */ - int write_bytes, /* Amount of bytes to write */ - struct page **prepared_pages, /* pointer to - array to - prepared pages - */ - const char __user *buf /* Pointer to user-supplied - data*/ - ) +static int reiserfs_copy_from_user_to_file_region(loff_t pos, /* In-file position */ + int num_pages, /* Number of pages affected */ + int write_bytes, /* Amount of bytes to write */ + struct page **prepared_pages, /* pointer to + array to + prepared pages + */ + const char __user * buf /* Pointer to user-supplied + data */ + ) { - long page_fault=0; // status of copy_from_user. - int i; // loop counter. - int offset; // offset in page - - for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) { - size_t count = min_t(size_t,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page - struct page *page=prepared_pages[i]; // Current page we process. - - fault_in_pages_readable( buf, count); - - /* Copy data from userspace to the current page */ - kmap(page); - page_fault = __copy_from_user(page_address(page)+offset, buf, count); // Copy the data. - /* Flush processor's dcache for this page */ - flush_dcache_page(page); - kunmap(page); - buf+=count; - write_bytes-=count; - - if (page_fault) - break; // Was there a fault? abort. - } - - return page_fault?-EFAULT:0; + long page_fault = 0; // status of copy_from_user. + int i; // loop counter. + int offset; // offset in page + + for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages; + i++, offset = 0) { + size_t count = min_t(size_t, PAGE_CACHE_SIZE - offset, write_bytes); // How much of bytes to write to this page + struct page *page = prepared_pages[i]; // Current page we process. + + fault_in_pages_readable(buf, count); + + /* Copy data from userspace to the current page */ + kmap(page); + page_fault = __copy_from_user(page_address(page) + offset, buf, count); // Copy the data. 
+ /* Flush processor's dcache for this page */ + flush_dcache_page(page); + kunmap(page); + buf += count; + write_bytes -= count; + + if (page_fault) + break; // Was there a fault? abort. + } + + return page_fault ? -EFAULT : 0; } /* taken fs/buffer.c:__block_commit_write */ int reiserfs_commit_page(struct inode *inode, struct page *page, - unsigned from, unsigned to) + unsigned from, unsigned to) { - unsigned block_start, block_end; - int partial = 0; - unsigned blocksize; - struct buffer_head *bh, *head; - unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT; - int new; - int logit = reiserfs_file_data_log(inode); - struct super_block *s = inode->i_sb; - int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; - struct reiserfs_transaction_handle th; - int ret = 0; - - th.t_trans_id = 0; - blocksize = 1 << inode->i_blkbits; - - if (logit) { - reiserfs_write_lock(s); - ret = journal_begin(&th, s, bh_per_page + 1); - if (ret) - goto drop_write_lock; - reiserfs_update_inode_transaction(inode); - } - for(bh = head = page_buffers(page), block_start = 0; - bh != head || !block_start; - block_start=block_end, bh = bh->b_this_page) - { - - new = buffer_new(bh); - clear_buffer_new(bh); - block_end = block_start + blocksize; - if (block_end <= from || block_start >= to) { - if (!buffer_uptodate(bh)) - partial = 1; - } else { - set_buffer_uptodate(bh); - if (logit) { - reiserfs_prepare_for_journal(s, bh, 1); - journal_mark_dirty(&th, s, bh); - } else if (!buffer_dirty(bh)) { - mark_buffer_dirty(bh); - /* do data=ordered on any page past the end - * of file and any buffer marked BH_New. 
- */ - if (reiserfs_data_ordered(inode->i_sb) && - (new || page->index >= i_size_index)) { - reiserfs_add_ordered_list(inode, bh); - } - } + unsigned block_start, block_end; + int partial = 0; + unsigned blocksize; + struct buffer_head *bh, *head; + unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT; + int new; + int logit = reiserfs_file_data_log(inode); + struct super_block *s = inode->i_sb; + int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; + struct reiserfs_transaction_handle th; + int ret = 0; + + th.t_trans_id = 0; + blocksize = 1 << inode->i_blkbits; + + if (logit) { + reiserfs_write_lock(s); + ret = journal_begin(&th, s, bh_per_page + 1); + if (ret) + goto drop_write_lock; + reiserfs_update_inode_transaction(inode); + } + for (bh = head = page_buffers(page), block_start = 0; + bh != head || !block_start; + block_start = block_end, bh = bh->b_this_page) { + + new = buffer_new(bh); + clear_buffer_new(bh); + block_end = block_start + blocksize; + if (block_end <= from || block_start >= to) { + if (!buffer_uptodate(bh)) + partial = 1; + } else { + set_buffer_uptodate(bh); + if (logit) { + reiserfs_prepare_for_journal(s, bh, 1); + journal_mark_dirty(&th, s, bh); + } else if (!buffer_dirty(bh)) { + mark_buffer_dirty(bh); + /* do data=ordered on any page past the end + * of file and any buffer marked BH_New. + */ + if (reiserfs_data_ordered(inode->i_sb) && + (new || page->index >= i_size_index)) { + reiserfs_add_ordered_list(inode, bh); + } + } + } } - } - if (logit) { - ret = journal_end(&th, s, bh_per_page + 1); -drop_write_lock: - reiserfs_write_unlock(s); - } - /* - * If this is a partial write which happened to make all buffers - * uptodate then we can optimize away a bogus readpage() for - * the next read(). Here we 'discover' whether the page went - * uptodate as a result of this (potentially partial) write. 
- */ - if (!partial) - SetPageUptodate(page); - return ret; + if (logit) { + ret = journal_end(&th, s, bh_per_page + 1); + drop_write_lock: + reiserfs_write_unlock(s); + } + /* + * If this is a partial write which happened to make all buffers + * uptodate then we can optimize away a bogus readpage() for + * the next read(). Here we 'discover' whether the page went + * uptodate as a result of this (potentially partial) write. + */ + if (!partial) + SetPageUptodate(page); + return ret; } - /* Submit pages for write. This was separated from actual file copying because we might want to allocate block numbers in-between. This function assumes that caller will adjust file size to correct value. */ -static int reiserfs_submit_file_region_for_write( - struct reiserfs_transaction_handle *th, - struct inode *inode, - loff_t pos, /* Writing position offset */ - size_t num_pages, /* Number of pages to write */ - size_t write_bytes, /* number of bytes to write */ - struct page **prepared_pages /* list of pages */ - ) +static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_handle *th, struct inode *inode, loff_t pos, /* Writing position offset */ + size_t num_pages, /* Number of pages to write */ + size_t write_bytes, /* number of bytes to write */ + struct page **prepared_pages /* list of pages */ + ) { - int status; // return status of block_commit_write. - int retval = 0; // Return value we are going to return. - int i; // loop counter - int offset; // Writing offset in page. - int orig_write_bytes = write_bytes; - int sd_update = 0; - - for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) { - int count = min_t(int,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page - struct page *page=prepared_pages[i]; // Current page we process. 
- - status = reiserfs_commit_page(inode, page, offset, offset+count); - if ( status ) - retval = status; // To not overcomplicate matters We are going to - // submit all the pages even if there was error. - // we only remember error status to report it on - // exit. - write_bytes-=count; - } - /* now that we've gotten all the ordered buffers marked dirty, - * we can safely update i_size and close any running transaction - */ - if ( pos + orig_write_bytes > inode->i_size) { - inode->i_size = pos + orig_write_bytes; // Set new size - /* If the file have grown so much that tail packing is no - * longer possible, reset "need to pack" flag */ - if ( (have_large_tails (inode->i_sb) && - inode->i_size > i_block_size (inode)*4) || - (have_small_tails (inode->i_sb) && - inode->i_size > i_block_size(inode)) ) - REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask ; - else if ( (have_large_tails (inode->i_sb) && - inode->i_size < i_block_size (inode)*4) || - (have_small_tails (inode->i_sb) && - inode->i_size < i_block_size(inode)) ) - REISERFS_I(inode)->i_flags |= i_pack_on_close_mask ; - + int status; // return status of block_commit_write. + int retval = 0; // Return value we are going to return. + int i; // loop counter + int offset; // Writing offset in page. + int orig_write_bytes = write_bytes; + int sd_update = 0; + + for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages; + i++, offset = 0) { + int count = min_t(int, PAGE_CACHE_SIZE - offset, write_bytes); // How much of bytes to write to this page + struct page *page = prepared_pages[i]; // Current page we process. + + status = + reiserfs_commit_page(inode, page, offset, offset + count); + if (status) + retval = status; // To not overcomplicate matters We are going to + // submit all the pages even if there was error. + // we only remember error status to report it on + // exit. 
+ write_bytes -= count; + } + /* now that we've gotten all the ordered buffers marked dirty, + * we can safely update i_size and close any running transaction + */ + if (pos + orig_write_bytes > inode->i_size) { + inode->i_size = pos + orig_write_bytes; // Set new size + /* If the file have grown so much that tail packing is no + * longer possible, reset "need to pack" flag */ + if ((have_large_tails(inode->i_sb) && + inode->i_size > i_block_size(inode) * 4) || + (have_small_tails(inode->i_sb) && + inode->i_size > i_block_size(inode))) + REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; + else if ((have_large_tails(inode->i_sb) && + inode->i_size < i_block_size(inode) * 4) || + (have_small_tails(inode->i_sb) && + inode->i_size < i_block_size(inode))) + REISERFS_I(inode)->i_flags |= i_pack_on_close_mask; + + if (th->t_trans_id) { + reiserfs_write_lock(inode->i_sb); + reiserfs_update_sd(th, inode); // And update on-disk metadata + reiserfs_write_unlock(inode->i_sb); + } else + inode->i_sb->s_op->dirty_inode(inode); + + sd_update = 1; + } if (th->t_trans_id) { - reiserfs_write_lock(inode->i_sb); - reiserfs_update_sd(th, inode); // And update on-disk metadata - reiserfs_write_unlock(inode->i_sb); - } else - inode->i_sb->s_op->dirty_inode(inode); + reiserfs_write_lock(inode->i_sb); + if (!sd_update) + reiserfs_update_sd(th, inode); + status = journal_end(th, th->t_super, th->t_blocks_allocated); + if (status) + retval = status; + reiserfs_write_unlock(inode->i_sb); + } + th->t_trans_id = 0; - sd_update = 1; - } - if (th->t_trans_id) { - reiserfs_write_lock(inode->i_sb); - if (!sd_update) - reiserfs_update_sd(th, inode); - status = journal_end(th, th->t_super, th->t_blocks_allocated); - if (status) - retval = status; - reiserfs_write_unlock(inode->i_sb); - } - th->t_trans_id = 0; - - /* - * we have to unlock the pages after updating i_size, otherwise - * we race with writepage - */ - for ( i = 0; i < num_pages ; i++) { - struct page *page=prepared_pages[i]; - 
unlock_page(page); - mark_page_accessed(page); - page_cache_release(page); - } - return retval; + /* + * we have to unlock the pages after updating i_size, otherwise + * we race with writepage + */ + for (i = 0; i < num_pages; i++) { + struct page *page = prepared_pages[i]; + unlock_page(page); + mark_page_accessed(page); + page_cache_release(page); + } + return retval; } /* Look if passed writing region is going to touch file's tail (if it is present). And if it is, convert the tail to unformatted node */ -static int reiserfs_check_for_tail_and_convert( struct inode *inode, /* inode to deal with */ - loff_t pos, /* Writing position */ - int write_bytes /* amount of bytes to write */ - ) +static int reiserfs_check_for_tail_and_convert(struct inode *inode, /* inode to deal with */ + loff_t pos, /* Writing position */ + int write_bytes /* amount of bytes to write */ + ) { - INITIALIZE_PATH(path); // needed for search_for_position - struct cpu_key key; // Key that would represent last touched writing byte. - struct item_head *ih; // item header of found block; - int res; // Return value of various functions we call. - int cont_expand_offset; // We will put offset for generic_cont_expand here - // This can be int just because tails are created - // only for small files. - + INITIALIZE_PATH(path); // needed for search_for_position + struct cpu_key key; // Key that would represent last touched writing byte. + struct item_head *ih; // item header of found block; + int res; // Return value of various functions we call. + int cont_expand_offset; // We will put offset for generic_cont_expand here + // This can be int just because tails are created + // only for small files. 
+ /* this embodies a dependency on a particular tail policy */ - if ( inode->i_size >= inode->i_sb->s_blocksize*4 ) { - /* such a big files do not have tails, so we won't bother ourselves - to look for tails, simply return */ - return 0; - } - - reiserfs_write_lock(inode->i_sb); - /* find the item containing the last byte to be written, or if - * writing past the end of the file then the last item of the - * file (and then we check its type). */ - make_cpu_key (&key, inode, pos+write_bytes+1, TYPE_ANY, 3/*key length*/); - res = search_for_position_by_key(inode->i_sb, &key, &path); - if ( res == IO_ERROR ) { - reiserfs_write_unlock(inode->i_sb); - return -EIO; - } - ih = get_ih(&path); - res = 0; - if ( is_direct_le_ih(ih) ) { - /* Ok, closest item is file tail (tails are stored in "direct" - * items), so we need to unpack it. */ - /* To not overcomplicate matters, we just call generic_cont_expand - which will in turn call other stuff and finally will boil down to - reiserfs_get_block() that would do necessary conversion. */ - cont_expand_offset = le_key_k_offset(get_inode_item_key_version(inode), &(ih->ih_key)); - pathrelse(&path); - res = generic_cont_expand( inode, cont_expand_offset); - } else - pathrelse(&path); + if (inode->i_size >= inode->i_sb->s_blocksize * 4) { + /* such a big files do not have tails, so we won't bother ourselves + to look for tails, simply return */ + return 0; + } - reiserfs_write_unlock(inode->i_sb); - return res; + reiserfs_write_lock(inode->i_sb); + /* find the item containing the last byte to be written, or if + * writing past the end of the file then the last item of the + * file (and then we check its type). 
*/ + make_cpu_key(&key, inode, pos + write_bytes + 1, TYPE_ANY, + 3 /*key length */ ); + res = search_for_position_by_key(inode->i_sb, &key, &path); + if (res == IO_ERROR) { + reiserfs_write_unlock(inode->i_sb); + return -EIO; + } + ih = get_ih(&path); + res = 0; + if (is_direct_le_ih(ih)) { + /* Ok, closest item is file tail (tails are stored in "direct" + * items), so we need to unpack it. */ + /* To not overcomplicate matters, we just call generic_cont_expand + which will in turn call other stuff and finally will boil down to + reiserfs_get_block() that would do necessary conversion. */ + cont_expand_offset = + le_key_k_offset(get_inode_item_key_version(inode), + &(ih->ih_key)); + pathrelse(&path); + res = generic_cont_expand(inode, cont_expand_offset); + } else + pathrelse(&path); + + reiserfs_write_unlock(inode->i_sb); + return res; } /* This function locks pages starting from @pos for @inode. @@ -851,275 +947,296 @@ static int reiserfs_check_for_tail_and_convert( struct inode *inode, /* inode to append), it is zeroed, then. Returns number of unallocated blocks that should be allocated to cover new file data.*/ -static int reiserfs_prepare_file_region_for_write( - struct inode *inode /* Inode of the file */, - loff_t pos, /* position in the file */ - size_t num_pages, /* number of pages to - prepare */ - size_t write_bytes, /* Amount of bytes to be - overwritten from - @pos */ - struct page **prepared_pages /* pointer to array - where to store - prepared pages */ - ) +static int reiserfs_prepare_file_region_for_write(struct inode *inode + /* Inode of the file */ , + loff_t pos, /* position in the file */ + size_t num_pages, /* number of pages to + prepare */ + size_t write_bytes, /* Amount of bytes to be + overwritten from + @pos */ + struct page **prepared_pages /* pointer to array + where to store + prepared pages */ + ) { - int res=0; // Return values of different functions we call. 
- unsigned long index = pos >> PAGE_CACHE_SHIFT; // Offset in file in pages. - int from = (pos & (PAGE_CACHE_SIZE - 1)); // Writing offset in first page - int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; - /* offset of last modified byte in last - page */ - struct address_space *mapping = inode->i_mapping; // Pages are mapped here. - int i; // Simple counter - int blocks = 0; /* Return value (blocks that should be allocated) */ - struct buffer_head *bh, *head; // Current bufferhead and first bufferhead - // of a page. - unsigned block_start, block_end; // Starting and ending offsets of current - // buffer in the page. - struct buffer_head *wait[2], **wait_bh=wait; // Buffers for page, if - // Page appeared to be not up - // to date. Note how we have - // at most 2 buffers, this is - // because we at most may - // partially overwrite two - // buffers for one page. One at // the beginning of write area - // and one at the end. - // Everything inthe middle gets // overwritten totally. - - struct cpu_key key; // cpu key of item that we are going to deal with - struct item_head *ih = NULL; // pointer to item head that we are going to deal with - struct buffer_head *itembuf=NULL; // Buffer head that contains items that we are going to deal with - INITIALIZE_PATH(path); // path to item, that we are going to deal with. - __le32 * item=NULL; // pointer to item we are going to deal with - int item_pos=-1; /* Position in indirect item */ - - - if ( num_pages < 1 ) { - reiserfs_warning (inode->i_sb, - "green-9001: reiserfs_prepare_file_region_for_write " - "called with zero number of pages to process"); - return -EFAULT; - } - - /* We have 2 loops for pages. In first loop we grab and lock the pages, so - that nobody would touch these until we release the pages. Then - we'd start to deal with mapping buffers to blocks. 
*/ - for ( i = 0; i < num_pages; i++) { - prepared_pages[i] = grab_cache_page(mapping, index + i); // locks the page - if ( !prepared_pages[i]) { - res = -ENOMEM; - goto failed_page_grabbing; - } - if (!page_has_buffers(prepared_pages[i])) - create_empty_buffers(prepared_pages[i], inode->i_sb->s_blocksize, 0); - } - - /* Let's count amount of blocks for a case where all the blocks - overwritten are new (we will substract already allocated blocks later)*/ - if ( num_pages > 2 ) - /* These are full-overwritten pages so we count all the blocks in - these pages are counted as needed to be allocated */ - blocks = (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits); - - /* count blocks needed for first page (possibly partially written) */ - blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) + - !!(from & (inode->i_sb->s_blocksize-1)); /* roundup */ - - /* Now we account for last page. If last page == first page (we - overwrite only one page), we substract all the blocks past the - last writing position in a page out of already calculated number - of blocks */ - blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT-inode->i_blkbits)) - - ((PAGE_CACHE_SIZE - to) >> inode->i_blkbits); - /* Note how we do not roundup here since partial blocks still - should be allocated */ - - /* Now if all the write area lies past the file end, no point in - maping blocks, since there is none, so we just zero out remaining - parts of first and last pages in write area (if needed) */ - if ( (pos & ~((loff_t)PAGE_CACHE_SIZE - 1)) > inode->i_size ) { - if ( from != 0 ) {/* First page needs to be partially zeroed */ - char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0); - memset(kaddr, 0, from); - kunmap_atomic( kaddr, KM_USER0); - } - if ( to != PAGE_CACHE_SIZE ) { /* Last page needs to be partially zeroed */ - char *kaddr = kmap_atomic(prepared_pages[num_pages-1], KM_USER0); - memset(kaddr+to, 0, PAGE_CACHE_SIZE - to); - kunmap_atomic( kaddr, KM_USER0); + int res = 0; // Return values 
of different functions we call. + unsigned long index = pos >> PAGE_CACHE_SHIFT; // Offset in file in pages. + int from = (pos & (PAGE_CACHE_SIZE - 1)); // Writing offset in first page + int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; + /* offset of last modified byte in last + page */ + struct address_space *mapping = inode->i_mapping; // Pages are mapped here. + int i; // Simple counter + int blocks = 0; /* Return value (blocks that should be allocated) */ + struct buffer_head *bh, *head; // Current bufferhead and first bufferhead + // of a page. + unsigned block_start, block_end; // Starting and ending offsets of current + // buffer in the page. + struct buffer_head *wait[2], **wait_bh = wait; // Buffers for page, if + // Page appeared to be not up + // to date. Note how we have + // at most 2 buffers, this is + // because we at most may + // partially overwrite two + // buffers for one page. One at // the beginning of write area + // and one at the end. + // Everything inthe middle gets // overwritten totally. + + struct cpu_key key; // cpu key of item that we are going to deal with + struct item_head *ih = NULL; // pointer to item head that we are going to deal with + struct buffer_head *itembuf = NULL; // Buffer head that contains items that we are going to deal with + INITIALIZE_PATH(path); // path to item, that we are going to deal with. 
+ __le32 *item = NULL; // pointer to item we are going to deal with + int item_pos = -1; /* Position in indirect item */ + + if (num_pages < 1) { + reiserfs_warning(inode->i_sb, + "green-9001: reiserfs_prepare_file_region_for_write " + "called with zero number of pages to process"); + return -EFAULT; } - /* Since all blocks are new - use already calculated value */ - return blocks; - } - - /* Well, since we write somewhere into the middle of a file, there is - possibility we are writing over some already allocated blocks, so - let's map these blocks and substract number of such blocks out of blocks - we need to allocate (calculated above) */ - /* Mask write position to start on blocksize, we do it out of the - loop for performance reasons */ - pos &= ~((loff_t) inode->i_sb->s_blocksize - 1); - /* Set cpu key to the starting position in a file (on left block boundary)*/ - make_cpu_key (&key, inode, 1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)), TYPE_ANY, 3/*key length*/); - - reiserfs_write_lock(inode->i_sb); // We need that for at least search_by_key() - for ( i = 0; i < num_pages ; i++ ) { - - head = page_buffers(prepared_pages[i]); - /* For each buffer in the page */ - for(bh = head, block_start = 0; bh != head || !block_start; - block_start=block_end, bh = bh->b_this_page) { - if (!bh) - reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?"); - /* Find where this buffer ends */ - block_end = block_start+inode->i_sb->s_blocksize; - if (i == 0 && block_end <= from ) - /* if this buffer is before requested data to map, skip it*/ - continue; - - if (i == num_pages - 1 && block_start >= to) { - /* If this buffer is after requested data to map, abort - processing of current page */ - break; + /* We have 2 loops for pages. In first loop we grab and lock the pages, so + that nobody would touch these until we release the pages. Then + we'd start to deal with mapping buffers to blocks. 
*/ + for (i = 0; i < num_pages; i++) { + prepared_pages[i] = grab_cache_page(mapping, index + i); // locks the page + if (!prepared_pages[i]) { + res = -ENOMEM; + goto failed_page_grabbing; } + if (!page_has_buffers(prepared_pages[i])) + create_empty_buffers(prepared_pages[i], + inode->i_sb->s_blocksize, 0); + } - if ( buffer_mapped(bh) && bh->b_blocknr !=0 ) { - /* This is optimisation for a case where buffer is mapped - and have blocknumber assigned. In case significant amount - of such buffers are present, we may avoid some amount - of search_by_key calls. - Probably it would be possible to move parts of this code - out of BKL, but I afraid that would overcomplicate code - without any noticeable benefit. - */ - item_pos++; - /* Update the key */ - set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + inode->i_sb->s_blocksize); - blocks--; // Decrease the amount of blocks that need to be - // allocated - continue; // Go to the next buffer + /* Let's count amount of blocks for a case where all the blocks + overwritten are new (we will substract already allocated blocks later) */ + if (num_pages > 2) + /* These are full-overwritten pages so we count all the blocks in + these pages are counted as needed to be allocated */ + blocks = + (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits); + + /* count blocks needed for first page (possibly partially written) */ + blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) + !!(from & (inode->i_sb->s_blocksize - 1)); /* roundup */ + + /* Now we account for last page. 
If last page == first page (we + overwrite only one page), we substract all the blocks past the + last writing position in a page out of already calculated number + of blocks */ + blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - + ((PAGE_CACHE_SIZE - to) >> inode->i_blkbits); + /* Note how we do not roundup here since partial blocks still + should be allocated */ + + /* Now if all the write area lies past the file end, no point in + maping blocks, since there is none, so we just zero out remaining + parts of first and last pages in write area (if needed) */ + if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) { + if (from != 0) { /* First page needs to be partially zeroed */ + char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0); + memset(kaddr, 0, from); + kunmap_atomic(kaddr, KM_USER0); + } + if (to != PAGE_CACHE_SIZE) { /* Last page needs to be partially zeroed */ + char *kaddr = + kmap_atomic(prepared_pages[num_pages - 1], + KM_USER0); + memset(kaddr + to, 0, PAGE_CACHE_SIZE - to); + kunmap_atomic(kaddr, KM_USER0); } - if ( !itembuf || /* if first iteration */ - item_pos >= ih_item_len(ih)/UNFM_P_SIZE) - { /* or if we progressed past the - current unformatted_item */ - /* Try to find next item */ - res = search_for_position_by_key(inode->i_sb, &key, &path); - /* Abort if no more items */ - if ( res != POSITION_FOUND ) { - /* make sure later loops don't use this item */ - itembuf = NULL; - item = NULL; - break; + /* Since all blocks are new - use already calculated value */ + return blocks; + } + + /* Well, since we write somewhere into the middle of a file, there is + possibility we are writing over some already allocated blocks, so + let's map these blocks and substract number of such blocks out of blocks + we need to allocate (calculated above) */ + /* Mask write position to start on blocksize, we do it out of the + loop for performance reasons */ + pos &= ~((loff_t) inode->i_sb->s_blocksize - 1); + /* Set cpu key to the 
starting position in a file (on left block boundary) */ + make_cpu_key(&key, inode, + 1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)), + TYPE_ANY, 3 /*key length */ ); + + reiserfs_write_lock(inode->i_sb); // We need that for at least search_by_key() + for (i = 0; i < num_pages; i++) { + + head = page_buffers(prepared_pages[i]); + /* For each buffer in the page */ + for (bh = head, block_start = 0; bh != head || !block_start; + block_start = block_end, bh = bh->b_this_page) { + if (!bh) + reiserfs_panic(inode->i_sb, + "green-9002: Allocated but absent buffer for a page?"); + /* Find where this buffer ends */ + block_end = block_start + inode->i_sb->s_blocksize; + if (i == 0 && block_end <= from) + /* if this buffer is before requested data to map, skip it */ + continue; + + if (i == num_pages - 1 && block_start >= to) { + /* If this buffer is after requested data to map, abort + processing of current page */ + break; } - /* Update information about current indirect item */ - itembuf = get_last_bh( &path ); - ih = get_ih( &path ); - item = get_item( &path ); - item_pos = path.pos_in_item; + if (buffer_mapped(bh) && bh->b_blocknr != 0) { + /* This is optimisation for a case where buffer is mapped + and have blocknumber assigned. In case significant amount + of such buffers are present, we may avoid some amount + of search_by_key calls. + Probably it would be possible to move parts of this code + out of BKL, but I afraid that would overcomplicate code + without any noticeable benefit. 
+ */ + item_pos++; + /* Update the key */ + set_cpu_key_k_offset(&key, + cpu_key_k_offset(&key) + + inode->i_sb->s_blocksize); + blocks--; // Decrease the amount of blocks that need to be + // allocated + continue; // Go to the next buffer + } - RFALSE( !is_indirect_le_ih (ih), "green-9003: indirect item expected"); - } + if (!itembuf || /* if first iteration */ + item_pos >= ih_item_len(ih) / UNFM_P_SIZE) { /* or if we progressed past the + current unformatted_item */ + /* Try to find next item */ + res = + search_for_position_by_key(inode->i_sb, + &key, &path); + /* Abort if no more items */ + if (res != POSITION_FOUND) { + /* make sure later loops don't use this item */ + itembuf = NULL; + item = NULL; + break; + } + + /* Update information about current indirect item */ + itembuf = get_last_bh(&path); + ih = get_ih(&path); + item = get_item(&path); + item_pos = path.pos_in_item; + + RFALSE(!is_indirect_le_ih(ih), + "green-9003: indirect item expected"); + } - /* See if there is some block associated with the file - at that position, map the buffer to this block */ - if ( get_block_num(item,item_pos) ) { - map_bh(bh, inode->i_sb, get_block_num(item,item_pos)); - blocks--; // Decrease the amount of blocks that need to be - // allocated + /* See if there is some block associated with the file + at that position, map the buffer to this block */ + if (get_block_num(item, item_pos)) { + map_bh(bh, inode->i_sb, + get_block_num(item, item_pos)); + blocks--; // Decrease the amount of blocks that need to be + // allocated + } + item_pos++; + /* Update the key */ + set_cpu_key_k_offset(&key, + cpu_key_k_offset(&key) + + inode->i_sb->s_blocksize); } - item_pos++; - /* Update the key */ - set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + inode->i_sb->s_blocksize); } - } - pathrelse(&path); // Free the path - reiserfs_write_unlock(inode->i_sb); + pathrelse(&path); // Free the path + reiserfs_write_unlock(inode->i_sb); /* Now zero out unmappend buffers for the first and 
last pages of write area or issue read requests if page is mapped. */ /* First page, see if it is not uptodate */ - if ( !PageUptodate(prepared_pages[0]) ) { - head = page_buffers(prepared_pages[0]); - - /* For each buffer in page */ - for(bh = head, block_start = 0; bh != head || !block_start; - block_start=block_end, bh = bh->b_this_page) { - - if (!bh) - reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?"); - /* Find where this buffer ends */ - block_end = block_start+inode->i_sb->s_blocksize; - if ( block_end <= from ) - /* if this buffer is before requested data to map, skip it*/ - continue; - if ( block_start < from ) { /* Aha, our partial buffer */ - if ( buffer_mapped(bh) ) { /* If it is mapped, we need to - issue READ request for it to - not loose data */ - ll_rw_block(READ, 1, &bh); - *wait_bh++=bh; - } else { /* Not mapped, zero it */ - char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0); - memset(kaddr+block_start, 0, from-block_start); - kunmap_atomic( kaddr, KM_USER0); - set_buffer_uptodate(bh); - } + if (!PageUptodate(prepared_pages[0])) { + head = page_buffers(prepared_pages[0]); + + /* For each buffer in page */ + for (bh = head, block_start = 0; bh != head || !block_start; + block_start = block_end, bh = bh->b_this_page) { + + if (!bh) + reiserfs_panic(inode->i_sb, + "green-9002: Allocated but absent buffer for a page?"); + /* Find where this buffer ends */ + block_end = block_start + inode->i_sb->s_blocksize; + if (block_end <= from) + /* if this buffer is before requested data to map, skip it */ + continue; + if (block_start < from) { /* Aha, our partial buffer */ + if (buffer_mapped(bh)) { /* If it is mapped, we need to + issue READ request for it to + not loose data */ + ll_rw_block(READ, 1, &bh); + *wait_bh++ = bh; + } else { /* Not mapped, zero it */ + char *kaddr = + kmap_atomic(prepared_pages[0], + KM_USER0); + memset(kaddr + block_start, 0, + from - block_start); + kunmap_atomic(kaddr, KM_USER0); + 
set_buffer_uptodate(bh); + } + } } - } } /* Last page, see if it is not uptodate, or if the last page is past the end of the file. */ - if ( !PageUptodate(prepared_pages[num_pages-1]) || - ((pos+write_bytes)>>PAGE_CACHE_SHIFT) > (inode->i_size>>PAGE_CACHE_SHIFT) ) { - head = page_buffers(prepared_pages[num_pages-1]); - - /* for each buffer in page */ - for(bh = head, block_start = 0; bh != head || !block_start; - block_start=block_end, bh = bh->b_this_page) { - - if (!bh) - reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?"); - /* Find where this buffer ends */ - block_end = block_start+inode->i_sb->s_blocksize; - if ( block_start >= to ) - /* if this buffer is after requested data to map, skip it*/ - break; - if ( block_end > to ) { /* Aha, our partial buffer */ - if ( buffer_mapped(bh) ) { /* If it is mapped, we need to - issue READ request for it to - not loose data */ - ll_rw_block(READ, 1, &bh); - *wait_bh++=bh; - } else { /* Not mapped, zero it */ - char *kaddr = kmap_atomic(prepared_pages[num_pages-1], KM_USER0); - memset(kaddr+to, 0, block_end-to); - kunmap_atomic( kaddr, KM_USER0); - set_buffer_uptodate(bh); - } + if (!PageUptodate(prepared_pages[num_pages - 1]) || + ((pos + write_bytes) >> PAGE_CACHE_SHIFT) > + (inode->i_size >> PAGE_CACHE_SHIFT)) { + head = page_buffers(prepared_pages[num_pages - 1]); + + /* for each buffer in page */ + for (bh = head, block_start = 0; bh != head || !block_start; + block_start = block_end, bh = bh->b_this_page) { + + if (!bh) + reiserfs_panic(inode->i_sb, + "green-9002: Allocated but absent buffer for a page?"); + /* Find where this buffer ends */ + block_end = block_start + inode->i_sb->s_blocksize; + if (block_start >= to) + /* if this buffer is after requested data to map, skip it */ + break; + if (block_end > to) { /* Aha, our partial buffer */ + if (buffer_mapped(bh)) { /* If it is mapped, we need to + issue READ request for it to + not loose data */ + ll_rw_block(READ, 1, &bh); + 
*wait_bh++ = bh; + } else { /* Not mapped, zero it */ + char *kaddr = + kmap_atomic(prepared_pages + [num_pages - 1], + KM_USER0); + memset(kaddr + to, 0, block_end - to); + kunmap_atomic(kaddr, KM_USER0); + set_buffer_uptodate(bh); + } + } } - } } - /* Wait for read requests we made to happen, if necessary */ - while(wait_bh > wait) { - wait_on_buffer(*--wait_bh); - if (!buffer_uptodate(*wait_bh)) { - res = -EIO; - goto failed_read; + /* Wait for read requests we made to happen, if necessary */ + while (wait_bh > wait) { + wait_on_buffer(*--wait_bh); + if (!buffer_uptodate(*wait_bh)) { + res = -EIO; + goto failed_read; + } } - } - - return blocks; -failed_page_grabbing: - num_pages = i; -failed_read: - reiserfs_unprepare_pages(prepared_pages, num_pages); - return res; + + return blocks; + failed_page_grabbing: + num_pages = i; + failed_read: + reiserfs_unprepare_pages(prepared_pages, num_pages); + return res; } /* Write @count bytes at position @ppos in a file indicated by @file @@ -1148,262 +1265,305 @@ failed_read: Future Features: providing search_by_key with hints. */ -static ssize_t reiserfs_file_write( struct file *file, /* the file we are going to write into */ - const char __user *buf, /* pointer to user supplied data -(in userspace) */ - size_t count, /* amount of bytes to write */ - loff_t *ppos /* pointer to position in file that we start writing at. Should be updated to - * new current position before returning. */ ) +static ssize_t reiserfs_file_write(struct file *file, /* the file we are going to write into */ + const char __user * buf, /* pointer to user supplied data + (in userspace) */ + size_t count, /* amount of bytes to write */ + loff_t * ppos /* pointer to position in file that we start writing at. Should be updated to + * new current position before returning. */ + ) { - size_t already_written = 0; // Number of bytes already written to the file. - loff_t pos; // Current position in the file. 
- ssize_t res; // return value of various functions that we call. - int err = 0; - struct inode *inode = file->f_dentry->d_inode; // Inode of the file that we are writing to. - /* To simplify coding at this time, we store - locked pages in array for now */ - struct page * prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME]; - struct reiserfs_transaction_handle th; - th.t_trans_id = 0; - - if ( file->f_flags & O_DIRECT) { // Direct IO needs treatment - ssize_t result, after_file_end = 0; - if ( (*ppos + count >= inode->i_size) || (file->f_flags & O_APPEND) ) { - /* If we are appending a file, we need to put this savelink in here. - If we will crash while doing direct io, finish_unfinished will - cut the garbage from the file end. */ - reiserfs_write_lock(inode->i_sb); - err = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT ); - if (err) { - reiserfs_write_unlock (inode->i_sb); - return err; - } - reiserfs_update_inode_transaction(inode); - add_save_link (&th, inode, 1 /* Truncate */); - after_file_end = 1; - err = journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT ); - reiserfs_write_unlock(inode->i_sb); - if (err) - return err; - } - result = generic_file_write(file, buf, count, ppos); - - if ( after_file_end ) { /* Now update i_size and remove the savelink */ - struct reiserfs_transaction_handle th; - reiserfs_write_lock(inode->i_sb); - err = journal_begin(&th, inode->i_sb, 1); - if (err) { - reiserfs_write_unlock (inode->i_sb); - return err; - } - reiserfs_update_inode_transaction(inode); - reiserfs_update_sd(&th, inode); - err = journal_end(&th, inode->i_sb, 1); - if (err) { - reiserfs_write_unlock (inode->i_sb); - return err; - } - err = remove_save_link (inode, 1/* truncate */); - reiserfs_write_unlock(inode->i_sb); - if (err) - return err; - } - - return result; - } - - if ( unlikely((ssize_t) count < 0 )) - return -EINVAL; - - if (unlikely(!access_ok(VERIFY_READ, buf, count))) - return -EFAULT; - - down(&inode->i_sem); // locks the entire file for just 
us - - pos = *ppos; - - /* Check if we can write to specified region of file, file - is not overly big and this kind of stuff. Adjust pos and - count, if needed */ - res = generic_write_checks(file, &pos, &count, 0); - if (res) - goto out; - - if ( count == 0 ) - goto out; - - res = remove_suid(file->f_dentry); - if (res) - goto out; - - inode_update_time(inode, 1); /* Both mtime and ctime */ - - // Ok, we are done with all the checks. + size_t already_written = 0; // Number of bytes already written to the file. + loff_t pos; // Current position in the file. + ssize_t res; // return value of various functions that we call. + int err = 0; + struct inode *inode = file->f_dentry->d_inode; // Inode of the file that we are writing to. + /* To simplify coding at this time, we store + locked pages in array for now */ + struct page *prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME]; + struct reiserfs_transaction_handle th; + th.t_trans_id = 0; + + if (file->f_flags & O_DIRECT) { // Direct IO needs treatment + ssize_t result, after_file_end = 0; + if ((*ppos + count >= inode->i_size) + || (file->f_flags & O_APPEND)) { + /* If we are appending a file, we need to put this savelink in here. + If we will crash while doing direct io, finish_unfinished will + cut the garbage from the file end. 
*/ + reiserfs_write_lock(inode->i_sb); + err = + journal_begin(&th, inode->i_sb, + JOURNAL_PER_BALANCE_CNT); + if (err) { + reiserfs_write_unlock(inode->i_sb); + return err; + } + reiserfs_update_inode_transaction(inode); + add_save_link(&th, inode, 1 /* Truncate */ ); + after_file_end = 1; + err = + journal_end(&th, inode->i_sb, + JOURNAL_PER_BALANCE_CNT); + reiserfs_write_unlock(inode->i_sb); + if (err) + return err; + } + result = generic_file_write(file, buf, count, ppos); + + if (after_file_end) { /* Now update i_size and remove the savelink */ + struct reiserfs_transaction_handle th; + reiserfs_write_lock(inode->i_sb); + err = journal_begin(&th, inode->i_sb, 1); + if (err) { + reiserfs_write_unlock(inode->i_sb); + return err; + } + reiserfs_update_inode_transaction(inode); + reiserfs_update_sd(&th, inode); + err = journal_end(&th, inode->i_sb, 1); + if (err) { + reiserfs_write_unlock(inode->i_sb); + return err; + } + err = remove_save_link(inode, 1 /* truncate */ ); + reiserfs_write_unlock(inode->i_sb); + if (err) + return err; + } - // Now we should start real work + return result; + } - /* If we are going to write past the file's packed tail or if we are going - to overwrite part of the tail, we need that tail to be converted into - unformatted node */ - res = reiserfs_check_for_tail_and_convert( inode, pos, count); - if (res) - goto out; + if (unlikely((ssize_t) count < 0)) + return -EINVAL; + + if (unlikely(!access_ok(VERIFY_READ, buf, count))) + return -EFAULT; + + down(&inode->i_sem); // locks the entire file for just us + + pos = *ppos; + + /* Check if we can write to specified region of file, file + is not overly big and this kind of stuff. Adjust pos and + count, if needed */ + res = generic_write_checks(file, &pos, &count, 0); + if (res) + goto out; + + if (count == 0) + goto out; + + res = remove_suid(file->f_dentry); + if (res) + goto out; + + inode_update_time(inode, 1); /* Both mtime and ctime */ + + // Ok, we are done with all the checks. 
+ + // Now we should start real work + + /* If we are going to write past the file's packed tail or if we are going + to overwrite part of the tail, we need that tail to be converted into + unformatted node */ + res = reiserfs_check_for_tail_and_convert(inode, pos, count); + if (res) + goto out; + + while (count > 0) { + /* This is the main loop in which we running until some error occures + or until we write all of the data. */ + size_t num_pages; /* amount of pages we are going to write this iteration */ + size_t write_bytes; /* amount of bytes to write during this iteration */ + size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */ + + /* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos */ + num_pages = !!((pos + count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial + pages */ + ((count + + (pos & (PAGE_CACHE_SIZE - 1))) >> PAGE_CACHE_SHIFT); + /* convert size to amount of + pages */ + reiserfs_write_lock(inode->i_sb); + if (num_pages > REISERFS_WRITE_PAGES_AT_A_TIME + || num_pages > reiserfs_can_fit_pages(inode->i_sb)) { + /* If we were asked to write more data than we want to or if there + is not that much space, then we shorten amount of data to write + for this iteration. */ + num_pages = + min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME, + reiserfs_can_fit_pages(inode->i_sb)); + /* Also we should not forget to set size in bytes accordingly */ + write_bytes = (num_pages << PAGE_CACHE_SHIFT) - + (pos & (PAGE_CACHE_SIZE - 1)); + /* If position is not on the + start of the page, we need + to substract the offset + within page */ + } else + write_bytes = count; + + /* reserve the blocks to be allocated later, so that later on + we still have the space to write the blocks to */ + reiserfs_claim_blocks_to_be_allocated(inode->i_sb, + num_pages << + (PAGE_CACHE_SHIFT - + inode->i_blkbits)); + reiserfs_write_unlock(inode->i_sb); + + if (!num_pages) { /* If we do not have enough space even for a single page... 
*/ + if (pos > + inode->i_size + inode->i_sb->s_blocksize - + (pos & (inode->i_sb->s_blocksize - 1))) { + res = -ENOSPC; + break; // In case we are writing past the end of the last file block, break. + } + // Otherwise we are possibly overwriting the file, so + // let's set write size to be equal or less than blocksize. + // This way we get it correctly for file holes. + // But overwriting files on absolutelly full volumes would not + // be very efficient. Well, people are not supposed to fill + // 100% of disk space anyway. + write_bytes = + min_t(size_t, count, + inode->i_sb->s_blocksize - + (pos & (inode->i_sb->s_blocksize - 1))); + num_pages = 1; + // No blocks were claimed before, so do it now. + reiserfs_claim_blocks_to_be_allocated(inode->i_sb, + 1 << + (PAGE_CACHE_SHIFT + - + inode-> + i_blkbits)); + } - while ( count > 0) { - /* This is the main loop in which we running until some error occures - or until we write all of the data. */ - size_t num_pages;/* amount of pages we are going to write this iteration */ - size_t write_bytes; /* amount of bytes to write during this iteration */ - size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */ - - /* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos*/ - num_pages = !!((pos+count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial - pages */ - ((count + (pos & (PAGE_CACHE_SIZE-1))) >> PAGE_CACHE_SHIFT); - /* convert size to amount of - pages */ - reiserfs_write_lock(inode->i_sb); - if ( num_pages > REISERFS_WRITE_PAGES_AT_A_TIME - || num_pages > reiserfs_can_fit_pages(inode->i_sb) ) { - /* If we were asked to write more data than we want to or if there - is not that much space, then we shorten amount of data to write - for this iteration. 
*/ - num_pages = min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME, reiserfs_can_fit_pages(inode->i_sb)); - /* Also we should not forget to set size in bytes accordingly */ - write_bytes = (num_pages << PAGE_CACHE_SHIFT) - - (pos & (PAGE_CACHE_SIZE-1)); - /* If position is not on the - start of the page, we need - to substract the offset - within page */ - } else - write_bytes = count; + /* Prepare for writing into the region, read in all the + partially overwritten pages, if needed. And lock the pages, + so that nobody else can access these until we are done. + We get number of actual blocks needed as a result. */ + blocks_to_allocate = + reiserfs_prepare_file_region_for_write(inode, pos, + num_pages, + write_bytes, + prepared_pages); + if (blocks_to_allocate < 0) { + res = blocks_to_allocate; + reiserfs_release_claimed_blocks(inode->i_sb, + num_pages << + (PAGE_CACHE_SHIFT - + inode->i_blkbits)); + break; + } - /* reserve the blocks to be allocated later, so that later on - we still have the space to write the blocks to */ - reiserfs_claim_blocks_to_be_allocated(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits)); - reiserfs_write_unlock(inode->i_sb); + /* First we correct our estimate of how many blocks we need */ + reiserfs_release_claimed_blocks(inode->i_sb, + (num_pages << + (PAGE_CACHE_SHIFT - + inode->i_sb-> + s_blocksize_bits)) - + blocks_to_allocate); + + if (blocks_to_allocate > 0) { /*We only allocate blocks if we need to */ + /* Fill in all the possible holes and append the file if needed */ + res = + reiserfs_allocate_blocks_for_region(&th, inode, pos, + num_pages, + write_bytes, + prepared_pages, + blocks_to_allocate); + } - if ( !num_pages ) { /* If we do not have enough space even for a single page... */ - if ( pos > inode->i_size+inode->i_sb->s_blocksize-(pos & (inode->i_sb->s_blocksize-1))) { - res = -ENOSPC; - break; // In case we are writing past the end of the last file block, break. 
- } - // Otherwise we are possibly overwriting the file, so - // let's set write size to be equal or less than blocksize. - // This way we get it correctly for file holes. - // But overwriting files on absolutelly full volumes would not - // be very efficient. Well, people are not supposed to fill - // 100% of disk space anyway. - write_bytes = min_t(size_t, count, inode->i_sb->s_blocksize - (pos & (inode->i_sb->s_blocksize - 1))); - num_pages = 1; - // No blocks were claimed before, so do it now. - reiserfs_claim_blocks_to_be_allocated(inode->i_sb, 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)); - } + /* well, we have allocated the blocks, so it is time to free + the reservation we made earlier. */ + reiserfs_release_claimed_blocks(inode->i_sb, + blocks_to_allocate); + if (res) { + reiserfs_unprepare_pages(prepared_pages, num_pages); + break; + } - /* Prepare for writing into the region, read in all the - partially overwritten pages, if needed. And lock the pages, - so that nobody else can access these until we are done. 
- We get number of actual blocks needed as a result.*/ - blocks_to_allocate = reiserfs_prepare_file_region_for_write(inode, pos, num_pages, write_bytes, prepared_pages); - if ( blocks_to_allocate < 0 ) { - res = blocks_to_allocate; - reiserfs_release_claimed_blocks(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits)); - break; - } +/* NOTE that allocating blocks and filling blocks can be done in reverse order + and probably we would do that just to get rid of garbage in files after a + crash */ - /* First we correct our estimate of how many blocks we need */ - reiserfs_release_claimed_blocks(inode->i_sb, (num_pages << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)) - blocks_to_allocate ); + /* Copy data from user-supplied buffer to file's pages */ + res = + reiserfs_copy_from_user_to_file_region(pos, num_pages, + write_bytes, + prepared_pages, buf); + if (res) { + reiserfs_unprepare_pages(prepared_pages, num_pages); + break; + } - if ( blocks_to_allocate > 0) {/*We only allocate blocks if we need to*/ - /* Fill in all the possible holes and append the file if needed */ - res = reiserfs_allocate_blocks_for_region(&th, inode, pos, num_pages, write_bytes, prepared_pages, blocks_to_allocate); + /* Send the pages to disk and unlock them. */ + res = + reiserfs_submit_file_region_for_write(&th, inode, pos, + num_pages, + write_bytes, + prepared_pages); + if (res) + break; + + already_written += write_bytes; + buf += write_bytes; + *ppos = pos += write_bytes; + count -= write_bytes; + balance_dirty_pages_ratelimited(inode->i_mapping); } - /* well, we have allocated the blocks, so it is time to free - the reservation we made earlier. 
*/ - reiserfs_release_claimed_blocks(inode->i_sb, blocks_to_allocate); - if ( res ) { - reiserfs_unprepare_pages(prepared_pages, num_pages); - break; + /* this is only true on error */ + if (th.t_trans_id) { + reiserfs_write_lock(inode->i_sb); + err = journal_end(&th, th.t_super, th.t_blocks_allocated); + reiserfs_write_unlock(inode->i_sb); + if (err) { + res = err; + goto out; + } } -/* NOTE that allocating blocks and filling blocks can be done in reverse order - and probably we would do that just to get rid of garbage in files after a - crash */ + if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) + res = + generic_osync_inode(inode, file->f_mapping, + OSYNC_METADATA | OSYNC_DATA); - /* Copy data from user-supplied buffer to file's pages */ - res = reiserfs_copy_from_user_to_file_region(pos, num_pages, write_bytes, prepared_pages, buf); - if ( res ) { - reiserfs_unprepare_pages(prepared_pages, num_pages); - break; - } + up(&inode->i_sem); + reiserfs_async_progress_wait(inode->i_sb); + return (already_written != 0) ? already_written : res; - /* Send the pages to disk and unlock them. */ - res = reiserfs_submit_file_region_for_write(&th, inode, pos, num_pages, - write_bytes,prepared_pages); - if ( res ) - break; - - already_written += write_bytes; - buf += write_bytes; - *ppos = pos += write_bytes; - count -= write_bytes; - balance_dirty_pages_ratelimited(inode->i_mapping); - } - - /* this is only true on error */ - if (th.t_trans_id) { - reiserfs_write_lock(inode->i_sb); - err = journal_end(&th, th.t_super, th.t_blocks_allocated); - reiserfs_write_unlock(inode->i_sb); - if (err) { - res = err; - goto out; - } - } - - if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) - res = generic_osync_inode(inode, file->f_mapping, OSYNC_METADATA|OSYNC_DATA); - - up(&inode->i_sem); - reiserfs_async_progress_wait(inode->i_sb); - return (already_written != 0)?already_written:res; - -out: - up(&inode->i_sem); // unlock the file on exit. 
- return res; + out: + up(&inode->i_sem); // unlock the file on exit. + return res; } -static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf, + size_t count, loff_t pos) { - return generic_file_aio_write(iocb, buf, count, pos); + return generic_file_aio_write(iocb, buf, count, pos); } - - struct file_operations reiserfs_file_operations = { - .read = generic_file_read, - .write = reiserfs_file_write, - .ioctl = reiserfs_ioctl, - .mmap = generic_file_mmap, - .release = reiserfs_file_release, - .fsync = reiserfs_sync_file, - .sendfile = generic_file_sendfile, - .aio_read = generic_file_aio_read, - .aio_write = reiserfs_aio_write, + .read = generic_file_read, + .write = reiserfs_file_write, + .ioctl = reiserfs_ioctl, + .mmap = generic_file_mmap, + .release = reiserfs_file_release, + .fsync = reiserfs_sync_file, + .sendfile = generic_file_sendfile, + .aio_read = generic_file_aio_read, + .aio_write = reiserfs_aio_write, }; - -struct inode_operations reiserfs_file_inode_operations = { - .truncate = reiserfs_vfs_truncate_file, - .setattr = reiserfs_setattr, - .setxattr = reiserfs_setxattr, - .getxattr = reiserfs_getxattr, - .listxattr = reiserfs_listxattr, - .removexattr = reiserfs_removexattr, - .permission = reiserfs_permission, +struct inode_operations reiserfs_file_inode_operations = { + .truncate = reiserfs_vfs_truncate_file, + .setattr = reiserfs_setattr, + .setxattr = reiserfs_setxattr, + .getxattr = reiserfs_getxattr, + .listxattr = reiserfs_listxattr, + .removexattr = reiserfs_removexattr, + .permission = reiserfs_permission, }; - - diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c index e4f64be9e15..2706e2adffa 100644 --- a/fs/reiserfs/fix_node.c +++ b/fs/reiserfs/fix_node.c @@ -34,14 +34,12 @@ ** **/ - #include #include #include #include #include - /* To make any changes in the tree we find a node, that contains item to be 
changed/deleted or position in the node we insert a new item to. We call this node S. To do balancing we need to decide what we @@ -56,490 +54,522 @@ have to have if we do not any shiftings, if we shift to left/right neighbor or to both. */ - /* taking item number in virtual node, returns number of item, that it has in source buffer */ -static inline int old_item_num (int new_num, int affected_item_num, int mode) +static inline int old_item_num(int new_num, int affected_item_num, int mode) { - if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) - return new_num; + if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) + return new_num; - if (mode == M_INSERT) { + if (mode == M_INSERT) { - RFALSE( new_num == 0, - "vs-8005: for INSERT mode and item number of inserted item"); + RFALSE(new_num == 0, + "vs-8005: for INSERT mode and item number of inserted item"); - return new_num - 1; - } + return new_num - 1; + } - RFALSE( mode != M_DELETE, - "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'", mode); - /* delete mode */ - return new_num + 1; + RFALSE(mode != M_DELETE, + "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'", + mode); + /* delete mode */ + return new_num + 1; } -static void create_virtual_node (struct tree_balance * tb, int h) +static void create_virtual_node(struct tree_balance *tb, int h) { - struct item_head * ih; - struct virtual_node * vn = tb->tb_vn; - int new_num; - struct buffer_head * Sh; /* this comes from tb->S[h] */ + struct item_head *ih; + struct virtual_node *vn = tb->tb_vn; + int new_num; + struct buffer_head *Sh; /* this comes from tb->S[h] */ - Sh = PATH_H_PBUFFER (tb->tb_path, h); + Sh = PATH_H_PBUFFER(tb->tb_path, h); - /* size of changed node */ - vn->vn_size = MAX_CHILD_SIZE (Sh) - B_FREE_SPACE (Sh) + tb->insert_size[h]; + /* size of changed node */ + vn->vn_size = + MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h]; - /* for internal nodes array if virtual items is not 
created */ - if (h) { - vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE); - return; - } - - /* number of items in virtual node */ - vn->vn_nr_item = B_NR_ITEMS (Sh) + ((vn->vn_mode == M_INSERT)? 1 : 0) - ((vn->vn_mode == M_DELETE)? 1 : 0); - - /* first virtual item */ - vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1); - memset (vn->vn_vi, 0, vn->vn_nr_item * sizeof (struct virtual_item)); - vn->vn_free_ptr += vn->vn_nr_item * sizeof (struct virtual_item); - - - /* first item in the node */ - ih = B_N_PITEM_HEAD (Sh, 0); - - /* define the mergeability for 0-th item (if it is not being deleted) */ - if (op_is_left_mergeable (&(ih->ih_key), Sh->b_size) && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) - vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE; - - /* go through all items those remain in the virtual node (except for the new (inserted) one) */ - for (new_num = 0; new_num < vn->vn_nr_item; new_num ++) { - int j; - struct virtual_item * vi = vn->vn_vi + new_num; - int is_affected = ((new_num != vn->vn_affected_item_num) ? 
0 : 1); - - - if (is_affected && vn->vn_mode == M_INSERT) - continue; - - /* get item number in source node */ - j = old_item_num (new_num, vn->vn_affected_item_num, vn->vn_mode); - - vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE; - vi->vi_ih = ih + j; - vi->vi_item = B_I_PITEM (Sh, ih + j); - vi->vi_uarea = vn->vn_free_ptr; - - // FIXME: there is no check, that item operation did not - // consume too much memory - vn->vn_free_ptr += op_create_vi (vn, vi, is_affected, tb->insert_size [0]); - if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) - reiserfs_panic (tb->tb_sb, "vs-8030: create_virtual_node: " - "virtual node space consumed"); - - if (!is_affected) - /* this is not being changed */ - continue; - - if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) { - vn->vn_vi[new_num].vi_item_len += tb->insert_size[0]; - vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted + /* for internal nodes array if virtual items is not created */ + if (h) { + vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE); + return; } - } - - - /* virtual inserted item is not defined yet */ - if (vn->vn_mode == M_INSERT) { - struct virtual_item * vi = vn->vn_vi + vn->vn_affected_item_num; - - RFALSE( vn->vn_ins_ih == 0, - "vs-8040: item header of inserted item is not specified"); - vi->vi_item_len = tb->insert_size[0]; - vi->vi_ih = vn->vn_ins_ih; - vi->vi_item = vn->vn_data; - vi->vi_uarea = vn->vn_free_ptr; - - op_create_vi (vn, vi, 0/*not pasted or cut*/, tb->insert_size [0]); - } - - /* set right merge flag we take right delimiting key and check whether it is a mergeable item */ - if (tb->CFR[0]) { - struct reiserfs_key * key; - - key = B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]); - if (op_is_left_mergeable (key, Sh->b_size) && (vn->vn_mode != M_DELETE || - vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1)) - vn->vn_vi[vn->vn_nr_item-1].vi_type |= VI_TYPE_RIGHT_MERGEABLE; -#ifdef CONFIG_REISERFS_CHECK - if (op_is_left_mergeable (key, Sh->b_size) && - 
!(vn->vn_mode != M_DELETE || vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1) ) { - /* we delete last item and it could be merged with right neighbor's first item */ - if (!(B_NR_ITEMS (Sh) == 1 && is_direntry_le_ih (B_N_PITEM_HEAD (Sh, 0)) && - I_ENTRY_COUNT (B_N_PITEM_HEAD (Sh, 0)) == 1)) { - /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */ - print_block (Sh, 0, -1, -1); - reiserfs_panic (tb->tb_sb, "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c", - key, vn->vn_affected_item_num, vn->vn_mode, M_DELETE); - } else - /* we can delete directory item, that has only one directory entry in it */ - ; + /* number of items in virtual node */ + vn->vn_nr_item = + B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) - + ((vn->vn_mode == M_DELETE) ? 1 : 0); + + /* first virtual item */ + vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1); + memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item)); + vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item); + + /* first item in the node */ + ih = B_N_PITEM_HEAD(Sh, 0); + + /* define the mergeability for 0-th item (if it is not being deleted) */ + if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size) + && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) + vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE; + + /* go through all items those remain in the virtual node (except for the new (inserted) one) */ + for (new_num = 0; new_num < vn->vn_nr_item; new_num++) { + int j; + struct virtual_item *vi = vn->vn_vi + new_num; + int is_affected = + ((new_num != vn->vn_affected_item_num) ? 
0 : 1); + + if (is_affected && vn->vn_mode == M_INSERT) + continue; + + /* get item number in source node */ + j = old_item_num(new_num, vn->vn_affected_item_num, + vn->vn_mode); + + vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE; + vi->vi_ih = ih + j; + vi->vi_item = B_I_PITEM(Sh, ih + j); + vi->vi_uarea = vn->vn_free_ptr; + + // FIXME: there is no check, that item operation did not + // consume too much memory + vn->vn_free_ptr += + op_create_vi(vn, vi, is_affected, tb->insert_size[0]); + if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) + reiserfs_panic(tb->tb_sb, + "vs-8030: create_virtual_node: " + "virtual node space consumed"); + + if (!is_affected) + /* this is not being changed */ + continue; + + if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) { + vn->vn_vi[new_num].vi_item_len += tb->insert_size[0]; + vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted + } } + + /* virtual inserted item is not defined yet */ + if (vn->vn_mode == M_INSERT) { + struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num; + + RFALSE(vn->vn_ins_ih == 0, + "vs-8040: item header of inserted item is not specified"); + vi->vi_item_len = tb->insert_size[0]; + vi->vi_ih = vn->vn_ins_ih; + vi->vi_item = vn->vn_data; + vi->vi_uarea = vn->vn_free_ptr; + + op_create_vi(vn, vi, 0 /*not pasted or cut */ , + tb->insert_size[0]); + } + + /* set right merge flag we take right delimiting key and check whether it is a mergeable item */ + if (tb->CFR[0]) { + struct reiserfs_key *key; + + key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]); + if (op_is_left_mergeable(key, Sh->b_size) + && (vn->vn_mode != M_DELETE + || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) + vn->vn_vi[vn->vn_nr_item - 1].vi_type |= + VI_TYPE_RIGHT_MERGEABLE; + +#ifdef CONFIG_REISERFS_CHECK + if (op_is_left_mergeable(key, Sh->b_size) && + !(vn->vn_mode != M_DELETE + || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) { + /* we delete last item and it could be merged with right neighbor's 
first item */ + if (! + (B_NR_ITEMS(Sh) == 1 + && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0)) + && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) { + /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */ + print_block(Sh, 0, -1, -1); + reiserfs_panic(tb->tb_sb, + "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c", + key, vn->vn_affected_item_num, + vn->vn_mode, M_DELETE); + } else + /* we can delete directory item, that has only one directory entry in it */ + ; + } #endif - - } -} + } +} /* using virtual node check, how many items can be shifted to left neighbor */ -static void check_left (struct tree_balance * tb, int h, int cur_free) +static void check_left(struct tree_balance *tb, int h, int cur_free) { - int i; - struct virtual_node * vn = tb->tb_vn; - struct virtual_item * vi; - int d_size, ih_size; + int i; + struct virtual_node *vn = tb->tb_vn; + struct virtual_item *vi; + int d_size, ih_size; - RFALSE( cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free); + RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free); - /* internal level */ - if (h > 0) { - tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE); - return; - } + /* internal level */ + if (h > 0) { + tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE); + return; + } - /* leaf level */ + /* leaf level */ - if (!cur_free || !vn->vn_nr_item) { - /* no free space or nothing to move */ - tb->lnum[h] = 0; - tb->lbytes = -1; - return; - } + if (!cur_free || !vn->vn_nr_item) { + /* no free space or nothing to move */ + tb->lnum[h] = 0; + tb->lbytes = -1; + return; + } - RFALSE( !PATH_H_PPARENT (tb->tb_path, 0), - "vs-8055: parent does not exist or invalid"); + RFALSE(!PATH_H_PPARENT(tb->tb_path, 0), + "vs-8055: parent does not exist or invalid"); - vi = vn->vn_vi; - if ((unsigned int)cur_free >= (vn->vn_size - ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? 
IH_SIZE : 0))) { - /* all contents of S[0] fits into L[0] */ + vi = vn->vn_vi; + if ((unsigned int)cur_free >= + (vn->vn_size - + ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) { + /* all contents of S[0] fits into L[0] */ - RFALSE( vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE, - "vs-8055: invalid mode or balance condition failed"); + RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE, + "vs-8055: invalid mode or balance condition failed"); - tb->lnum[0] = vn->vn_nr_item; - tb->lbytes = -1; - return; - } - - - d_size = 0, ih_size = IH_SIZE; - - /* first item may be merge with last item in left neighbor */ - if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE) - d_size = -((int)IH_SIZE), ih_size = 0; - - tb->lnum[0] = 0; - for (i = 0; i < vn->vn_nr_item; i ++, ih_size = IH_SIZE, d_size = 0, vi ++) { - d_size += vi->vi_item_len; - if (cur_free >= d_size) { - /* the item can be shifted entirely */ - cur_free -= d_size; - tb->lnum[0] ++; - continue; + tb->lnum[0] = vn->vn_nr_item; + tb->lbytes = -1; + return; } - - /* the item cannot be shifted entirely, try to split it */ - /* check whether L[0] can hold ih and at least one byte of the item body */ - if (cur_free <= ih_size) { - /* cannot shift even a part of the current item */ - tb->lbytes = -1; - return; + + d_size = 0, ih_size = IH_SIZE; + + /* first item may be merge with last item in left neighbor */ + if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE) + d_size = -((int)IH_SIZE), ih_size = 0; + + tb->lnum[0] = 0; + for (i = 0; i < vn->vn_nr_item; + i++, ih_size = IH_SIZE, d_size = 0, vi++) { + d_size += vi->vi_item_len; + if (cur_free >= d_size) { + /* the item can be shifted entirely */ + cur_free -= d_size; + tb->lnum[0]++; + continue; + } + + /* the item cannot be shifted entirely, try to split it */ + /* check whether L[0] can hold ih and at least one byte of the item body */ + if (cur_free <= ih_size) { + /* cannot shift even a part of the current item */ + tb->lbytes = -1; + return; + } + cur_free -= 
ih_size; + + tb->lbytes = op_check_left(vi, cur_free, 0, 0); + if (tb->lbytes != -1) + /* count partially shifted item */ + tb->lnum[0]++; + + break; } - cur_free -= ih_size; - - tb->lbytes = op_check_left (vi, cur_free, 0, 0); - if (tb->lbytes != -1) - /* count partially shifted item */ - tb->lnum[0] ++; - - break; - } - - return; -} + return; +} /* using virtual node check, how many items can be shifted to right neighbor */ -static void check_right (struct tree_balance * tb, int h, int cur_free) +static void check_right(struct tree_balance *tb, int h, int cur_free) { - int i; - struct virtual_node * vn = tb->tb_vn; - struct virtual_item * vi; - int d_size, ih_size; - - RFALSE( cur_free < 0, "vs-8070: cur_free < 0"); - - /* internal level */ - if (h > 0) { - tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE); - return; - } - - /* leaf level */ - - if (!cur_free || !vn->vn_nr_item) { - /* no free space */ - tb->rnum[h] = 0; - tb->rbytes = -1; - return; - } - - RFALSE( !PATH_H_PPARENT (tb->tb_path, 0), - "vs-8075: parent does not exist or invalid"); - - vi = vn->vn_vi + vn->vn_nr_item - 1; - if ((unsigned int)cur_free >= (vn->vn_size - ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? 
IH_SIZE : 0))) { - /* all contents of S[0] fits into R[0] */ - - RFALSE( vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE, - "vs-8080: invalid mode or balance condition failed"); - - tb->rnum[h] = vn->vn_nr_item; - tb->rbytes = -1; - return; - } - - d_size = 0, ih_size = IH_SIZE; - - /* last item may be merge with first item in right neighbor */ - if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) - d_size = -(int)IH_SIZE, ih_size = 0; - - tb->rnum[0] = 0; - for (i = vn->vn_nr_item - 1; i >= 0; i --, d_size = 0, ih_size = IH_SIZE, vi --) { - d_size += vi->vi_item_len; - if (cur_free >= d_size) { - /* the item can be shifted entirely */ - cur_free -= d_size; - tb->rnum[0] ++; - continue; + int i; + struct virtual_node *vn = tb->tb_vn; + struct virtual_item *vi; + int d_size, ih_size; + + RFALSE(cur_free < 0, "vs-8070: cur_free < 0"); + + /* internal level */ + if (h > 0) { + tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE); + return; } - - /* check whether R[0] can hold ih and at least one byte of the item body */ - if ( cur_free <= ih_size ) { /* cannot shift even a part of the current item */ - tb->rbytes = -1; - return; + + /* leaf level */ + + if (!cur_free || !vn->vn_nr_item) { + /* no free space */ + tb->rnum[h] = 0; + tb->rbytes = -1; + return; } - - /* R[0] can hold the header of the item and at least one byte of its body */ - cur_free -= ih_size; /* cur_free is still > 0 */ - - tb->rbytes = op_check_right (vi, cur_free); - if (tb->rbytes != -1) - /* count partially shifted item */ - tb->rnum[0] ++; - - break; - } - - return; -} + RFALSE(!PATH_H_PPARENT(tb->tb_path, 0), + "vs-8075: parent does not exist or invalid"); + + vi = vn->vn_vi + vn->vn_nr_item - 1; + if ((unsigned int)cur_free >= + (vn->vn_size - + ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? 
IH_SIZE : 0))) { + /* all contents of S[0] fits into R[0] */ + + RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE, + "vs-8080: invalid mode or balance condition failed"); + + tb->rnum[h] = vn->vn_nr_item; + tb->rbytes = -1; + return; + } + + d_size = 0, ih_size = IH_SIZE; + + /* last item may be merge with first item in right neighbor */ + if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) + d_size = -(int)IH_SIZE, ih_size = 0; + + tb->rnum[0] = 0; + for (i = vn->vn_nr_item - 1; i >= 0; + i--, d_size = 0, ih_size = IH_SIZE, vi--) { + d_size += vi->vi_item_len; + if (cur_free >= d_size) { + /* the item can be shifted entirely */ + cur_free -= d_size; + tb->rnum[0]++; + continue; + } + + /* check whether R[0] can hold ih and at least one byte of the item body */ + if (cur_free <= ih_size) { /* cannot shift even a part of the current item */ + tb->rbytes = -1; + return; + } + + /* R[0] can hold the header of the item and at least one byte of its body */ + cur_free -= ih_size; /* cur_free is still > 0 */ + + tb->rbytes = op_check_right(vi, cur_free); + if (tb->rbytes != -1) + /* count partially shifted item */ + tb->rnum[0]++; + + break; + } + + return; +} /* * from - number of items, which are shifted to left neighbor entirely * to - number of item, which are shifted to right neighbor entirely * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */ -static int get_num_ver (int mode, struct tree_balance * tb, int h, - int from, int from_bytes, - int to, int to_bytes, - short * snum012, int flow - ) +static int get_num_ver(int mode, struct tree_balance *tb, int h, + int from, int from_bytes, + int to, int to_bytes, short *snum012, int flow) { - int i; - int cur_free; - // int bytes; - int units; - struct virtual_node * vn = tb->tb_vn; - // struct virtual_item * vi; - - int total_node_size, max_node_size, 
current_item_size; - int needed_nodes; - int start_item, /* position of item we start filling node from */ - end_item, /* position of item we finish filling node by */ - start_bytes,/* number of first bytes (entries for directory) of start_item-th item - we do not include into node that is being filled */ - end_bytes; /* number of last bytes (entries for directory) of end_item-th item - we do node include into node that is being filled */ - int split_item_positions[2]; /* these are positions in virtual item of - items, that are split between S[0] and - S1new and S1new and S2new */ - - split_item_positions[0] = -1; - split_item_positions[1] = -1; - - /* We only create additional nodes if we are in insert or paste mode - or we are in replace mode at the internal level. If h is 0 and - the mode is M_REPLACE then in fix_nodes we change the mode to - paste or insert before we get here in the code. */ - RFALSE( tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), - "vs-8100: insert_size < 0 in overflow"); - - max_node_size = MAX_CHILD_SIZE (PATH_H_PBUFFER (tb->tb_path, h)); - - /* snum012 [0-2] - number of items, that lay - to S[0], first new node and second new node */ - snum012[3] = -1; /* s1bytes */ - snum012[4] = -1; /* s2bytes */ - - /* internal level */ - if (h > 0) { - i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE); - if (i == max_node_size) - return 1; - return (i / max_node_size + 1); - } - - /* leaf level */ - needed_nodes = 1; - total_node_size = 0; - cur_free = max_node_size; - - // start from 'from'-th item - start_item = from; - // skip its first 'start_bytes' units - start_bytes = ((from_bytes != -1) ? from_bytes : 0); - - // last included item is the 'end_item'-th one - end_item = vn->vn_nr_item - to - 1; - // do not count last 'end_bytes' units of 'end_item'-th item - end_bytes = (to_bytes != -1) ? to_bytes : 0; - - /* go through all item beginning from the start_item-th item and ending by - the end_item-th item. 
Do not count first 'start_bytes' units of - 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */ - - for (i = start_item; i <= end_item; i ++) { - struct virtual_item * vi = vn->vn_vi + i; - int skip_from_end = ((i == end_item) ? end_bytes : 0); - - RFALSE( needed_nodes > 3, "vs-8105: too many nodes are needed"); - - /* get size of current item */ - current_item_size = vi->vi_item_len; - - /* do not take in calculation head part (from_bytes) of from-th item */ - current_item_size -= op_part_size (vi, 0/*from start*/, start_bytes); - - /* do not take in calculation tail part of last item */ - current_item_size -= op_part_size (vi, 1/*from end*/, skip_from_end); - - /* if item fits into current node entierly */ - if (total_node_size + current_item_size <= max_node_size) { - snum012[needed_nodes - 1] ++; - total_node_size += current_item_size; - start_bytes = 0; - continue; + int i; + int cur_free; + // int bytes; + int units; + struct virtual_node *vn = tb->tb_vn; + // struct virtual_item * vi; + + int total_node_size, max_node_size, current_item_size; + int needed_nodes; + int start_item, /* position of item we start filling node from */ + end_item, /* position of item we finish filling node by */ + start_bytes, /* number of first bytes (entries for directory) of start_item-th item + we do not include into node that is being filled */ + end_bytes; /* number of last bytes (entries for directory) of end_item-th item + we do node include into node that is being filled */ + int split_item_positions[2]; /* these are positions in virtual item of + items, that are split between S[0] and + S1new and S1new and S2new */ + + split_item_positions[0] = -1; + split_item_positions[1] = -1; + + /* We only create additional nodes if we are in insert or paste mode + or we are in replace mode at the internal level. If h is 0 and + the mode is M_REPLACE then in fix_nodes we change the mode to + paste or insert before we get here in the code. 
*/ + RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), + "vs-8100: insert_size < 0 in overflow"); + + max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h)); + + /* snum012 [0-2] - number of items, that lay + to S[0], first new node and second new node */ + snum012[3] = -1; /* s1bytes */ + snum012[4] = -1; /* s2bytes */ + + /* internal level */ + if (h > 0) { + i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE); + if (i == max_node_size) + return 1; + return (i / max_node_size + 1); } - if (current_item_size > max_node_size) { - /* virtual item length is longer, than max size of item in - a node. It is impossible for direct item */ - RFALSE( is_direct_le_ih (vi->vi_ih), - "vs-8110: " - "direct item length is %d. It can not be longer than %d", - current_item_size, max_node_size); - /* we will try to split it */ - flow = 1; + /* leaf level */ + needed_nodes = 1; + total_node_size = 0; + cur_free = max_node_size; + + // start from 'from'-th item + start_item = from; + // skip its first 'start_bytes' units + start_bytes = ((from_bytes != -1) ? from_bytes : 0); + + // last included item is the 'end_item'-th one + end_item = vn->vn_nr_item - to - 1; + // do not count last 'end_bytes' units of 'end_item'-th item + end_bytes = (to_bytes != -1) ? to_bytes : 0; + + /* go through all item beginning from the start_item-th item and ending by + the end_item-th item. Do not count first 'start_bytes' units of + 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */ + + for (i = start_item; i <= end_item; i++) { + struct virtual_item *vi = vn->vn_vi + i; + int skip_from_end = ((i == end_item) ? 
end_bytes : 0); + + RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed"); + + /* get size of current item */ + current_item_size = vi->vi_item_len; + + /* do not take in calculation head part (from_bytes) of from-th item */ + current_item_size -= + op_part_size(vi, 0 /*from start */ , start_bytes); + + /* do not take in calculation tail part of last item */ + current_item_size -= + op_part_size(vi, 1 /*from end */ , skip_from_end); + + /* if item fits into current node entierly */ + if (total_node_size + current_item_size <= max_node_size) { + snum012[needed_nodes - 1]++; + total_node_size += current_item_size; + start_bytes = 0; + continue; + } + + if (current_item_size > max_node_size) { + /* virtual item length is longer, than max size of item in + a node. It is impossible for direct item */ + RFALSE(is_direct_le_ih(vi->vi_ih), + "vs-8110: " + "direct item length is %d. It can not be longer than %d", + current_item_size, max_node_size); + /* we will try to split it */ + flow = 1; + } + + if (!flow) { + /* as we do not split items, take new node and continue */ + needed_nodes++; + i--; + total_node_size = 0; + continue; + } + // calculate number of item units which fit into node being + // filled + { + int free_space; + + free_space = max_node_size - total_node_size - IH_SIZE; + units = + op_check_left(vi, free_space, start_bytes, + skip_from_end); + if (units == -1) { + /* nothing fits into current node, take new node and continue */ + needed_nodes++, i--, total_node_size = 0; + continue; + } + } + + /* something fits into the current node */ + //if (snum012[3] != -1 || needed_nodes != 1) + // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required"); + //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units; + start_bytes += units; + snum012[needed_nodes - 1 + 3] = units; + + if (needed_nodes > 2) + reiserfs_warning(tb->tb_sb, "vs-8111: get_num_ver: " + "split_item_position is out of boundary"); + 
snum012[needed_nodes - 1]++; + split_item_positions[needed_nodes - 1] = i; + needed_nodes++; + /* continue from the same item with start_bytes != -1 */ + start_item = i; + i--; + total_node_size = 0; } - if (!flow) { - /* as we do not split items, take new node and continue */ - needed_nodes ++; i --; total_node_size = 0; - continue; + // sum012[4] (if it is not -1) contains number of units of which + // are to be in S1new, snum012[3] - to be in S0. They are supposed + // to be S1bytes and S2bytes correspondingly, so recalculate + if (snum012[4] > 0) { + int split_item_num; + int bytes_to_r, bytes_to_l; + int bytes_to_S1new; + + split_item_num = split_item_positions[1]; + bytes_to_l = + ((from == split_item_num + && from_bytes != -1) ? from_bytes : 0); + bytes_to_r = + ((end_item == split_item_num + && end_bytes != -1) ? end_bytes : 0); + bytes_to_S1new = + ((split_item_positions[0] == + split_item_positions[1]) ? snum012[3] : 0); + + // s2bytes + snum012[4] = + op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] - + bytes_to_r - bytes_to_l - bytes_to_S1new; + + if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY && + vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT) + reiserfs_warning(tb->tb_sb, "vs-8115: get_num_ver: not " + "directory or indirect item"); } - // calculate number of item units which fit into node being - // filled - { - int free_space; - - free_space = max_node_size - total_node_size - IH_SIZE; - units = op_check_left (vi, free_space, start_bytes, skip_from_end); - if (units == -1) { - /* nothing fits into current node, take new node and continue */ - needed_nodes ++, i--, total_node_size = 0; - continue; - } + /* now we know S2bytes, calculate S1bytes */ + if (snum012[3] > 0) { + int split_item_num; + int bytes_to_r, bytes_to_l; + int bytes_to_S2new; + + split_item_num = split_item_positions[0]; + bytes_to_l = + ((from == split_item_num + && from_bytes != -1) ? 
from_bytes : 0); + bytes_to_r = + ((end_item == split_item_num + && end_bytes != -1) ? end_bytes : 0); + bytes_to_S2new = + ((split_item_positions[0] == split_item_positions[1] + && snum012[4] != -1) ? snum012[4] : 0); + + // s1bytes + snum012[3] = + op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] - + bytes_to_r - bytes_to_l - bytes_to_S2new; } - /* something fits into the current node */ - //if (snum012[3] != -1 || needed_nodes != 1) - // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required"); - //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units; - start_bytes += units; - snum012[needed_nodes - 1 + 3] = units; - - if (needed_nodes > 2) - reiserfs_warning (tb->tb_sb, "vs-8111: get_num_ver: " - "split_item_position is out of boundary"); - snum012[needed_nodes - 1] ++; - split_item_positions[needed_nodes - 1] = i; - needed_nodes ++; - /* continue from the same item with start_bytes != -1 */ - start_item = i; - i --; - total_node_size = 0; - } - - // sum012[4] (if it is not -1) contains number of units of which - // are to be in S1new, snum012[3] - to be in S0. They are supposed - // to be S1bytes and S2bytes correspondingly, so recalculate - if (snum012[4] > 0) { - int split_item_num; - int bytes_to_r, bytes_to_l; - int bytes_to_S1new; - - split_item_num = split_item_positions[1]; - bytes_to_l = ((from == split_item_num && from_bytes != -1) ? from_bytes : 0); - bytes_to_r = ((end_item == split_item_num && end_bytes != -1) ? end_bytes : 0); - bytes_to_S1new = ((split_item_positions[0] == split_item_positions[1]) ? 
snum012[3] : 0); - - // s2bytes - snum012[4] = op_unit_num (&vn->vn_vi[split_item_num]) - snum012[4] - bytes_to_r - bytes_to_l - bytes_to_S1new; - - if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY && - vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT) - reiserfs_warning (tb->tb_sb, "vs-8115: get_num_ver: not " - "directory or indirect item"); - } - - /* now we know S2bytes, calculate S1bytes */ - if (snum012[3] > 0) { - int split_item_num; - int bytes_to_r, bytes_to_l; - int bytes_to_S2new; - - split_item_num = split_item_positions[0]; - bytes_to_l = ((from == split_item_num && from_bytes != -1) ? from_bytes : 0); - bytes_to_r = ((end_item == split_item_num && end_bytes != -1) ? end_bytes : 0); - bytes_to_S2new = ((split_item_positions[0] == split_item_positions[1] && snum012[4] != -1) ? snum012[4] : 0); - - // s1bytes - snum012[3] = op_unit_num (&vn->vn_vi[split_item_num]) - snum012[3] - bytes_to_r - bytes_to_l - bytes_to_S2new; - } - - return needed_nodes; + return needed_nodes; } - #ifdef CONFIG_REISERFS_CHECK -extern struct tree_balance * cur_tb; +extern struct tree_balance *cur_tb; #endif - /* Set parameters for balancing. * Performs write of results of analysis of balancing into structure tb, * where it will later be used by the functions that actually do the balancing. 
@@ -557,131 +587,130 @@ extern struct tree_balance * cur_tb; * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array) */ -static void set_parameters (struct tree_balance * tb, int h, int lnum, - int rnum, int blk_num, short * s012, int lb, int rb) +static void set_parameters(struct tree_balance *tb, int h, int lnum, + int rnum, int blk_num, short *s012, int lb, int rb) { - tb->lnum[h] = lnum; - tb->rnum[h] = rnum; - tb->blknum[h] = blk_num; + tb->lnum[h] = lnum; + tb->rnum[h] = rnum; + tb->blknum[h] = blk_num; - if (h == 0) - { /* only for leaf level */ - if (s012 != NULL) - { - tb->s0num = * s012 ++, - tb->s1num = * s012 ++, - tb->s2num = * s012 ++; - tb->s1bytes = * s012 ++; - tb->s2bytes = * s012; + if (h == 0) { /* only for leaf level */ + if (s012 != NULL) { + tb->s0num = *s012++, + tb->s1num = *s012++, tb->s2num = *s012++; + tb->s1bytes = *s012++; + tb->s2bytes = *s012; + } + tb->lbytes = lb; + tb->rbytes = rb; } - tb->lbytes = lb; - tb->rbytes = rb; - } - PROC_INFO_ADD( tb -> tb_sb, lnum[ h ], lnum ); - PROC_INFO_ADD( tb -> tb_sb, rnum[ h ], rnum ); - - PROC_INFO_ADD( tb -> tb_sb, lbytes[ h ], lb ); - PROC_INFO_ADD( tb -> tb_sb, rbytes[ h ], rb ); -} - + PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum); + PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum); + PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb); + PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb); +} /* check, does node disappear if we shift tb->lnum[0] items to left neighbor and tb->rnum[0] to the right one. */ -static int is_leaf_removable (struct tree_balance * tb) +static int is_leaf_removable(struct tree_balance *tb) { - struct virtual_node * vn = tb->tb_vn; - int to_left, to_right; - int size; - int remain_items; - - /* number of items, that will be shifted to left (right) neighbor - entirely */ - to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); - to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 
1 : 0); - remain_items = vn->vn_nr_item; - - /* how many items remain in S[0] after shiftings to neighbors */ - remain_items -= (to_left + to_right); - - if (remain_items < 1) { - /* all content of node can be shifted to neighbors */ - set_parameters (tb, 0, to_left, vn->vn_nr_item - to_left, 0, NULL, -1, -1); - return 1; - } - - if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) - /* S[0] is not removable */ - return 0; - - /* check, whether we can divide 1 remaining item between neighbors */ - - /* get size of remaining item (in item units) */ - size = op_unit_num (&(vn->vn_vi[to_left])); - - if (tb->lbytes + tb->rbytes >= size) { - set_parameters (tb, 0, to_left + 1, to_right + 1, 0, NULL, tb->lbytes, -1); - return 1; - } - - return 0; -} + struct virtual_node *vn = tb->tb_vn; + int to_left, to_right; + int size; + int remain_items; + + /* number of items, that will be shifted to left (right) neighbor + entirely */ + to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); + to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 
1 : 0); + remain_items = vn->vn_nr_item; + + /* how many items remain in S[0] after shiftings to neighbors */ + remain_items -= (to_left + to_right); + + if (remain_items < 1) { + /* all content of node can be shifted to neighbors */ + set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0, + NULL, -1, -1); + return 1; + } + if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) + /* S[0] is not removable */ + return 0; + + /* check, whether we can divide 1 remaining item between neighbors */ + + /* get size of remaining item (in item units) */ + size = op_unit_num(&(vn->vn_vi[to_left])); + + if (tb->lbytes + tb->rbytes >= size) { + set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL, + tb->lbytes, -1); + return 1; + } + + return 0; +} /* check whether L, S, R can be joined in one node */ -static int are_leaves_removable (struct tree_balance * tb, int lfree, int rfree) +static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree) { - struct virtual_node * vn = tb->tb_vn; - int ih_size; - struct buffer_head *S0; - - S0 = PATH_H_PBUFFER (tb->tb_path, 0); - - ih_size = 0; - if (vn->vn_nr_item) { - if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE) - ih_size += IH_SIZE; - - if (vn->vn_vi[vn->vn_nr_item-1].vi_type & VI_TYPE_RIGHT_MERGEABLE) - ih_size += IH_SIZE; - } else { - /* there was only one item and it will be deleted */ - struct item_head * ih; - - RFALSE( B_NR_ITEMS (S0) != 1, - "vs-8125: item number must be 1: it is %d", B_NR_ITEMS(S0)); - - ih = B_N_PITEM_HEAD (S0, 0); - if (tb->CFR[0] && !comp_short_le_keys (&(ih->ih_key), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]))) - if (is_direntry_le_ih (ih)) { - /* Directory must be in correct state here: that is - somewhere at the left side should exist first directory - item. But the item being deleted can not be that first - one because its right neighbor is item of the same - directory. (But first item always gets deleted in last - turn). 
So, neighbors of deleted item can be merged, so - we can save ih_size */ - ih_size = IH_SIZE; - - /* we might check that left neighbor exists and is of the - same directory */ - RFALSE(le_ih_k_offset (ih) == DOT_OFFSET, - "vs-8130: first directory item can not be removed until directory is not empty"); - } - - } - - if (MAX_CHILD_SIZE (S0) + vn->vn_size <= rfree + lfree + ih_size) { - set_parameters (tb, 0, -1, -1, -1, NULL, -1, -1); - PROC_INFO_INC( tb -> tb_sb, leaves_removable ); - return 1; - } - return 0; - -} + struct virtual_node *vn = tb->tb_vn; + int ih_size; + struct buffer_head *S0; + + S0 = PATH_H_PBUFFER(tb->tb_path, 0); + + ih_size = 0; + if (vn->vn_nr_item) { + if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE) + ih_size += IH_SIZE; + + if (vn->vn_vi[vn->vn_nr_item - 1]. + vi_type & VI_TYPE_RIGHT_MERGEABLE) + ih_size += IH_SIZE; + } else { + /* there was only one item and it will be deleted */ + struct item_head *ih; + + RFALSE(B_NR_ITEMS(S0) != 1, + "vs-8125: item number must be 1: it is %d", + B_NR_ITEMS(S0)); + + ih = B_N_PITEM_HEAD(S0, 0); + if (tb->CFR[0] + && !comp_short_le_keys(&(ih->ih_key), + B_N_PDELIM_KEY(tb->CFR[0], + tb->rkey[0]))) + if (is_direntry_le_ih(ih)) { + /* Directory must be in correct state here: that is + somewhere at the left side should exist first directory + item. But the item being deleted can not be that first + one because its right neighbor is item of the same + directory. (But first item always gets deleted in last + turn). 
So, neighbors of deleted item can be merged, so + we can save ih_size */ + ih_size = IH_SIZE; + + /* we might check that left neighbor exists and is of the + same directory */ + RFALSE(le_ih_k_offset(ih) == DOT_OFFSET, + "vs-8130: first directory item can not be removed until directory is not empty"); + } + } + + if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) { + set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1); + PROC_INFO_INC(tb->tb_sb, leaves_removable); + return 1; + } + return 0; +} /* when we do not split item, lnum and rnum are numbers of entire items */ #define SET_PAR_SHIFT_LEFT \ @@ -704,7 +733,6 @@ else \ -1, -1);\ } - #define SET_PAR_SHIFT_RIGHT \ if (h)\ {\ @@ -724,214 +752,199 @@ else \ -1, -1);\ } - -static void free_buffers_in_tb ( - struct tree_balance * p_s_tb - ) { - int n_counter; - - decrement_counters_in_path(p_s_tb->tb_path); - - for ( n_counter = 0; n_counter < MAX_HEIGHT; n_counter++ ) { - decrement_bcount(p_s_tb->L[n_counter]); - p_s_tb->L[n_counter] = NULL; - decrement_bcount(p_s_tb->R[n_counter]); - p_s_tb->R[n_counter] = NULL; - decrement_bcount(p_s_tb->FL[n_counter]); - p_s_tb->FL[n_counter] = NULL; - decrement_bcount(p_s_tb->FR[n_counter]); - p_s_tb->FR[n_counter] = NULL; - decrement_bcount(p_s_tb->CFL[n_counter]); - p_s_tb->CFL[n_counter] = NULL; - decrement_bcount(p_s_tb->CFR[n_counter]); - p_s_tb->CFR[n_counter] = NULL; - } +static void free_buffers_in_tb(struct tree_balance *p_s_tb) +{ + int n_counter; + + decrement_counters_in_path(p_s_tb->tb_path); + + for (n_counter = 0; n_counter < MAX_HEIGHT; n_counter++) { + decrement_bcount(p_s_tb->L[n_counter]); + p_s_tb->L[n_counter] = NULL; + decrement_bcount(p_s_tb->R[n_counter]); + p_s_tb->R[n_counter] = NULL; + decrement_bcount(p_s_tb->FL[n_counter]); + p_s_tb->FL[n_counter] = NULL; + decrement_bcount(p_s_tb->FR[n_counter]); + p_s_tb->FR[n_counter] = NULL; + decrement_bcount(p_s_tb->CFL[n_counter]); + p_s_tb->CFL[n_counter] = NULL; + 
decrement_bcount(p_s_tb->CFR[n_counter]); + p_s_tb->CFR[n_counter] = NULL; + } } - /* Get new buffers for storing new nodes that are created while balancing. * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; * CARRY_ON - schedule didn't occur while the function worked; * NO_DISK_SPACE - no disk space. */ /* The function is NOT SCHEDULE-SAFE! */ -static int get_empty_nodes( - struct tree_balance * p_s_tb, - int n_h - ) { - struct buffer_head * p_s_new_bh, - * p_s_Sh = PATH_H_PBUFFER (p_s_tb->tb_path, n_h); - b_blocknr_t * p_n_blocknr, - a_n_blocknrs[MAX_AMOUNT_NEEDED] = {0, }; - int n_counter, - n_number_of_freeblk, - n_amount_needed,/* number of needed empty blocks */ - n_retval = CARRY_ON; - struct super_block * p_s_sb = p_s_tb->tb_sb; - - - /* number_of_freeblk is the number of empty blocks which have been - acquired for use by the balancing algorithm minus the number of - empty blocks used in the previous levels of the analysis, - number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs - after empty blocks are acquired, and the balancing analysis is - then restarted, amount_needed is the number needed by this level - (n_h) of the balancing analysis. - - Note that for systems with many processes writing, it would be - more layout optimal to calculate the total number needed by all - levels and then to run reiserfs_new_blocks to get all of them at once. */ - - /* Initiate number_of_freeblk to the amount acquired prior to the restart of - the analysis or 0 if not restarted, then subtract the amount needed - by all of the levels of the tree below n_h. */ - /* blknum includes S[n_h], so we subtract 1 in this calculation */ - for ( n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum; n_counter < n_h; n_counter++ ) - n_number_of_freeblk -= ( p_s_tb->blknum[n_counter] ) ? (p_s_tb->blknum[n_counter] - 1) : 0; - - /* Allocate missing empty blocks. 
*/ - /* if p_s_Sh == 0 then we are getting a new root */ - n_amount_needed = ( p_s_Sh ) ? (p_s_tb->blknum[n_h] - 1) : 1; - /* Amount_needed = the amount that we need more than the amount that we have. */ - if ( n_amount_needed > n_number_of_freeblk ) - n_amount_needed -= n_number_of_freeblk; - else /* If we have enough already then there is nothing to do. */ - return CARRY_ON; - - /* No need to check quota - is not allocated for blocks used for formatted nodes */ - if (reiserfs_new_form_blocknrs (p_s_tb, a_n_blocknrs, - n_amount_needed) == NO_DISK_SPACE) - return NO_DISK_SPACE; - - /* for each blocknumber we just got, get a buffer and stick it on FEB */ - for ( p_n_blocknr = a_n_blocknrs, n_counter = 0; n_counter < n_amount_needed; - p_n_blocknr++, n_counter++ ) { - - RFALSE( ! *p_n_blocknr, - "PAP-8135: reiserfs_new_blocknrs failed when got new blocks"); - - p_s_new_bh = sb_getblk(p_s_sb, *p_n_blocknr); - RFALSE (buffer_dirty (p_s_new_bh) || - buffer_journaled (p_s_new_bh) || - buffer_journal_dirty (p_s_new_bh), - "PAP-8140: journlaled or dirty buffer %b for the new block", - p_s_new_bh); - - /* Put empty buffers into the array. 
*/ - RFALSE (p_s_tb->FEB[p_s_tb->cur_blknum], - "PAP-8141: busy slot for new buffer"); - - set_buffer_journal_new (p_s_new_bh); - p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh; - } - - if ( n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB (p_s_tb) ) - n_retval = REPEAT_SEARCH ; - - return n_retval; -} +static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h) +{ + struct buffer_head *p_s_new_bh, + *p_s_Sh = PATH_H_PBUFFER(p_s_tb->tb_path, n_h); + b_blocknr_t *p_n_blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; + int n_counter, n_number_of_freeblk, n_amount_needed, /* number of needed empty blocks */ + n_retval = CARRY_ON; + struct super_block *p_s_sb = p_s_tb->tb_sb; + + /* number_of_freeblk is the number of empty blocks which have been + acquired for use by the balancing algorithm minus the number of + empty blocks used in the previous levels of the analysis, + number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs + after empty blocks are acquired, and the balancing analysis is + then restarted, amount_needed is the number needed by this level + (n_h) of the balancing analysis. + + Note that for systems with many processes writing, it would be + more layout optimal to calculate the total number needed by all + levels and then to run reiserfs_new_blocks to get all of them at once. */ + + /* Initiate number_of_freeblk to the amount acquired prior to the restart of + the analysis or 0 if not restarted, then subtract the amount needed + by all of the levels of the tree below n_h. */ + /* blknum includes S[n_h], so we subtract 1 in this calculation */ + for (n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum; + n_counter < n_h; n_counter++) + n_number_of_freeblk -= + (p_s_tb->blknum[n_counter]) ? (p_s_tb->blknum[n_counter] - + 1) : 0; + + /* Allocate missing empty blocks. */ + /* if p_s_Sh == 0 then we are getting a new root */ + n_amount_needed = (p_s_Sh) ? 
(p_s_tb->blknum[n_h] - 1) : 1; + /* Amount_needed = the amount that we need more than the amount that we have. */ + if (n_amount_needed > n_number_of_freeblk) + n_amount_needed -= n_number_of_freeblk; + else /* If we have enough already then there is nothing to do. */ + return CARRY_ON; + + /* No need to check quota - is not allocated for blocks used for formatted nodes */ + if (reiserfs_new_form_blocknrs(p_s_tb, a_n_blocknrs, + n_amount_needed) == NO_DISK_SPACE) + return NO_DISK_SPACE; + + /* for each blocknumber we just got, get a buffer and stick it on FEB */ + for (p_n_blocknr = a_n_blocknrs, n_counter = 0; + n_counter < n_amount_needed; p_n_blocknr++, n_counter++) { + + RFALSE(!*p_n_blocknr, + "PAP-8135: reiserfs_new_blocknrs failed when got new blocks"); + + p_s_new_bh = sb_getblk(p_s_sb, *p_n_blocknr); + RFALSE(buffer_dirty(p_s_new_bh) || + buffer_journaled(p_s_new_bh) || + buffer_journal_dirty(p_s_new_bh), + "PAP-8140: journlaled or dirty buffer %b for the new block", + p_s_new_bh); + + /* Put empty buffers into the array. */ + RFALSE(p_s_tb->FEB[p_s_tb->cur_blknum], + "PAP-8141: busy slot for new buffer"); + + set_buffer_journal_new(p_s_new_bh); + p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh; + } + + if (n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB(p_s_tb)) + n_retval = REPEAT_SEARCH; + return n_retval; +} /* Get free space of the left neighbor, which is stored in the parent * node of the left neighbor. 
*/ -static int get_lfree (struct tree_balance * tb, int h) +static int get_lfree(struct tree_balance *tb, int h) { - struct buffer_head * l, * f; - int order; + struct buffer_head *l, *f; + int order; - if ((f = PATH_H_PPARENT (tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0) - return 0; + if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0) + return 0; - if (f == l) - order = PATH_H_B_ITEM_ORDER (tb->tb_path, h) - 1; - else { - order = B_NR_ITEMS (l); - f = l; - } + if (f == l) + order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1; + else { + order = B_NR_ITEMS(l); + f = l; + } - return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f,order))); + return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); } - /* Get free space of the right neighbor, * which is stored in the parent node of the right neighbor. */ -static int get_rfree (struct tree_balance * tb, int h) +static int get_rfree(struct tree_balance *tb, int h) { - struct buffer_head * r, * f; - int order; + struct buffer_head *r, *f; + int order; - if ((f = PATH_H_PPARENT (tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0) - return 0; + if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0) + return 0; - if (f == r) - order = PATH_H_B_ITEM_ORDER (tb->tb_path, h) + 1; - else { - order = 0; - f = r; - } + if (f == r) + order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1; + else { + order = 0; + f = r; + } - return (MAX_CHILD_SIZE(f) - dc_size( B_N_CHILD(f,order))); + return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); } - /* Check whether left neighbor is in memory. */ -static int is_left_neighbor_in_cache( - struct tree_balance * p_s_tb, - int n_h - ) { - struct buffer_head * p_s_father, * left; - struct super_block * p_s_sb = p_s_tb->tb_sb; - b_blocknr_t n_left_neighbor_blocknr; - int n_left_neighbor_position; - - if ( ! p_s_tb->FL[n_h] ) /* Father of the left neighbor does not exist. */ - return 0; - - /* Calculate father of the node to be balanced. 
*/ - p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1); - - RFALSE( ! p_s_father || - ! B_IS_IN_TREE (p_s_father) || - ! B_IS_IN_TREE (p_s_tb->FL[n_h]) || - ! buffer_uptodate (p_s_father) || - ! buffer_uptodate (p_s_tb->FL[n_h]), - "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", - p_s_father, p_s_tb->FL[n_h]); - - - /* Get position of the pointer to the left neighbor into the left father. */ - n_left_neighbor_position = ( p_s_father == p_s_tb->FL[n_h] ) ? - p_s_tb->lkey[n_h] : B_NR_ITEMS (p_s_tb->FL[n_h]); - /* Get left neighbor block number. */ - n_left_neighbor_blocknr = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position); - /* Look for the left neighbor in the cache. */ - if ( (left = sb_find_get_block(p_s_sb, n_left_neighbor_blocknr)) ) { - - RFALSE( buffer_uptodate (left) && ! B_IS_IN_TREE(left), - "vs-8170: left neighbor (%b %z) is not in the tree", left, left); - put_bh(left) ; - return 1; - } - - return 0; -} +static int is_left_neighbor_in_cache(struct tree_balance *p_s_tb, int n_h) +{ + struct buffer_head *p_s_father, *left; + struct super_block *p_s_sb = p_s_tb->tb_sb; + b_blocknr_t n_left_neighbor_blocknr; + int n_left_neighbor_position; + + if (!p_s_tb->FL[n_h]) /* Father of the left neighbor does not exist. */ + return 0; + + /* Calculate father of the node to be balanced. */ + p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1); + + RFALSE(!p_s_father || + !B_IS_IN_TREE(p_s_father) || + !B_IS_IN_TREE(p_s_tb->FL[n_h]) || + !buffer_uptodate(p_s_father) || + !buffer_uptodate(p_s_tb->FL[n_h]), + "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", + p_s_father, p_s_tb->FL[n_h]); + + /* Get position of the pointer to the left neighbor into the left father. */ + n_left_neighbor_position = (p_s_father == p_s_tb->FL[n_h]) ? + p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->FL[n_h]); + /* Get left neighbor block number. */ + n_left_neighbor_blocknr = + B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position); + /* Look for the left neighbor in the cache. 
*/ + if ((left = sb_find_get_block(p_s_sb, n_left_neighbor_blocknr))) { + + RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left), + "vs-8170: left neighbor (%b %z) is not in the tree", + left, left); + put_bh(left); + return 1; + } + return 0; +} #define LEFT_PARENTS 'l' #define RIGHT_PARENTS 'r' - -static void decrement_key (struct cpu_key * p_s_key) +static void decrement_key(struct cpu_key *p_s_key) { - // call item specific function for this key - item_ops[cpu_key_k_type (p_s_key)]->decrement_key (p_s_key); + // call item specific function for this key + item_ops[cpu_key_k_type(p_s_key)]->decrement_key(p_s_key); } - - - /* Calculate far left/right parent of the left/right neighbor of the current node, that * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h]. * Calculate left/right common parent of the current node and L[h]/R[h]. @@ -940,111 +953,121 @@ static void decrement_key (struct cpu_key * p_s_key) SCHEDULE_OCCURRED - schedule occurred while the function worked; * CARRY_ON - schedule didn't occur while the function worked; */ -static int get_far_parent (struct tree_balance * p_s_tb, - int n_h, - struct buffer_head ** pp_s_father, - struct buffer_head ** pp_s_com_father, - char c_lr_par) +static int get_far_parent(struct tree_balance *p_s_tb, + int n_h, + struct buffer_head **pp_s_father, + struct buffer_head **pp_s_com_father, char c_lr_par) { - struct buffer_head * p_s_parent; - INITIALIZE_PATH (s_path_to_neighbor_father); - struct path * p_s_path = p_s_tb->tb_path; - struct cpu_key s_lr_father_key; - int n_counter, - n_position = INT_MAX, - n_first_last_position = 0, - n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h); - - /* Starting from F[n_h] go upwards in the tree, and look for the common - ancestor of F[n_h], and its neighbor l/r, that should be obtained. 
*/ - - n_counter = n_path_offset; - - RFALSE( n_counter < FIRST_PATH_ELEMENT_OFFSET, - "PAP-8180: invalid path length"); - - - for ( ; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter-- ) { - /* Check whether parent of the current buffer in the path is really parent in the tree. */ - if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)) ) - return REPEAT_SEARCH; - /* Check whether position in the parent is correct. */ - if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_counter - 1)) > B_NR_ITEMS(p_s_parent) ) - return REPEAT_SEARCH; - /* Check whether parent at the path really points to the child. */ - if ( B_N_CHILD_NUM(p_s_parent, n_position) != - PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr ) - return REPEAT_SEARCH; - /* Return delimiting key if position in the parent is not equal to first/last one. */ - if ( c_lr_par == RIGHT_PARENTS ) - n_first_last_position = B_NR_ITEMS (p_s_parent); - if ( n_position != n_first_last_position ) { - *pp_s_com_father = p_s_parent; - get_bh(*pp_s_com_father) ; - /*(*pp_s_com_father = p_s_parent)->b_count++;*/ - break; + struct buffer_head *p_s_parent; + INITIALIZE_PATH(s_path_to_neighbor_father); + struct path *p_s_path = p_s_tb->tb_path; + struct cpu_key s_lr_father_key; + int n_counter, + n_position = INT_MAX, + n_first_last_position = 0, + n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h); + + /* Starting from F[n_h] go upwards in the tree, and look for the common + ancestor of F[n_h], and its neighbor l/r, that should be obtained. */ + + n_counter = n_path_offset; + + RFALSE(n_counter < FIRST_PATH_ELEMENT_OFFSET, + "PAP-8180: invalid path length"); + + for (; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter--) { + /* Check whether parent of the current buffer in the path is really parent in the tree. */ + if (!B_IS_IN_TREE + (p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1))) + return REPEAT_SEARCH; + /* Check whether position in the parent is correct. 
*/ + if ((n_position = + PATH_OFFSET_POSITION(p_s_path, + n_counter - 1)) > + B_NR_ITEMS(p_s_parent)) + return REPEAT_SEARCH; + /* Check whether parent at the path really points to the child. */ + if (B_N_CHILD_NUM(p_s_parent, n_position) != + PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr) + return REPEAT_SEARCH; + /* Return delimiting key if position in the parent is not equal to first/last one. */ + if (c_lr_par == RIGHT_PARENTS) + n_first_last_position = B_NR_ITEMS(p_s_parent); + if (n_position != n_first_last_position) { + *pp_s_com_father = p_s_parent; + get_bh(*pp_s_com_father); + /*(*pp_s_com_father = p_s_parent)->b_count++; */ + break; + } } - } - - /* if we are in the root of the tree, then there is no common father */ - if ( n_counter == FIRST_PATH_ELEMENT_OFFSET ) { - /* Check whether first buffer in the path is the root of the tree. */ - if ( PATH_OFFSET_PBUFFER(p_s_tb->tb_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == - SB_ROOT_BLOCK (p_s_tb->tb_sb) ) { - *pp_s_father = *pp_s_com_father = NULL; - return CARRY_ON; + + /* if we are in the root of the tree, then there is no common father */ + if (n_counter == FIRST_PATH_ELEMENT_OFFSET) { + /* Check whether first buffer in the path is the root of the tree. */ + if (PATH_OFFSET_PBUFFER + (p_s_tb->tb_path, + FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == + SB_ROOT_BLOCK(p_s_tb->tb_sb)) { + *pp_s_father = *pp_s_com_father = NULL; + return CARRY_ON; + } + return REPEAT_SEARCH; } - return REPEAT_SEARCH; - } - RFALSE( B_LEVEL (*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL, - "PAP-8185: (%b %z) level too small", - *pp_s_com_father, *pp_s_com_father); + RFALSE(B_LEVEL(*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL, + "PAP-8185: (%b %z) level too small", + *pp_s_com_father, *pp_s_com_father); - /* Check whether the common parent is locked. */ + /* Check whether the common parent is locked. 
*/ - if ( buffer_locked (*pp_s_com_father) ) { - __wait_on_buffer(*pp_s_com_father); - if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { - decrement_bcount(*pp_s_com_father); - return REPEAT_SEARCH; + if (buffer_locked(*pp_s_com_father)) { + __wait_on_buffer(*pp_s_com_father); + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { + decrement_bcount(*pp_s_com_father); + return REPEAT_SEARCH; + } } - } - - /* So, we got common parent of the current node and its left/right neighbor. - Now we are geting the parent of the left/right neighbor. */ - /* Form key to get parent of the left/right neighbor. */ - le_key2cpu_key (&s_lr_father_key, B_N_PDELIM_KEY(*pp_s_com_father, ( c_lr_par == LEFT_PARENTS ) ? - (p_s_tb->lkey[n_h - 1] = n_position - 1) : (p_s_tb->rkey[n_h - 1] = n_position))); + /* So, we got common parent of the current node and its left/right neighbor. + Now we are geting the parent of the left/right neighbor. */ + /* Form key to get parent of the left/right neighbor. */ + le_key2cpu_key(&s_lr_father_key, + B_N_PDELIM_KEY(*pp_s_com_father, + (c_lr_par == + LEFT_PARENTS) ? 
(p_s_tb->lkey[n_h - 1] = + n_position - + 1) : (p_s_tb->rkey[n_h - + 1] = + n_position))); - if ( c_lr_par == LEFT_PARENTS ) - decrement_key(&s_lr_father_key); + if (c_lr_par == LEFT_PARENTS) + decrement_key(&s_lr_father_key); - if (search_by_key(p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, n_h + 1) == IO_ERROR) - // path is released - return IO_ERROR; + if (search_by_key + (p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, + n_h + 1) == IO_ERROR) + // path is released + return IO_ERROR; - if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { - decrement_counters_in_path(&s_path_to_neighbor_father); - decrement_bcount(*pp_s_com_father); - return REPEAT_SEARCH; - } + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { + decrement_counters_in_path(&s_path_to_neighbor_father); + decrement_bcount(*pp_s_com_father); + return REPEAT_SEARCH; + } - *pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father); + *pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father); - RFALSE( B_LEVEL (*pp_s_father) != n_h + 1, - "PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father); - RFALSE( s_path_to_neighbor_father.path_length < FIRST_PATH_ELEMENT_OFFSET, - "PAP-8192: path length is too small"); + RFALSE(B_LEVEL(*pp_s_father) != n_h + 1, + "PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father); + RFALSE(s_path_to_neighbor_father.path_length < + FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small"); - s_path_to_neighbor_father.path_length--; - decrement_counters_in_path(&s_path_to_neighbor_father); - return CARRY_ON; + s_path_to_neighbor_father.path_length--; + decrement_counters_in_path(&s_path_to_neighbor_father); + return CARRY_ON; } - /* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset], * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset]. 
@@ -1052,122 +1075,127 @@ static int get_far_parent (struct tree_balance * p_s_tb, * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; * CARRY_ON - schedule didn't occur while the function worked; */ -static int get_parents (struct tree_balance * p_s_tb, int n_h) +static int get_parents(struct tree_balance *p_s_tb, int n_h) { - struct path * p_s_path = p_s_tb->tb_path; - int n_position, - n_ret_value, - n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); - struct buffer_head * p_s_curf, - * p_s_curcf; - - /* Current node is the root of the tree or will be root of the tree */ - if ( n_path_offset <= FIRST_PATH_ELEMENT_OFFSET ) { - /* The root can not have parents. - Release nodes which previously were obtained as parents of the current node neighbors. */ + struct path *p_s_path = p_s_tb->tb_path; + int n_position, + n_ret_value, + n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); + struct buffer_head *p_s_curf, *p_s_curcf; + + /* Current node is the root of the tree or will be root of the tree */ + if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) { + /* The root can not have parents. + Release nodes which previously were obtained as parents of the current node neighbors. */ + decrement_bcount(p_s_tb->FL[n_h]); + decrement_bcount(p_s_tb->CFL[n_h]); + decrement_bcount(p_s_tb->FR[n_h]); + decrement_bcount(p_s_tb->CFR[n_h]); + p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] = + p_s_tb->CFR[n_h] = NULL; + return CARRY_ON; + } + + /* Get parent FL[n_path_offset] of L[n_path_offset]. */ + if ((n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1))) { + /* Current node is not the first child of its parent. 
*/ + /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */ + p_s_curf = p_s_curcf = + PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); + get_bh(p_s_curf); + get_bh(p_s_curf); + p_s_tb->lkey[n_h] = n_position - 1; + } else { + /* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node. + Calculate current common parent of L[n_path_offset] and the current node. Note that + CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset]. + Calculate lkey[n_path_offset]. */ + if ((n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf, + &p_s_curcf, + LEFT_PARENTS)) != CARRY_ON) + return n_ret_value; + } + decrement_bcount(p_s_tb->FL[n_h]); + p_s_tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */ decrement_bcount(p_s_tb->CFL[n_h]); - decrement_bcount(p_s_tb->FR[n_h]); - decrement_bcount(p_s_tb->CFR[n_h]); - p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] = p_s_tb->CFR[n_h] = NULL; - return CARRY_ON; - } - - /* Get parent FL[n_path_offset] of L[n_path_offset]. */ - if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1)) ) { - /* Current node is not the first child of its parent. */ - /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2;*/ - p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); - get_bh(p_s_curf) ; - get_bh(p_s_curf) ; - p_s_tb->lkey[n_h] = n_position - 1; - } - else { - /* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node. - Calculate current common parent of L[n_path_offset] and the current node. Note that - CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset]. - Calculate lkey[n_path_offset]. 
*/ - if ( (n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf, - &p_s_curcf, LEFT_PARENTS)) != CARRY_ON ) - return n_ret_value; - } - - decrement_bcount(p_s_tb->FL[n_h]); - p_s_tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */ - decrement_bcount(p_s_tb->CFL[n_h]); - p_s_tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */ - - RFALSE( (p_s_curf && !B_IS_IN_TREE (p_s_curf)) || - (p_s_curcf && !B_IS_IN_TREE (p_s_curcf)), - "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf); + p_s_tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */ + + RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) || + (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)), + "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf); /* Get parent FR[n_h] of R[n_h]. */ /* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */ - if ( n_position == B_NR_ITEMS (PATH_H_PBUFFER(p_s_path, n_h + 1)) ) { + if (n_position == B_NR_ITEMS(PATH_H_PBUFFER(p_s_path, n_h + 1))) { /* Calculate current parent of R[n_h], which is the right neighbor of F[n_h]. Calculate current common parent of R[n_h] and current node. Note that CFR[n_h] not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */ - if ( (n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf, RIGHT_PARENTS)) != CARRY_ON ) - return n_ret_value; - } - else { + if ((n_ret_value = + get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf, + RIGHT_PARENTS)) != CARRY_ON) + return n_ret_value; + } else { /* Current node is not the last child of its parent F[n_h]. */ - /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2;*/ - p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); - get_bh(p_s_curf) ; - get_bh(p_s_curf) ; - p_s_tb->rkey[n_h] = n_position; - } - - decrement_bcount(p_s_tb->FR[n_h]); - p_s_tb->FR[n_h] = p_s_curf; /* New initialization of FR[n_path_offset]. 
*/ - - decrement_bcount(p_s_tb->CFR[n_h]); - p_s_tb->CFR[n_h] = p_s_curcf; /* New initialization of CFR[n_path_offset]. */ - - RFALSE( (p_s_curf && !B_IS_IN_TREE (p_s_curf)) || - (p_s_curcf && !B_IS_IN_TREE (p_s_curcf)), - "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf); - - return CARRY_ON; -} + /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */ + p_s_curf = p_s_curcf = + PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); + get_bh(p_s_curf); + get_bh(p_s_curf); + p_s_tb->rkey[n_h] = n_position; + } + decrement_bcount(p_s_tb->FR[n_h]); + p_s_tb->FR[n_h] = p_s_curf; /* New initialization of FR[n_path_offset]. */ + + decrement_bcount(p_s_tb->CFR[n_h]); + p_s_tb->CFR[n_h] = p_s_curcf; /* New initialization of CFR[n_path_offset]. */ + + RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) || + (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)), + "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf); + + return CARRY_ON; +} /* it is possible to remove node as result of shiftings to neighbors even when we insert or paste item. */ -static inline int can_node_be_removed (int mode, int lfree, int sfree, int rfree, struct tree_balance * tb, int h) +static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, + struct tree_balance *tb, int h) { - struct buffer_head * Sh = PATH_H_PBUFFER (tb->tb_path, h); - int levbytes = tb->insert_size[h]; - struct item_head * ih; - struct reiserfs_key * r_key = NULL; - - ih = B_N_PITEM_HEAD (Sh, 0); - if ( tb->CFR[h] ) - r_key = B_N_PDELIM_KEY(tb->CFR[h],tb->rkey[h]); - - if ( - lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes - /* shifting may merge items which might save space */ - - (( ! h && op_is_left_mergeable (&(ih->ih_key), Sh->b_size) ) ? IH_SIZE : 0) - - (( ! h && r_key && op_is_left_mergeable (r_key, Sh->b_size) ) ? IH_SIZE : 0) - + (( h ) ? 
KEY_SIZE : 0)) - { - /* node can not be removed */ - if (sfree >= levbytes ) { /* new item fits into node S[h] without any shifting */ - if ( ! h ) - tb->s0num = B_NR_ITEMS(Sh) + ((mode == M_INSERT ) ? 1 : 0); - set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; + struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h); + int levbytes = tb->insert_size[h]; + struct item_head *ih; + struct reiserfs_key *r_key = NULL; + + ih = B_N_PITEM_HEAD(Sh, 0); + if (tb->CFR[h]) + r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]); + + if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes + /* shifting may merge items which might save space */ + - + ((!h + && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0) + - + ((!h && r_key + && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0) + + ((h) ? KEY_SIZE : 0)) { + /* node can not be removed */ + if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ + if (!h) + tb->s0num = + B_NR_ITEMS(Sh) + + ((mode == M_INSERT) ? 1 : 0); + set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; + } } - } - PROC_INFO_INC( tb -> tb_sb, can_node_be_removed[ h ] ); - return !NO_BALANCING_NEEDED; + PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]); + return !NO_BALANCING_NEEDED; } - - /* Check whether current node S[h] is balanced when increasing its size by * Inserting or Pasting. * Calculate parameters for balancing for current level h. @@ -1182,154 +1210,157 @@ static inline int can_node_be_removed (int mode, int lfree, int sfree, int rfree * -2 - no disk space. */ /* ip means Inserting or Pasting */ -static int ip_check_balance (struct tree_balance * tb, int h) +static int ip_check_balance(struct tree_balance *tb, int h) { - struct virtual_node * vn = tb->tb_vn; - int levbytes, /* Number of bytes that must be inserted into (value - is negative if bytes are deleted) buffer which - contains node being balanced. 
The mnemonic is - that the attempted change in node space used level - is levbytes bytes. */ - n_ret_value; - - int lfree, sfree, rfree /* free space in L, S and R */; - - /* nver is short for number of vertixes, and lnver is the number if - we shift to the left, rnver is the number if we shift to the - right, and lrnver is the number if we shift in both directions. - The goal is to minimize first the number of vertixes, and second, - the number of vertixes whose contents are changed by shifting, - and third the number of uncached vertixes whose contents are - changed by shifting and must be read from disk. */ - int nver, lnver, rnver, lrnver; - - /* used at leaf level only, S0 = S[0] is the node being balanced, - sInum [ I = 0,1,2 ] is the number of items that will - remain in node SI after balancing. S1 and S2 are new - nodes that might be created. */ - - /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters. - where 4th parameter is s1bytes and 5th - s2bytes - */ - short snum012[40] = {0,}; /* s0num, s1num, s2num for 8 cases - 0,1 - do not shift and do not shift but bottle - 2 - shift only whole item to left - 3 - shift to left and bottle as much as possible - 4,5 - shift to right (whole items and as much as possible - 6,7 - shift to both directions (whole items and as much as possible) - */ - - /* Sh is the node whose balance is currently being checked */ - struct buffer_head * Sh; - - Sh = PATH_H_PBUFFER (tb->tb_path, h); - levbytes = tb->insert_size[h]; - - /* Calculate balance parameters for creating new root. */ - if ( ! Sh ) { - if ( ! 
h ) - reiserfs_panic (tb->tb_sb, "vs-8210: ip_check_balance: S[0] can not be 0"); - switch ( n_ret_value = get_empty_nodes (tb, h) ) { - case CARRY_ON: - set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ - - case NO_DISK_SPACE: - case REPEAT_SEARCH: - return n_ret_value; - default: - reiserfs_panic(tb->tb_sb, "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes"); + struct virtual_node *vn = tb->tb_vn; + int levbytes, /* Number of bytes that must be inserted into (value + is negative if bytes are deleted) buffer which + contains node being balanced. The mnemonic is + that the attempted change in node space used level + is levbytes bytes. */ + n_ret_value; + + int lfree, sfree, rfree /* free space in L, S and R */ ; + + /* nver is short for number of vertixes, and lnver is the number if + we shift to the left, rnver is the number if we shift to the + right, and lrnver is the number if we shift in both directions. + The goal is to minimize first the number of vertixes, and second, + the number of vertixes whose contents are changed by shifting, + and third the number of uncached vertixes whose contents are + changed by shifting and must be read from disk. */ + int nver, lnver, rnver, lrnver; + + /* used at leaf level only, S0 = S[0] is the node being balanced, + sInum [ I = 0,1,2 ] is the number of items that will + remain in node SI after balancing. S1 and S2 are new + nodes that might be created. */ + + /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters. 
+ where 4th parameter is s1bytes and 5th - s2bytes + */ + short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases + 0,1 - do not shift and do not shift but bottle + 2 - shift only whole item to left + 3 - shift to left and bottle as much as possible + 4,5 - shift to right (whole items and as much as possible + 6,7 - shift to both directions (whole items and as much as possible) + */ + + /* Sh is the node whose balance is currently being checked */ + struct buffer_head *Sh; + + Sh = PATH_H_PBUFFER(tb->tb_path, h); + levbytes = tb->insert_size[h]; + + /* Calculate balance parameters for creating new root. */ + if (!Sh) { + if (!h) + reiserfs_panic(tb->tb_sb, + "vs-8210: ip_check_balance: S[0] can not be 0"); + switch (n_ret_value = get_empty_nodes(tb, h)) { + case CARRY_ON: + set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ + + case NO_DISK_SPACE: + case REPEAT_SEARCH: + return n_ret_value; + default: + reiserfs_panic(tb->tb_sb, + "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes"); + } } - } - - if ( (n_ret_value = get_parents (tb, h)) != CARRY_ON ) /* get parents of S[h] neighbors. 
*/ - return n_ret_value; - - sfree = B_FREE_SPACE (Sh); - - /* get free space of neighbors */ - rfree = get_rfree (tb, h); - lfree = get_lfree (tb, h); - - if (can_node_be_removed (vn->vn_mode, lfree, sfree, rfree, tb, h) == NO_BALANCING_NEEDED) - /* and new item fits into node S[h] without any shifting */ - return NO_BALANCING_NEEDED; - - create_virtual_node (tb, h); - - /* - determine maximal number of items we can shift to the left neighbor (in tb structure) - and the maximal number of bytes that can flow to the left neighbor - from the left most liquid item that cannot be shifted from S[0] entirely (returned value) - */ - check_left (tb, h, lfree); - - /* - determine maximal number of items we can shift to the right neighbor (in tb structure) - and the maximal number of bytes that can flow to the right neighbor - from the right most liquid item that cannot be shifted from S[0] entirely (returned value) - */ - check_right (tb, h, rfree); - - - /* all contents of internal node S[h] can be moved into its - neighbors, S[h] will be removed after balancing */ - if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { - int to_r; - - /* Since we are working on internal nodes, and our internal - nodes have fixed size entries, then we can balance by the - number of items rather than the space they consume. In this - routine we set the left node equal to the right node, - allowing a difference of less than or equal to 1 child - pointer. */ - to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 - - (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); - set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1); - return CARRY_ON; - } - - /* this checks balance condition, that any two neighboring nodes can not fit in one node */ - RFALSE( h && - ( tb->lnum[h] >= vn->vn_nr_item + 1 || - tb->rnum[h] >= vn->vn_nr_item + 1), - "vs-8220: tree is not balanced on internal level"); - RFALSE( ! 
h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) || - (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1)) ), - "vs-8225: tree is not balanced on leaf level"); - - /* all contents of S[0] can be moved into its neighbors - S[0] will be removed after balancing. */ - if (!h && is_leaf_removable (tb)) - return CARRY_ON; + if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */ + return n_ret_value; - /* why do we perform this check here rather than earlier?? - Answer: we can win 1 node in some cases above. Moreover we - checked it above, when we checked, that S[0] is not removable - in principle */ - if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ - if ( ! h ) - tb->s0num = vn->vn_nr_item; - set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; - } + sfree = B_FREE_SPACE(Sh); + + /* get free space of neighbors */ + rfree = get_rfree(tb, h); + lfree = get_lfree(tb, h); + + if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) == + NO_BALANCING_NEEDED) + /* and new item fits into node S[h] without any shifting */ + return NO_BALANCING_NEEDED; + create_virtual_node(tb, h); - { - int lpar, rpar, nset, lset, rset, lrset; - /* - * regular overflowing of the node + /* + determine maximal number of items we can shift to the left neighbor (in tb structure) + and the maximal number of bytes that can flow to the left neighbor + from the left most liquid item that cannot be shifted from S[0] entirely (returned value) */ + check_left(tb, h, lfree); - /* get_num_ver works in 2 modes (FLOW & NO_FLOW) - lpar, rpar - number of items we can shift to left/right neighbor (including splitting item) - nset, lset, rset, lrset - shows, whether flowing items give better packing - */ + /* + determine maximal number of items we can shift to the right neighbor (in tb structure) + and the maximal number of bytes that can flow to the right neighbor + from the right most liquid item that 
cannot be shifted from S[0] entirely (returned value) + */ + check_right(tb, h, rfree); + + /* all contents of internal node S[h] can be moved into its + neighbors, S[h] will be removed after balancing */ + if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { + int to_r; + + /* Since we are working on internal nodes, and our internal + nodes have fixed size entries, then we can balance by the + number of items rather than the space they consume. In this + routine we set the left node equal to the right node, + allowing a difference of less than or equal to 1 child + pointer. */ + to_r = + ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - + tb->rnum[h]); + set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, + -1, -1); + return CARRY_ON; + } + + /* this checks balance condition, that any two neighboring nodes can not fit in one node */ + RFALSE(h && + (tb->lnum[h] >= vn->vn_nr_item + 1 || + tb->rnum[h] >= vn->vn_nr_item + 1), + "vs-8220: tree is not balanced on internal level"); + RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) || + (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))), + "vs-8225: tree is not balanced on leaf level"); + + /* all contents of S[0] can be moved into its neighbors + S[0] will be removed after balancing. */ + if (!h && is_leaf_removable(tb)) + return CARRY_ON; + + /* why do we perform this check here rather than earlier?? + Answer: we can win 1 node in some cases above. 
Moreover we + checked it above, when we checked, that S[0] is not removable + in principle */ + if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ + if (!h) + tb->s0num = vn->vn_nr_item; + set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; + } + + { + int lpar, rpar, nset, lset, rset, lrset; + /* + * regular overflowing of the node + */ + + /* get_num_ver works in 2 modes (FLOW & NO_FLOW) + lpar, rpar - number of items we can shift to left/right neighbor (including splitting item) + nset, lset, rset, lrset - shows, whether flowing items give better packing + */ #define FLOW 1 -#define NO_FLOW 0 /* do not any splitting */ +#define NO_FLOW 0 /* do not any splitting */ - /* we choose one the following */ + /* we choose one the following */ #define NOTHING_SHIFT_NO_FLOW 0 #define NOTHING_SHIFT_FLOW 5 #define LEFT_SHIFT_NO_FLOW 10 @@ -1339,164 +1370,173 @@ static int ip_check_balance (struct tree_balance * tb, int h) #define LR_SHIFT_NO_FLOW 30 #define LR_SHIFT_FLOW 35 + lpar = tb->lnum[h]; + rpar = tb->rnum[h]; + + /* calculate number of blocks S[h] must be split into when + nothing is shifted to the neighbors, + as well as number of items in each part of the split node (s012 numbers), + and number of bytes (s1bytes) of the shared drop which flow to S1 if any */ + nset = NOTHING_SHIFT_NO_FLOW; + nver = get_num_ver(vn->vn_mode, tb, h, + 0, -1, h ? 
vn->vn_nr_item : 0, -1, + snum012, NO_FLOW); + + if (!h) { + int nver1; + + /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */ + nver1 = get_num_ver(vn->vn_mode, tb, h, + 0, -1, 0, -1, + snum012 + NOTHING_SHIFT_FLOW, FLOW); + if (nver > nver1) + nset = NOTHING_SHIFT_FLOW, nver = nver1; + } - lpar = tb->lnum[h]; - rpar = tb->rnum[h]; - - - /* calculate number of blocks S[h] must be split into when - nothing is shifted to the neighbors, - as well as number of items in each part of the split node (s012 numbers), - and number of bytes (s1bytes) of the shared drop which flow to S1 if any */ - nset = NOTHING_SHIFT_NO_FLOW; - nver = get_num_ver (vn->vn_mode, tb, h, - 0, -1, h?vn->vn_nr_item:0, -1, - snum012, NO_FLOW); - - if (!h) - { - int nver1; - - /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */ - nver1 = get_num_ver (vn->vn_mode, tb, h, - 0, -1, 0, -1, - snum012 + NOTHING_SHIFT_FLOW, FLOW); - if (nver > nver1) - nset = NOTHING_SHIFT_FLOW, nver = nver1; - } - - - /* calculate number of blocks S[h] must be split into when - l_shift_num first items and l_shift_bytes of the right most - liquid item to be shifted are shifted to the left neighbor, - as well as number of items in each part of the splitted node (s012 numbers), - and number of bytes (s1bytes) of the shared drop which flow to S1 if any - */ - lset = LEFT_SHIFT_NO_FLOW; - lnver = get_num_ver (vn->vn_mode, tb, h, - lpar - (( h || tb->lbytes == -1 ) ? 0 : 1), -1, h ? vn->vn_nr_item:0, -1, - snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW); - if (!h) - { - int lnver1; - - lnver1 = get_num_ver (vn->vn_mode, tb, h, - lpar - ((tb->lbytes != -1) ? 
1 : 0), tb->lbytes, 0, -1, - snum012 + LEFT_SHIFT_FLOW, FLOW); - if (lnver > lnver1) - lset = LEFT_SHIFT_FLOW, lnver = lnver1; - } - - - /* calculate number of blocks S[h] must be split into when - r_shift_num first items and r_shift_bytes of the left most - liquid item to be shifted are shifted to the right neighbor, - as well as number of items in each part of the splitted node (s012 numbers), - and number of bytes (s1bytes) of the shared drop which flow to S1 if any - */ - rset = RIGHT_SHIFT_NO_FLOW; - rnver = get_num_ver (vn->vn_mode, tb, h, - 0, -1, h ? (vn->vn_nr_item-rpar) : (rpar - (( tb->rbytes != -1 ) ? 1 : 0)), -1, - snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW); - if (!h) - { - int rnver1; - - rnver1 = get_num_ver (vn->vn_mode, tb, h, - 0, -1, (rpar - ((tb->rbytes != -1) ? 1 : 0)), tb->rbytes, - snum012 + RIGHT_SHIFT_FLOW, FLOW); - - if (rnver > rnver1) - rset = RIGHT_SHIFT_FLOW, rnver = rnver1; - } - - - /* calculate number of blocks S[h] must be split into when - items are shifted in both directions, - as well as number of items in each part of the splitted node (s012 numbers), - and number of bytes (s1bytes) of the shared drop which flow to S1 if any - */ - lrset = LR_SHIFT_NO_FLOW; - lrnver = get_num_ver (vn->vn_mode, tb, h, - lpar - ((h || tb->lbytes == -1) ? 0 : 1), -1, h ? (vn->vn_nr_item-rpar):(rpar - ((tb->rbytes != -1) ? 1 : 0)), -1, - snum012 + LR_SHIFT_NO_FLOW, NO_FLOW); - if (!h) - { - int lrnver1; - - lrnver1 = get_num_ver (vn->vn_mode, tb, h, - lpar - ((tb->lbytes != -1) ? 1 : 0), tb->lbytes, (rpar - ((tb->rbytes != -1) ? 
1 : 0)), tb->rbytes, - snum012 + LR_SHIFT_FLOW, FLOW); - if (lrnver > lrnver1) - lrset = LR_SHIFT_FLOW, lrnver = lrnver1; - } - - + /* calculate number of blocks S[h] must be split into when + l_shift_num first items and l_shift_bytes of the right most + liquid item to be shifted are shifted to the left neighbor, + as well as number of items in each part of the splitted node (s012 numbers), + and number of bytes (s1bytes) of the shared drop which flow to S1 if any + */ + lset = LEFT_SHIFT_NO_FLOW; + lnver = get_num_ver(vn->vn_mode, tb, h, + lpar - ((h || tb->lbytes == -1) ? 0 : 1), + -1, h ? vn->vn_nr_item : 0, -1, + snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW); + if (!h) { + int lnver1; + + lnver1 = get_num_ver(vn->vn_mode, tb, h, + lpar - + ((tb->lbytes != -1) ? 1 : 0), + tb->lbytes, 0, -1, + snum012 + LEFT_SHIFT_FLOW, FLOW); + if (lnver > lnver1) + lset = LEFT_SHIFT_FLOW, lnver = lnver1; + } - /* Our general shifting strategy is: - 1) to minimized number of new nodes; - 2) to minimized number of neighbors involved in shifting; - 3) to minimized number of disk reads; */ + /* calculate number of blocks S[h] must be split into when + r_shift_num first items and r_shift_bytes of the left most + liquid item to be shifted are shifted to the right neighbor, + as well as number of items in each part of the splitted node (s012 numbers), + and number of bytes (s1bytes) of the shared drop which flow to S1 if any + */ + rset = RIGHT_SHIFT_NO_FLOW; + rnver = get_num_ver(vn->vn_mode, tb, h, + 0, -1, + h ? (vn->vn_nr_item - rpar) : (rpar - + ((tb-> + rbytes != + -1) ? 1 : + 0)), -1, + snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW); + if (!h) { + int rnver1; + + rnver1 = get_num_ver(vn->vn_mode, tb, h, + 0, -1, + (rpar - + ((tb->rbytes != -1) ? 
1 : 0)), + tb->rbytes, + snum012 + RIGHT_SHIFT_FLOW, FLOW); + + if (rnver > rnver1) + rset = RIGHT_SHIFT_FLOW, rnver = rnver1; + } - /* we can win TWO or ONE nodes by shifting in both directions */ - if (lrnver < lnver && lrnver < rnver) - { - RFALSE( h && - (tb->lnum[h] != 1 || - tb->rnum[h] != 1 || - lrnver != 1 || rnver != 2 || lnver != 2 || h != 1), - "vs-8230: bad h"); - if (lrset == LR_SHIFT_FLOW) - set_parameters (tb, h, tb->lnum[h], tb->rnum[h], lrnver, snum012 + lrset, - tb->lbytes, tb->rbytes); - else - set_parameters (tb, h, tb->lnum[h] - ((tb->lbytes == -1) ? 0 : 1), - tb->rnum[h] - ((tb->rbytes == -1) ? 0 : 1), lrnver, snum012 + lrset, -1, -1); - - return CARRY_ON; - } + /* calculate number of blocks S[h] must be split into when + items are shifted in both directions, + as well as number of items in each part of the splitted node (s012 numbers), + and number of bytes (s1bytes) of the shared drop which flow to S1 if any + */ + lrset = LR_SHIFT_NO_FLOW; + lrnver = get_num_ver(vn->vn_mode, tb, h, + lpar - ((h || tb->lbytes == -1) ? 0 : 1), + -1, + h ? (vn->vn_nr_item - rpar) : (rpar - + ((tb-> + rbytes != + -1) ? 1 : + 0)), -1, + snum012 + LR_SHIFT_NO_FLOW, NO_FLOW); + if (!h) { + int lrnver1; + + lrnver1 = get_num_ver(vn->vn_mode, tb, h, + lpar - + ((tb->lbytes != -1) ? 1 : 0), + tb->lbytes, + (rpar - + ((tb->rbytes != -1) ? 
1 : 0)), + tb->rbytes, + snum012 + LR_SHIFT_FLOW, FLOW); + if (lrnver > lrnver1) + lrset = LR_SHIFT_FLOW, lrnver = lrnver1; + } - /* if shifting doesn't lead to better packing then don't shift */ - if (nver == lrnver) - { - set_parameters (tb, h, 0, 0, nver, snum012 + nset, -1, -1); - return CARRY_ON; - } + /* Our general shifting strategy is: + 1) to minimized number of new nodes; + 2) to minimized number of neighbors involved in shifting; + 3) to minimized number of disk reads; */ + + /* we can win TWO or ONE nodes by shifting in both directions */ + if (lrnver < lnver && lrnver < rnver) { + RFALSE(h && + (tb->lnum[h] != 1 || + tb->rnum[h] != 1 || + lrnver != 1 || rnver != 2 || lnver != 2 + || h != 1), "vs-8230: bad h"); + if (lrset == LR_SHIFT_FLOW) + set_parameters(tb, h, tb->lnum[h], tb->rnum[h], + lrnver, snum012 + lrset, + tb->lbytes, tb->rbytes); + else + set_parameters(tb, h, + tb->lnum[h] - + ((tb->lbytes == -1) ? 0 : 1), + tb->rnum[h] - + ((tb->rbytes == -1) ? 0 : 1), + lrnver, snum012 + lrset, -1, -1); + + return CARRY_ON; + } + /* if shifting doesn't lead to better packing then don't shift */ + if (nver == lrnver) { + set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1, + -1); + return CARRY_ON; + } - /* now we know that for better packing shifting in only one - direction either to the left or to the right is required */ + /* now we know that for better packing shifting in only one + direction either to the left or to the right is required */ - /* if shifting to the left is better than shifting to the right */ - if (lnver < rnver) - { - SET_PAR_SHIFT_LEFT; - return CARRY_ON; - } + /* if shifting to the left is better than shifting to the right */ + if (lnver < rnver) { + SET_PAR_SHIFT_LEFT; + return CARRY_ON; + } - /* if shifting to the right is better than shifting to the left */ - if (lnver > rnver) - { - SET_PAR_SHIFT_RIGHT; - return CARRY_ON; - } + /* if shifting to the right is better than shifting to the left */ + if (lnver > rnver) { + 
SET_PAR_SHIFT_RIGHT; + return CARRY_ON; + } + /* now shifting in either direction gives the same number + of nodes and we can make use of the cached neighbors */ + if (is_left_neighbor_in_cache(tb, h)) { + SET_PAR_SHIFT_LEFT; + return CARRY_ON; + } - /* now shifting in either direction gives the same number - of nodes and we can make use of the cached neighbors */ - if (is_left_neighbor_in_cache (tb,h)) - { - SET_PAR_SHIFT_LEFT; - return CARRY_ON; + /* shift to the right independently on whether the right neighbor in cache or not */ + SET_PAR_SHIFT_RIGHT; + return CARRY_ON; } - - /* shift to the right independently on whether the right neighbor in cache or not */ - SET_PAR_SHIFT_RIGHT; - return CARRY_ON; - } } - /* Check whether current node S[h] is balanced when Decreasing its size by * Deleting or Cutting for INTERNAL node of S+tree. * Calculate parameters for balancing for current level h. @@ -1513,157 +1553,173 @@ static int ip_check_balance (struct tree_balance * tb, int h) * Note: Items of internal nodes have fixed size, so the balance condition for * the internal part of S+tree is as for the B-trees. */ -static int dc_check_balance_internal (struct tree_balance * tb, int h) +static int dc_check_balance_internal(struct tree_balance *tb, int h) { - struct virtual_node * vn = tb->tb_vn; + struct virtual_node *vn = tb->tb_vn; - /* Sh is the node whose balance is currently being checked, - and Fh is its father. */ - struct buffer_head * Sh, * Fh; - int maxsize, - n_ret_value; - int lfree, rfree /* free space in L and R */; + /* Sh is the node whose balance is currently being checked, + and Fh is its father. 
*/ + struct buffer_head *Sh, *Fh; + int maxsize, n_ret_value; + int lfree, rfree /* free space in L and R */ ; - Sh = PATH_H_PBUFFER (tb->tb_path, h); - Fh = PATH_H_PPARENT (tb->tb_path, h); + Sh = PATH_H_PBUFFER(tb->tb_path, h); + Fh = PATH_H_PPARENT(tb->tb_path, h); - maxsize = MAX_CHILD_SIZE(Sh); + maxsize = MAX_CHILD_SIZE(Sh); /* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */ /* new_nr_item = number of items node would have if operation is */ /* performed without balancing (new_nr_item); */ - create_virtual_node (tb, h); + create_virtual_node(tb, h); - if ( ! Fh ) - { /* S[h] is the root. */ - if ( vn->vn_nr_item > 0 ) - { - set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ + if (!Fh) { /* S[h] is the root. */ + if (vn->vn_nr_item > 0) { + set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ + } + /* new_nr_item == 0. + * Current root will be deleted resulting in + * decrementing the tree height. */ + set_parameters(tb, h, 0, 0, 0, NULL, -1, -1); + return CARRY_ON; + } + + if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) + return n_ret_value; + + /* get free space of neighbors */ + rfree = get_rfree(tb, h); + lfree = get_lfree(tb, h); + + /* determine maximal number of items we can fit into neighbors */ + check_left(tb, h, lfree); + check_right(tb, h, rfree); + + if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { /* Balance condition for the internal node is valid. + * In this case we balance only if it leads to better packing. */ + if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { /* Here we join S[h] with one of its neighbors, + * which is impossible with greater values of new_nr_item. */ + if (tb->lnum[h] >= vn->vn_nr_item + 1) { + /* All contents of S[h] can be moved to L[h]. */ + int n; + int order_L; + + order_L = + ((n = + PATH_H_B_ITEM_ORDER(tb->tb_path, + h)) == + 0) ? 
B_NR_ITEMS(tb->FL[h]) : n - 1; + n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / + (DC_SIZE + KEY_SIZE); + set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, + -1); + return CARRY_ON; + } + + if (tb->rnum[h] >= vn->vn_nr_item + 1) { + /* All contents of S[h] can be moved to R[h]. */ + int n; + int order_R; + + order_R = + ((n = + PATH_H_B_ITEM_ORDER(tb->tb_path, + h)) == + B_NR_ITEMS(Fh)) ? 0 : n + 1; + n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / + (DC_SIZE + KEY_SIZE); + set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, + -1); + return CARRY_ON; + } + } + + if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { + /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ + int to_r; + + to_r = + ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - + tb->rnum[h] + vn->vn_nr_item + 1) / 2 - + (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); + set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, + 0, NULL, -1, -1); + return CARRY_ON; + } + + /* Balancing does not lead to better packing. */ + set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; } - /* new_nr_item == 0. - * Current root will be deleted resulting in - * decrementing the tree height. */ - set_parameters (tb, h, 0, 0, 0, NULL, -1, -1); - return CARRY_ON; - } - - if ( (n_ret_value = get_parents(tb,h)) != CARRY_ON ) - return n_ret_value; - - - /* get free space of neighbors */ - rfree = get_rfree (tb, h); - lfree = get_lfree (tb, h); - - /* determine maximal number of items we can fit into neighbors */ - check_left (tb, h, lfree); - check_right (tb, h, rfree); - - - if ( vn->vn_nr_item >= MIN_NR_KEY(Sh) ) - { /* Balance condition for the internal node is valid. - * In this case we balance only if it leads to better packing. */ - if ( vn->vn_nr_item == MIN_NR_KEY(Sh) ) - { /* Here we join S[h] with one of its neighbors, - * which is impossible with greater values of new_nr_item. */ - if ( tb->lnum[h] >= vn->vn_nr_item + 1 ) - { - /* All contents of S[h] can be moved to L[h]. 
*/ - int n; - int order_L; - - order_L = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; - n = dc_size(B_N_CHILD(tb->FL[h],order_L)) / (DC_SIZE + KEY_SIZE); - set_parameters (tb, h, -n-1, 0, 0, NULL, -1, -1); - return CARRY_ON; - } - - if ( tb->rnum[h] >= vn->vn_nr_item + 1 ) - { - /* All contents of S[h] can be moved to R[h]. */ - int n; - int order_R; - - order_R = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==B_NR_ITEMS(Fh)) ? 0 : n + 1; - n = dc_size(B_N_CHILD(tb->FR[h],order_R)) / (DC_SIZE + KEY_SIZE); - set_parameters (tb, h, 0, -n-1, 0, NULL, -1, -1); - return CARRY_ON; - } + + /* Current node contain insufficient number of items. Balancing is required. */ + /* Check whether we can merge S[h] with left neighbor. */ + if (tb->lnum[h] >= vn->vn_nr_item + 1) + if (is_left_neighbor_in_cache(tb, h) + || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) { + int n; + int order_L; + + order_L = + ((n = + PATH_H_B_ITEM_ORDER(tb->tb_path, + h)) == + 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; + n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE + + KEY_SIZE); + set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* Check whether we can merge S[h] with right neighbor. */ + if (tb->rnum[h] >= vn->vn_nr_item + 1) { + int n; + int order_R; + + order_R = + ((n = + PATH_H_B_ITEM_ORDER(tb->tb_path, + h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1); + n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE + + KEY_SIZE); + set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1); + return CARRY_ON; } - if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) - { - /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ - int to_r; + /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). 
*/ + if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { + int to_r; + + to_r = + ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - + tb->rnum[h]); + set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, + -1, -1); + return CARRY_ON; + } - to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 - - (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); - set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1); - return CARRY_ON; + /* For internal nodes try to borrow item from a neighbor */ + RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root"); + + /* Borrow one or two items from caching neighbor */ + if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) { + int from_l; + + from_l = + (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item + + 1) / 2 - (vn->vn_nr_item + 1); + set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1); + return CARRY_ON; } - /* Balancing does not lead to better packing. */ - set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; - } - - /* Current node contain insufficient number of items. Balancing is required. */ - /* Check whether we can merge S[h] with left neighbor. */ - if (tb->lnum[h] >= vn->vn_nr_item + 1) - if (is_left_neighbor_in_cache (tb,h) || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) - { - int n; - int order_L; - - order_L = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; - n = dc_size(B_N_CHILD(tb->FL[h],order_L)) / (DC_SIZE + KEY_SIZE); - set_parameters (tb, h, -n-1, 0, 0, NULL, -1, -1); + set_parameters(tb, h, 0, + -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item + + 1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1); return CARRY_ON; - } - - /* Check whether we can merge S[h] with right neighbor. */ - if (tb->rnum[h] >= vn->vn_nr_item + 1) - { - int n; - int order_R; - - order_R = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==B_NR_ITEMS(Fh)) ? 
0 : (n + 1); - n = dc_size(B_N_CHILD(tb->FR[h],order_R)) / (DC_SIZE + KEY_SIZE); - set_parameters (tb, h, 0, -n-1, 0, NULL, -1, -1); - return CARRY_ON; - } - - /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ - if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) - { - int to_r; - - to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 - - (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); - set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1); - return CARRY_ON; - } - - /* For internal nodes try to borrow item from a neighbor */ - RFALSE( !tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root"); - - /* Borrow one or two items from caching neighbor */ - if (is_left_neighbor_in_cache (tb,h) || !tb->FR[h]) - { - int from_l; - - from_l = (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item + 1) / 2 - (vn->vn_nr_item + 1); - set_parameters (tb, h, -from_l, 0, 1, NULL, -1, -1); - return CARRY_ON; - } - - set_parameters (tb, h, 0, -((MAX_NR_KEY(Sh)+1-tb->rnum[h]+vn->vn_nr_item+1)/2-(vn->vn_nr_item+1)), 1, - NULL, -1, -1); - return CARRY_ON; } - /* Check whether current node S[h] is balanced when Decreasing its size by * Deleting or Truncating for LEAF node of S+tree. * Calculate parameters for balancing for current level h. @@ -1677,90 +1733,86 @@ static int dc_check_balance_internal (struct tree_balance * tb, int h) * -1 - no balancing for higher levels needed; * -2 - no disk space. */ -static int dc_check_balance_leaf (struct tree_balance * tb, int h) +static int dc_check_balance_leaf(struct tree_balance *tb, int h) { - struct virtual_node * vn = tb->tb_vn; - - /* Number of bytes that must be deleted from - (value is negative if bytes are deleted) buffer which - contains node being balanced. The mnemonic is that the - attempted change in node space used level is levbytes bytes. 
*/ - int levbytes; - /* the maximal item size */ - int maxsize, - n_ret_value; - /* S0 is the node whose balance is currently being checked, - and F0 is its father. */ - struct buffer_head * S0, * F0; - int lfree, rfree /* free space in L and R */; - - S0 = PATH_H_PBUFFER (tb->tb_path, 0); - F0 = PATH_H_PPARENT (tb->tb_path, 0); - - levbytes = tb->insert_size[h]; - - maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */ - - if ( ! F0 ) - { /* S[0] is the root now. */ - - RFALSE( -levbytes >= maxsize - B_FREE_SPACE (S0), - "vs-8240: attempt to create empty buffer tree"); - - set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; - } - - if ( (n_ret_value = get_parents(tb,h)) != CARRY_ON ) - return n_ret_value; - - /* get free space of neighbors */ - rfree = get_rfree (tb, h); - lfree = get_lfree (tb, h); - - create_virtual_node (tb, h); - - /* if 3 leaves can be merge to one, set parameters and return */ - if (are_leaves_removable (tb, lfree, rfree)) - return CARRY_ON; - - /* determine maximal number of items we can shift to the left/right neighbor - and the maximal number of bytes that can flow to the left/right neighbor - from the left/right most liquid item that cannot be shifted from S[0] entirely - */ - check_left (tb, h, lfree); - check_right (tb, h, rfree); - - /* check whether we can merge S with left neighbor. */ - if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1) - if (is_left_neighbor_in_cache (tb,h) || - ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */ - !tb->FR[h]) { - - RFALSE( !tb->FL[h], "vs-8245: dc_check_balance_leaf: FL[h] must exist"); - - /* set parameter to merge S[0] with its left neighbor */ - set_parameters (tb, h, -1, 0, 0, NULL, -1, -1); - return CARRY_ON; - } - - /* check whether we can merge S[0] with right neighbor. 
*/ - if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) { - set_parameters (tb, h, 0, -1, 0, NULL, -1, -1); - return CARRY_ON; - } - - /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */ - if (is_leaf_removable (tb)) - return CARRY_ON; - - /* Balancing is not required. */ - tb->s0num = vn->vn_nr_item; - set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; -} + struct virtual_node *vn = tb->tb_vn; + + /* Number of bytes that must be deleted from + (value is negative if bytes are deleted) buffer which + contains node being balanced. The mnemonic is that the + attempted change in node space used level is levbytes bytes. */ + int levbytes; + /* the maximal item size */ + int maxsize, n_ret_value; + /* S0 is the node whose balance is currently being checked, + and F0 is its father. */ + struct buffer_head *S0, *F0; + int lfree, rfree /* free space in L and R */ ; + + S0 = PATH_H_PBUFFER(tb->tb_path, 0); + F0 = PATH_H_PPARENT(tb->tb_path, 0); + levbytes = tb->insert_size[h]; + maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */ + + if (!F0) { /* S[0] is the root now. 
*/ + + RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0), + "vs-8240: attempt to create empty buffer tree"); + + set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; + } + + if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) + return n_ret_value; + + /* get free space of neighbors */ + rfree = get_rfree(tb, h); + lfree = get_lfree(tb, h); + + create_virtual_node(tb, h); + + /* if 3 leaves can be merge to one, set parameters and return */ + if (are_leaves_removable(tb, lfree, rfree)) + return CARRY_ON; + + /* determine maximal number of items we can shift to the left/right neighbor + and the maximal number of bytes that can flow to the left/right neighbor + from the left/right most liquid item that cannot be shifted from S[0] entirely + */ + check_left(tb, h, lfree); + check_right(tb, h, rfree); + + /* check whether we can merge S with left neighbor. */ + if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1) + if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */ + !tb->FR[h]) { + + RFALSE(!tb->FL[h], + "vs-8245: dc_check_balance_leaf: FL[h] must exist"); + + /* set parameter to merge S[0] with its left neighbor */ + set_parameters(tb, h, -1, 0, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* check whether we can merge S[0] with right neighbor. */ + if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) { + set_parameters(tb, h, 0, -1, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */ + if (is_leaf_removable(tb)) + return CARRY_ON; + + /* Balancing is not required. */ + tb->s0num = vn->vn_nr_item; + set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; +} /* Check whether current node S[h] is balanced when Decreasing its size by * Deleting or Cutting. 
@@ -1775,18 +1827,17 @@ static int dc_check_balance_leaf (struct tree_balance * tb, int h) * -1 - no balancing for higher levels needed; * -2 - no disk space. */ -static int dc_check_balance (struct tree_balance * tb, int h) +static int dc_check_balance(struct tree_balance *tb, int h) { - RFALSE( ! (PATH_H_PBUFFER (tb->tb_path, h)), "vs-8250: S is not initialized"); + RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)), + "vs-8250: S is not initialized"); - if ( h ) - return dc_check_balance_internal (tb, h); - else - return dc_check_balance_leaf (tb, h); + if (h) + return dc_check_balance_internal(tb, h); + else + return dc_check_balance_leaf(tb, h); } - - /* Check whether current node S[h] is balanced. * Calculate parameters for balancing for current level h. * Parameters: @@ -1805,83 +1856,80 @@ static int dc_check_balance (struct tree_balance * tb, int h) * -1 - no balancing for higher levels needed; * -2 - no disk space. */ -static int check_balance (int mode, - struct tree_balance * tb, - int h, - int inum, - int pos_in_item, - struct item_head * ins_ih, - const void * data - ) +static int check_balance(int mode, + struct tree_balance *tb, + int h, + int inum, + int pos_in_item, + struct item_head *ins_ih, const void *data) { - struct virtual_node * vn; + struct virtual_node *vn; - vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf); - vn->vn_free_ptr = (char *)(tb->tb_vn + 1); - vn->vn_mode = mode; - vn->vn_affected_item_num = inum; - vn->vn_pos_in_item = pos_in_item; - vn->vn_ins_ih = ins_ih; - vn->vn_data = data; + vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf); + vn->vn_free_ptr = (char *)(tb->tb_vn + 1); + vn->vn_mode = mode; + vn->vn_affected_item_num = inum; + vn->vn_pos_in_item = pos_in_item; + vn->vn_ins_ih = ins_ih; + vn->vn_data = data; - RFALSE( mode == M_INSERT && !vn->vn_ins_ih, - "vs-8255: ins_ih can not be 0 in insert mode"); + RFALSE(mode == M_INSERT && !vn->vn_ins_ih, + "vs-8255: ins_ih can not be 0 in insert mode"); - if ( tb->insert_size[h] > 
0 ) - /* Calculate balance parameters when size of node is increasing. */ - return ip_check_balance (tb, h); + if (tb->insert_size[h] > 0) + /* Calculate balance parameters when size of node is increasing. */ + return ip_check_balance(tb, h); - /* Calculate balance parameters when size of node is decreasing. */ - return dc_check_balance (tb, h); + /* Calculate balance parameters when size of node is decreasing. */ + return dc_check_balance(tb, h); } +/* Check whether parent at the path is the really parent of the current node.*/ +static int get_direct_parent(struct tree_balance *p_s_tb, int n_h) +{ + struct buffer_head *p_s_bh; + struct path *p_s_path = p_s_tb->tb_path; + int n_position, + n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); + + /* We are in the root or in the new root. */ + if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) { + + RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1, + "PAP-8260: invalid offset in the path"); + + if (PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)-> + b_blocknr == SB_ROOT_BLOCK(p_s_tb->tb_sb)) { + /* Root is not changed. */ + PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL; + PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0; + return CARRY_ON; + } + return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */ + } + + if (!B_IS_IN_TREE + (p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))) + return REPEAT_SEARCH; /* Parent in the path is not in the tree. */ + if ((n_position = + PATH_OFFSET_POSITION(p_s_path, + n_path_offset - 1)) > B_NR_ITEMS(p_s_bh)) + return REPEAT_SEARCH; -/* Check whether parent at the path is the really parent of the current node.*/ -static int get_direct_parent( - struct tree_balance * p_s_tb, - int n_h - ) { - struct buffer_head * p_s_bh; - struct path * p_s_path = p_s_tb->tb_path; - int n_position, - n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); - - /* We are in the root or in the new root. 
*/ - if ( n_path_offset <= FIRST_PATH_ELEMENT_OFFSET ) { - - RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1, - "PAP-8260: invalid offset in the path"); - - if ( PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == - SB_ROOT_BLOCK (p_s_tb->tb_sb) ) { - /* Root is not changed. */ - PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL; - PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0; - return CARRY_ON; + if (B_N_CHILD_NUM(p_s_bh, n_position) != + PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr) + /* Parent in the path is not parent of the current node in the tree. */ + return REPEAT_SEARCH; + + if (buffer_locked(p_s_bh)) { + __wait_on_buffer(p_s_bh); + if (FILESYSTEM_CHANGED_TB(p_s_tb)) + return REPEAT_SEARCH; } - return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */ - } - - if ( ! B_IS_IN_TREE(p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1)) ) - return REPEAT_SEARCH; /* Parent in the path is not in the tree. */ - - if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1)) > B_NR_ITEMS(p_s_bh) ) - return REPEAT_SEARCH; - - if ( B_N_CHILD_NUM(p_s_bh, n_position) != PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr ) - /* Parent in the path is not parent of the current node in the tree. */ - return REPEAT_SEARCH; - - if ( buffer_locked(p_s_bh) ) { - __wait_on_buffer(p_s_bh); - if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) - return REPEAT_SEARCH; - } - - return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */ -} + return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. 
*/ +} /* Using lnum[n_h] and rnum[n_h] we should determine what neighbors * of S[n_h] we @@ -1889,356 +1937,401 @@ static int get_direct_parent( * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; * CARRY_ON - schedule didn't occur while the function worked; */ -static int get_neighbors( - struct tree_balance * p_s_tb, - int n_h - ) { - int n_child_position, - n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1); - unsigned long n_son_number; - struct super_block * p_s_sb = p_s_tb->tb_sb; - struct buffer_head * p_s_bh; - - - PROC_INFO_INC( p_s_sb, get_neighbors[ n_h ] ); - - if ( p_s_tb->lnum[n_h] ) { - /* We need left neighbor to balance S[n_h]. */ - PROC_INFO_INC( p_s_sb, need_l_neighbor[ n_h ] ); - p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); - - RFALSE( p_s_bh == p_s_tb->FL[n_h] && - ! PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset), - "PAP-8270: invalid position in the parent"); - - n_child_position = ( p_s_bh == p_s_tb->FL[n_h] ) ? p_s_tb->lkey[n_h] : B_NR_ITEMS (p_s_tb->FL[n_h]); - n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position); - p_s_bh = sb_bread(p_s_sb, n_son_number); - if (!p_s_bh) - return IO_ERROR; - if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { - decrement_bcount(p_s_bh); - PROC_INFO_INC( p_s_sb, get_neighbors_restart[ n_h ] ); - return REPEAT_SEARCH; +static int get_neighbors(struct tree_balance *p_s_tb, int n_h) +{ + int n_child_position, + n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1); + unsigned long n_son_number; + struct super_block *p_s_sb = p_s_tb->tb_sb; + struct buffer_head *p_s_bh; + + PROC_INFO_INC(p_s_sb, get_neighbors[n_h]); + + if (p_s_tb->lnum[n_h]) { + /* We need left neighbor to balance S[n_h]. 
*/ + PROC_INFO_INC(p_s_sb, need_l_neighbor[n_h]); + p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); + + RFALSE(p_s_bh == p_s_tb->FL[n_h] && + !PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset), + "PAP-8270: invalid position in the parent"); + + n_child_position = + (p_s_bh == + p_s_tb->FL[n_h]) ? p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb-> + FL[n_h]); + n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position); + p_s_bh = sb_bread(p_s_sb, n_son_number); + if (!p_s_bh) + return IO_ERROR; + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { + decrement_bcount(p_s_bh); + PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]); + return REPEAT_SEARCH; + } + + RFALSE(!B_IS_IN_TREE(p_s_tb->FL[n_h]) || + n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) || + B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) != + p_s_bh->b_blocknr, "PAP-8275: invalid parent"); + RFALSE(!B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child"); + RFALSE(!n_h && + B_FREE_SPACE(p_s_bh) != + MAX_CHILD_SIZE(p_s_bh) - + dc_size(B_N_CHILD(p_s_tb->FL[0], n_child_position)), + "PAP-8290: invalid child size of left neighbor"); + + decrement_bcount(p_s_tb->L[n_h]); + p_s_tb->L[n_h] = p_s_bh; } - - RFALSE( ! B_IS_IN_TREE(p_s_tb->FL[n_h]) || - n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) || - B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) != - p_s_bh->b_blocknr, "PAP-8275: invalid parent"); - RFALSE( ! B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child"); - RFALSE( ! n_h && - B_FREE_SPACE (p_s_bh) != MAX_CHILD_SIZE (p_s_bh) - dc_size(B_N_CHILD (p_s_tb->FL[0],n_child_position)), - "PAP-8290: invalid child size of left neighbor"); - - decrement_bcount(p_s_tb->L[n_h]); - p_s_tb->L[n_h] = p_s_bh; - } - - - if ( p_s_tb->rnum[n_h] ) { /* We need right neighbor to balance S[n_path_offset]. 
*/ - PROC_INFO_INC( p_s_sb, need_r_neighbor[ n_h ] ); - p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); - - RFALSE( p_s_bh == p_s_tb->FR[n_h] && - PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset) >= B_NR_ITEMS(p_s_bh), - "PAP-8295: invalid position in the parent"); - - n_child_position = ( p_s_bh == p_s_tb->FR[n_h] ) ? p_s_tb->rkey[n_h] + 1 : 0; - n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position); - p_s_bh = sb_bread(p_s_sb, n_son_number); - if (!p_s_bh) - return IO_ERROR; - if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { - decrement_bcount(p_s_bh); - PROC_INFO_INC( p_s_sb, get_neighbors_restart[ n_h ] ); - return REPEAT_SEARCH; + + if (p_s_tb->rnum[n_h]) { /* We need right neighbor to balance S[n_path_offset]. */ + PROC_INFO_INC(p_s_sb, need_r_neighbor[n_h]); + p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); + + RFALSE(p_s_bh == p_s_tb->FR[n_h] && + PATH_OFFSET_POSITION(p_s_tb->tb_path, + n_path_offset) >= + B_NR_ITEMS(p_s_bh), + "PAP-8295: invalid position in the parent"); + + n_child_position = + (p_s_bh == p_s_tb->FR[n_h]) ? p_s_tb->rkey[n_h] + 1 : 0; + n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position); + p_s_bh = sb_bread(p_s_sb, n_son_number); + if (!p_s_bh) + return IO_ERROR; + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { + decrement_bcount(p_s_bh); + PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]); + return REPEAT_SEARCH; + } + decrement_bcount(p_s_tb->R[n_h]); + p_s_tb->R[n_h] = p_s_bh; + + RFALSE(!n_h + && B_FREE_SPACE(p_s_bh) != + MAX_CHILD_SIZE(p_s_bh) - + dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)), + "PAP-8300: invalid child size of right neighbor (%d != %d - %d)", + B_FREE_SPACE(p_s_bh), MAX_CHILD_SIZE(p_s_bh), + dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position))); + } - decrement_bcount(p_s_tb->R[n_h]); - p_s_tb->R[n_h] = p_s_bh; - - RFALSE( ! 
n_h && B_FREE_SPACE (p_s_bh) != MAX_CHILD_SIZE (p_s_bh) - dc_size(B_N_CHILD (p_s_tb->FR[0],n_child_position)), - "PAP-8300: invalid child size of right neighbor (%d != %d - %d)", - B_FREE_SPACE (p_s_bh), MAX_CHILD_SIZE (p_s_bh), - dc_size(B_N_CHILD (p_s_tb->FR[0],n_child_position))); - - } - return CARRY_ON; + return CARRY_ON; } #ifdef CONFIG_REISERFS_CHECK -void * reiserfs_kmalloc (size_t size, int flags, struct super_block * s) +void *reiserfs_kmalloc(size_t size, int flags, struct super_block *s) { - void * vp; - static size_t malloced; - - - vp = kmalloc (size, flags); - if (vp) { - REISERFS_SB(s)->s_kmallocs += size; - if (REISERFS_SB(s)->s_kmallocs > malloced + 200000) { - reiserfs_warning (s, - "vs-8301: reiserfs_kmalloc: allocated memory %d", - REISERFS_SB(s)->s_kmallocs); - malloced = REISERFS_SB(s)->s_kmallocs; + void *vp; + static size_t malloced; + + vp = kmalloc(size, flags); + if (vp) { + REISERFS_SB(s)->s_kmallocs += size; + if (REISERFS_SB(s)->s_kmallocs > malloced + 200000) { + reiserfs_warning(s, + "vs-8301: reiserfs_kmalloc: allocated memory %d", + REISERFS_SB(s)->s_kmallocs); + malloced = REISERFS_SB(s)->s_kmallocs; + } } - } - return vp; + return vp; } -void reiserfs_kfree (const void * vp, size_t size, struct super_block * s) +void reiserfs_kfree(const void *vp, size_t size, struct super_block *s) { - kfree (vp); - - REISERFS_SB(s)->s_kmallocs -= size; - if (REISERFS_SB(s)->s_kmallocs < 0) - reiserfs_warning (s, "vs-8302: reiserfs_kfree: allocated memory %d", - REISERFS_SB(s)->s_kmallocs); + kfree(vp); + + REISERFS_SB(s)->s_kmallocs -= size; + if (REISERFS_SB(s)->s_kmallocs < 0) + reiserfs_warning(s, + "vs-8302: reiserfs_kfree: allocated memory %d", + REISERFS_SB(s)->s_kmallocs); } #endif - -static int get_virtual_node_size (struct super_block * sb, struct buffer_head * bh) +static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh) { - int max_num_of_items; - int max_num_of_entries; - unsigned long blocksize = 
sb->s_blocksize; + int max_num_of_items; + int max_num_of_entries; + unsigned long blocksize = sb->s_blocksize; #define MIN_NAME_LEN 1 - max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN); - max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) / - (DEH_SIZE + MIN_NAME_LEN); + max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN); + max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) / + (DEH_SIZE + MIN_NAME_LEN); - return sizeof(struct virtual_node) + - max(max_num_of_items * sizeof (struct virtual_item), - sizeof (struct virtual_item) + sizeof(struct direntry_uarea) + - (max_num_of_entries - 1) * sizeof (__u16)); + return sizeof(struct virtual_node) + + max(max_num_of_items * sizeof(struct virtual_item), + sizeof(struct virtual_item) + sizeof(struct direntry_uarea) + + (max_num_of_entries - 1) * sizeof(__u16)); } - - /* maybe we should fail balancing we are going to perform when kmalloc fails several times. But now it will loop until kmalloc gets required memory */ -static int get_mem_for_virtual_node (struct tree_balance * tb) +static int get_mem_for_virtual_node(struct tree_balance *tb) { - int check_fs = 0; - int size; - char * buf; - - size = get_virtual_node_size (tb->tb_sb, PATH_PLAST_BUFFER (tb->tb_path)); - - if (size > tb->vn_buf_size) { - /* we have to allocate more memory for virtual node */ - if (tb->vn_buf) { - /* free memory allocated before */ - reiserfs_kfree (tb->vn_buf, tb->vn_buf_size, tb->tb_sb); - /* this is not needed if kfree is atomic */ - check_fs = 1; - } + int check_fs = 0; + int size; + char *buf; + + size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path)); + + if (size > tb->vn_buf_size) { + /* we have to allocate more memory for virtual node */ + if (tb->vn_buf) { + /* free memory allocated before */ + reiserfs_kfree(tb->vn_buf, tb->vn_buf_size, tb->tb_sb); + /* this is not needed if kfree is atomic */ + check_fs = 1; + } - /* virtual node requires now more memory */ - 
tb->vn_buf_size = size; - - /* get memory for virtual item */ - buf = reiserfs_kmalloc(size, GFP_ATOMIC | __GFP_NOWARN, tb->tb_sb); - if ( ! buf ) { - /* getting memory with GFP_KERNEL priority may involve - balancing now (due to indirect_to_direct conversion on - dcache shrinking). So, release path and collected - resources here */ - free_buffers_in_tb (tb); - buf = reiserfs_kmalloc(size, GFP_NOFS, tb->tb_sb); - if ( !buf ) { + /* virtual node requires now more memory */ + tb->vn_buf_size = size; + + /* get memory for virtual item */ + buf = + reiserfs_kmalloc(size, GFP_ATOMIC | __GFP_NOWARN, + tb->tb_sb); + if (!buf) { + /* getting memory with GFP_KERNEL priority may involve + balancing now (due to indirect_to_direct conversion on + dcache shrinking). So, release path and collected + resources here */ + free_buffers_in_tb(tb); + buf = reiserfs_kmalloc(size, GFP_NOFS, tb->tb_sb); + if (!buf) { #ifdef CONFIG_REISERFS_CHECK - reiserfs_warning (tb->tb_sb, - "vs-8345: get_mem_for_virtual_node: " - "kmalloc failed. reiserfs kmalloced %d bytes", - REISERFS_SB(tb->tb_sb)->s_kmallocs); + reiserfs_warning(tb->tb_sb, + "vs-8345: get_mem_for_virtual_node: " + "kmalloc failed. 
reiserfs kmalloced %d bytes", + REISERFS_SB(tb->tb_sb)-> + s_kmallocs); #endif - tb->vn_buf_size = 0; - } - tb->vn_buf = buf; - schedule() ; - return REPEAT_SEARCH; - } + tb->vn_buf_size = 0; + } + tb->vn_buf = buf; + schedule(); + return REPEAT_SEARCH; + } - tb->vn_buf = buf; - } + tb->vn_buf = buf; + } - if ( check_fs && FILESYSTEM_CHANGED_TB (tb) ) - return REPEAT_SEARCH; + if (check_fs && FILESYSTEM_CHANGED_TB(tb)) + return REPEAT_SEARCH; - return CARRY_ON; + return CARRY_ON; } - #ifdef CONFIG_REISERFS_CHECK -static void tb_buffer_sanity_check (struct super_block * p_s_sb, - struct buffer_head * p_s_bh, - const char *descr, int level) { - if (p_s_bh) { - if (atomic_read (&(p_s_bh->b_count)) <= 0) { - - reiserfs_panic (p_s_sb, "jmacd-1: tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n", descr, level, p_s_bh); - } - - if ( ! buffer_uptodate (p_s_bh) ) { - reiserfs_panic (p_s_sb, "jmacd-2: tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n", descr, level, p_s_bh); - } - - if ( ! 
B_IS_IN_TREE (p_s_bh) ) { - reiserfs_panic (p_s_sb, "jmacd-3: tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n", descr, level, p_s_bh); - } - - if (p_s_bh->b_bdev != p_s_sb->s_bdev) { - reiserfs_panic (p_s_sb, "jmacd-4: tb_buffer_sanity_check(): buffer has wrong device %s[%d] (%b)\n", descr, level, p_s_bh); - } - - if (p_s_bh->b_size != p_s_sb->s_blocksize) { - reiserfs_panic (p_s_sb, "jmacd-5: tb_buffer_sanity_check(): buffer has wrong blocksize %s[%d] (%b)\n", descr, level, p_s_bh); - } - - if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) { - reiserfs_panic (p_s_sb, "jmacd-6: tb_buffer_sanity_check(): buffer block number too high %s[%d] (%b)\n", descr, level, p_s_bh); - } - } -} -#else -static void tb_buffer_sanity_check (struct super_block * p_s_sb, - struct buffer_head * p_s_bh, - const char *descr, int level) -{;} -#endif - -static int clear_all_dirty_bits(struct super_block *s, - struct buffer_head *bh) { - return reiserfs_prepare_for_journal(s, bh, 0) ; -} - -static int wait_tb_buffers_until_unlocked (struct tree_balance * p_s_tb) +static void tb_buffer_sanity_check(struct super_block *p_s_sb, + struct buffer_head *p_s_bh, + const char *descr, int level) { - struct buffer_head * locked; -#ifdef CONFIG_REISERFS_CHECK - int repeat_counter = 0; -#endif - int i; + if (p_s_bh) { + if (atomic_read(&(p_s_bh->b_count)) <= 0) { - do { - - locked = NULL; - - for ( i = p_s_tb->tb_path->path_length; !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i-- ) { - if ( PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i) ) { - /* if I understand correctly, we can only be sure the last buffer - ** in the path is in the tree --clm - */ -#ifdef CONFIG_REISERFS_CHECK - if (PATH_PLAST_BUFFER(p_s_tb->tb_path) == - PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) { - tb_buffer_sanity_check (p_s_tb->tb_sb, - PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i), - "S", - p_s_tb->tb_path->path_length - i); + reiserfs_panic(p_s_sb, + "jmacd-1: tb_buffer_sanity_check(): negative or zero reference counter for 
buffer %s[%d] (%b)\n", + descr, level, p_s_bh); } -#endif - if (!clear_all_dirty_bits(p_s_tb->tb_sb, - PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i))) - { - locked = PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i); - } - } - } - for ( i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i]; i++ ) { + if (!buffer_uptodate(p_s_bh)) { + reiserfs_panic(p_s_sb, + "jmacd-2: tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n", + descr, level, p_s_bh); + } - if (p_s_tb->lnum[i] ) { + if (!B_IS_IN_TREE(p_s_bh)) { + reiserfs_panic(p_s_sb, + "jmacd-3: tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n", + descr, level, p_s_bh); + } - if ( p_s_tb->L[i] ) { - tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->L[i], "L", i); - if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->L[i])) - locked = p_s_tb->L[i]; + if (p_s_bh->b_bdev != p_s_sb->s_bdev) { + reiserfs_panic(p_s_sb, + "jmacd-4: tb_buffer_sanity_check(): buffer has wrong device %s[%d] (%b)\n", + descr, level, p_s_bh); } - if ( !locked && p_s_tb->FL[i] ) { - tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->FL[i], "FL", i); - if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FL[i])) - locked = p_s_tb->FL[i]; + if (p_s_bh->b_size != p_s_sb->s_blocksize) { + reiserfs_panic(p_s_sb, + "jmacd-5: tb_buffer_sanity_check(): buffer has wrong blocksize %s[%d] (%b)\n", + descr, level, p_s_bh); } - if ( !locked && p_s_tb->CFL[i] ) { - tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->CFL[i], "CFL", i); - if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->CFL[i])) - locked = p_s_tb->CFL[i]; + if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) { + reiserfs_panic(p_s_sb, + "jmacd-6: tb_buffer_sanity_check(): buffer block number too high %s[%d] (%b)\n", + descr, level, p_s_bh); } + } +} +#else +static void tb_buffer_sanity_check(struct super_block *p_s_sb, + struct buffer_head *p_s_bh, + const char *descr, int level) +{; +} +#endif - } +static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh) +{ + return 
reiserfs_prepare_for_journal(s, bh, 0); +} - if ( !locked && (p_s_tb->rnum[i]) ) { +static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb) +{ + struct buffer_head *locked; +#ifdef CONFIG_REISERFS_CHECK + int repeat_counter = 0; +#endif + int i; - if ( p_s_tb->R[i] ) { - tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->R[i], "R", i); - if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->R[i])) - locked = p_s_tb->R[i]; - } + do { - - if ( !locked && p_s_tb->FR[i] ) { - tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->FR[i], "FR", i); - if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FR[i])) - locked = p_s_tb->FR[i]; + locked = NULL; + + for (i = p_s_tb->tb_path->path_length; + !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { + if (PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) { + /* if I understand correctly, we can only be sure the last buffer + ** in the path is in the tree --clm + */ +#ifdef CONFIG_REISERFS_CHECK + if (PATH_PLAST_BUFFER(p_s_tb->tb_path) == + PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) { + tb_buffer_sanity_check(p_s_tb->tb_sb, + PATH_OFFSET_PBUFFER + (p_s_tb->tb_path, + i), "S", + p_s_tb->tb_path-> + path_length - i); + } +#endif + if (!clear_all_dirty_bits(p_s_tb->tb_sb, + PATH_OFFSET_PBUFFER + (p_s_tb->tb_path, + i))) { + locked = + PATH_OFFSET_PBUFFER(p_s_tb->tb_path, + i); + } + } } - if ( !locked && p_s_tb->CFR[i] ) { - tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->CFR[i], "CFR", i); - if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->CFR[i])) - locked = p_s_tb->CFR[i]; + for (i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i]; + i++) { + + if (p_s_tb->lnum[i]) { + + if (p_s_tb->L[i]) { + tb_buffer_sanity_check(p_s_tb->tb_sb, + p_s_tb->L[i], + "L", i); + if (!clear_all_dirty_bits + (p_s_tb->tb_sb, p_s_tb->L[i])) + locked = p_s_tb->L[i]; + } + + if (!locked && p_s_tb->FL[i]) { + tb_buffer_sanity_check(p_s_tb->tb_sb, + p_s_tb->FL[i], + "FL", i); + if (!clear_all_dirty_bits + (p_s_tb->tb_sb, p_s_tb->FL[i])) + locked = p_s_tb->FL[i]; + 
} + + if (!locked && p_s_tb->CFL[i]) { + tb_buffer_sanity_check(p_s_tb->tb_sb, + p_s_tb->CFL[i], + "CFL", i); + if (!clear_all_dirty_bits + (p_s_tb->tb_sb, p_s_tb->CFL[i])) + locked = p_s_tb->CFL[i]; + } + + } + + if (!locked && (p_s_tb->rnum[i])) { + + if (p_s_tb->R[i]) { + tb_buffer_sanity_check(p_s_tb->tb_sb, + p_s_tb->R[i], + "R", i); + if (!clear_all_dirty_bits + (p_s_tb->tb_sb, p_s_tb->R[i])) + locked = p_s_tb->R[i]; + } + + if (!locked && p_s_tb->FR[i]) { + tb_buffer_sanity_check(p_s_tb->tb_sb, + p_s_tb->FR[i], + "FR", i); + if (!clear_all_dirty_bits + (p_s_tb->tb_sb, p_s_tb->FR[i])) + locked = p_s_tb->FR[i]; + } + + if (!locked && p_s_tb->CFR[i]) { + tb_buffer_sanity_check(p_s_tb->tb_sb, + p_s_tb->CFR[i], + "CFR", i); + if (!clear_all_dirty_bits + (p_s_tb->tb_sb, p_s_tb->CFR[i])) + locked = p_s_tb->CFR[i]; + } + } + } + /* as far as I can tell, this is not required. The FEB list seems + ** to be full of newly allocated nodes, which will never be locked, + ** dirty, or anything else. + ** To be safe, I'm putting in the checks and waits in. For the moment, + ** they are needed to keep the code in journal.c from complaining + ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well. + ** --clm + */ + for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { + if (p_s_tb->FEB[i]) { + if (!clear_all_dirty_bits + (p_s_tb->tb_sb, p_s_tb->FEB[i])) + locked = p_s_tb->FEB[i]; + } } - } - } - /* as far as I can tell, this is not required. The FEB list seems - ** to be full of newly allocated nodes, which will never be locked, - ** dirty, or anything else. - ** To be safe, I'm putting in the checks and waits in. For the moment, - ** they are needed to keep the code in journal.c from complaining - ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well. 
- ** --clm - */ - for ( i = 0; !locked && i < MAX_FEB_SIZE; i++ ) { - if ( p_s_tb->FEB[i] ) { - if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FEB[i])) - locked = p_s_tb->FEB[i] ; - } - } - if (locked) { + if (locked) { #ifdef CONFIG_REISERFS_CHECK - repeat_counter++; - if ( (repeat_counter % 10000) == 0) { - reiserfs_warning (p_s_tb->tb_sb, - "wait_tb_buffers_until_released(): too many " - "iterations waiting for buffer to unlock " - "(%b)", locked); - - /* Don't loop forever. Try to recover from possible error. */ - - return ( FILESYSTEM_CHANGED_TB (p_s_tb) ) ? REPEAT_SEARCH : CARRY_ON; - } + repeat_counter++; + if ((repeat_counter % 10000) == 0) { + reiserfs_warning(p_s_tb->tb_sb, + "wait_tb_buffers_until_released(): too many " + "iterations waiting for buffer to unlock " + "(%b)", locked); + + /* Don't loop forever. Try to recover from possible error. */ + + return (FILESYSTEM_CHANGED_TB(p_s_tb)) ? + REPEAT_SEARCH : CARRY_ON; + } #endif - __wait_on_buffer (locked); - if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { - return REPEAT_SEARCH; - } - } + __wait_on_buffer(locked); + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { + return REPEAT_SEARCH; + } + } - } while (locked); + } while (locked); - return CARRY_ON; + return CARRY_ON; } - /* Prepare for balancing, that is * get all necessary parents, and neighbors; * analyze what and where should be moved; @@ -2267,252 +2360,266 @@ static int wait_tb_buffers_until_unlocked (struct tree_balance * p_s_tb) * -1 - if no_disk_space */ +int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, struct item_head *p_s_ins_ih, // item head of item being inserted + const void *data // inserted item or data to be pasted + ) +{ + int n_ret_value, n_h, n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path); + int n_pos_in_item; -int fix_nodes (int n_op_mode, - struct tree_balance * p_s_tb, - struct item_head * p_s_ins_ih, // item head of item being inserted - const void * data // inserted item or data to be pasted - ) { - int n_ret_value, - n_h, - 
n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path); - int n_pos_in_item; - - /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared - ** during wait_tb_buffers_run - */ - int wait_tb_buffers_run = 0 ; - struct buffer_head * p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path); - - ++ REISERFS_SB(p_s_tb -> tb_sb) -> s_fix_nodes; - - n_pos_in_item = p_s_tb->tb_path->pos_in_item; - - - p_s_tb->fs_gen = get_generation (p_s_tb->tb_sb); - - /* we prepare and log the super here so it will already be in the - ** transaction when do_balance needs to change it. - ** This way do_balance won't have to schedule when trying to prepare - ** the super for logging - */ - reiserfs_prepare_for_journal(p_s_tb->tb_sb, - SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1) ; - journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb, - SB_BUFFER_WITH_SB(p_s_tb->tb_sb)) ; - if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) - return REPEAT_SEARCH; - - /* if it possible in indirect_to_direct conversion */ - if (buffer_locked (p_s_tbS0)) { - __wait_on_buffer (p_s_tbS0); - if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) - return REPEAT_SEARCH; - } + /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared + ** during wait_tb_buffers_run + */ + int wait_tb_buffers_run = 0; + struct buffer_head *p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path); -#ifdef CONFIG_REISERFS_CHECK - if ( cur_tb ) { - print_cur_tb ("fix_nodes"); - reiserfs_panic(p_s_tb->tb_sb,"PAP-8305: fix_nodes: there is pending do_balance"); - } - - if (!buffer_uptodate (p_s_tbS0) || !B_IS_IN_TREE (p_s_tbS0)) { - reiserfs_panic (p_s_tb->tb_sb, "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate " - "at the beginning of fix_nodes or not in tree (mode %c)", p_s_tbS0, p_s_tbS0, n_op_mode); - } - - /* Check parameters. 
*/ - switch (n_op_mode) { - case M_INSERT: - if ( n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0) ) - reiserfs_panic(p_s_tb->tb_sb,"PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert", - n_item_num, B_NR_ITEMS(p_s_tbS0)); - break; - case M_PASTE: - case M_DELETE: - case M_CUT: - if ( n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0) ) { - print_block (p_s_tbS0, 0, -1, -1); - reiserfs_panic(p_s_tb->tb_sb,"PAP-8335: fix_nodes: Incorrect item number(%d); mode = %c insert_size = %d\n", n_item_num, n_op_mode, p_s_tb->insert_size[0]); - } - break; - default: - reiserfs_panic(p_s_tb->tb_sb,"PAP-8340: fix_nodes: Incorrect mode of operation"); - } -#endif + ++REISERFS_SB(p_s_tb->tb_sb)->s_fix_nodes; + + n_pos_in_item = p_s_tb->tb_path->pos_in_item; + + p_s_tb->fs_gen = get_generation(p_s_tb->tb_sb); - if (get_mem_for_virtual_node (p_s_tb) == REPEAT_SEARCH) - // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat - return REPEAT_SEARCH; + /* we prepare and log the super here so it will already be in the + ** transaction when do_balance needs to change it. + ** This way do_balance won't have to schedule when trying to prepare + ** the super for logging + */ + reiserfs_prepare_for_journal(p_s_tb->tb_sb, + SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1); + journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb, + SB_BUFFER_WITH_SB(p_s_tb->tb_sb)); + if (FILESYSTEM_CHANGED_TB(p_s_tb)) + return REPEAT_SEARCH; + /* if it possible in indirect_to_direct conversion */ + if (buffer_locked(p_s_tbS0)) { + __wait_on_buffer(p_s_tbS0); + if (FILESYSTEM_CHANGED_TB(p_s_tb)) + return REPEAT_SEARCH; + } +#ifdef CONFIG_REISERFS_CHECK + if (cur_tb) { + print_cur_tb("fix_nodes"); + reiserfs_panic(p_s_tb->tb_sb, + "PAP-8305: fix_nodes: there is pending do_balance"); + } - /* Starting from the leaf level; for all levels n_h of the tree. 
*/ - for ( n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++ ) { - if ( (n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON ) { - goto repeat; + if (!buffer_uptodate(p_s_tbS0) || !B_IS_IN_TREE(p_s_tbS0)) { + reiserfs_panic(p_s_tb->tb_sb, + "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate " + "at the beginning of fix_nodes or not in tree (mode %c)", + p_s_tbS0, p_s_tbS0, n_op_mode); } - if ( (n_ret_value = check_balance (n_op_mode, p_s_tb, n_h, n_item_num, - n_pos_in_item, p_s_ins_ih, data)) != CARRY_ON ) { - if ( n_ret_value == NO_BALANCING_NEEDED ) { - /* No balancing for higher levels needed. */ - if ( (n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON ) { - goto repeat; + /* Check parameters. */ + switch (n_op_mode) { + case M_INSERT: + if (n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0)) + reiserfs_panic(p_s_tb->tb_sb, + "PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert", + n_item_num, B_NR_ITEMS(p_s_tbS0)); + break; + case M_PASTE: + case M_DELETE: + case M_CUT: + if (n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0)) { + print_block(p_s_tbS0, 0, -1, -1); + reiserfs_panic(p_s_tb->tb_sb, + "PAP-8335: fix_nodes: Incorrect item number(%d); mode = %c insert_size = %d\n", + n_item_num, n_op_mode, + p_s_tb->insert_size[0]); } - if ( n_h != MAX_HEIGHT - 1 ) - p_s_tb->insert_size[n_h + 1] = 0; - /* ok, analysis and resource gathering are complete */ break; - } - goto repeat; + default: + reiserfs_panic(p_s_tb->tb_sb, + "PAP-8340: fix_nodes: Incorrect mode of operation"); } +#endif - if ( (n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON ) { - goto repeat; - } + if (get_mem_for_virtual_node(p_s_tb) == REPEAT_SEARCH) + // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat + return REPEAT_SEARCH; - if ( (n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON ) { - goto repeat; /* No disk space, or schedule occurred and - analysis may be invalid and needs to be redone. */ - } - - if ( ! 
PATH_H_PBUFFER(p_s_tb->tb_path, n_h) ) { - /* We have a positive insert size but no nodes exist on this - level, this means that we are creating a new root. */ + /* Starting from the leaf level; for all levels n_h of the tree. */ + for (n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++) { + if ((n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON) { + goto repeat; + } - RFALSE( p_s_tb->blknum[n_h] != 1, - "PAP-8350: creating new empty root"); + if ((n_ret_value = + check_balance(n_op_mode, p_s_tb, n_h, n_item_num, + n_pos_in_item, p_s_ins_ih, + data)) != CARRY_ON) { + if (n_ret_value == NO_BALANCING_NEEDED) { + /* No balancing for higher levels needed. */ + if ((n_ret_value = + get_neighbors(p_s_tb, n_h)) != CARRY_ON) { + goto repeat; + } + if (n_h != MAX_HEIGHT - 1) + p_s_tb->insert_size[n_h + 1] = 0; + /* ok, analysis and resource gathering are complete */ + break; + } + goto repeat; + } - if ( n_h < MAX_HEIGHT - 1 ) - p_s_tb->insert_size[n_h + 1] = 0; - } - else - if ( ! PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1) ) { - if ( p_s_tb->blknum[n_h] > 1 ) { - /* The tree needs to be grown, so this node S[n_h] - which is the root node is split into two nodes, - and a new node (S[n_h+1]) will be created to - become the root node. 
*/ - - RFALSE( n_h == MAX_HEIGHT - 1, - "PAP-8355: attempt to create too high of a tree"); - - p_s_tb->insert_size[n_h + 1] = (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) + DC_SIZE; + if ((n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON) { + goto repeat; } - else - if ( n_h < MAX_HEIGHT - 1 ) - p_s_tb->insert_size[n_h + 1] = 0; - } - else - p_s_tb->insert_size[n_h + 1] = (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1); - } - - if ((n_ret_value = wait_tb_buffers_until_unlocked (p_s_tb)) == CARRY_ON) { - if (FILESYSTEM_CHANGED_TB(p_s_tb)) { - wait_tb_buffers_run = 1 ; - n_ret_value = REPEAT_SEARCH ; - goto repeat; - } else { - return CARRY_ON; + + if ((n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON) { + goto repeat; /* No disk space, or schedule occurred and + analysis may be invalid and needs to be redone. */ + } + + if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h)) { + /* We have a positive insert size but no nodes exist on this + level, this means that we are creating a new root. */ + + RFALSE(p_s_tb->blknum[n_h] != 1, + "PAP-8350: creating new empty root"); + + if (n_h < MAX_HEIGHT - 1) + p_s_tb->insert_size[n_h + 1] = 0; + } else if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1)) { + if (p_s_tb->blknum[n_h] > 1) { + /* The tree needs to be grown, so this node S[n_h] + which is the root node is split into two nodes, + and a new node (S[n_h+1]) will be created to + become the root node. */ + + RFALSE(n_h == MAX_HEIGHT - 1, + "PAP-8355: attempt to create too high of a tree"); + + p_s_tb->insert_size[n_h + 1] = + (DC_SIZE + + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) + + DC_SIZE; + } else if (n_h < MAX_HEIGHT - 1) + p_s_tb->insert_size[n_h + 1] = 0; + } else + p_s_tb->insert_size[n_h + 1] = + (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1); } - } else { - wait_tb_buffers_run = 1 ; - goto repeat; - } - - repeat: - // fix_nodes was unable to perform its calculation due to - // filesystem got changed under us, lack of free disk space or i/o - // failure. 
If the first is the case - the search will be - // repeated. For now - free all resources acquired so far except - // for the new allocated nodes - { - int i; - /* Release path buffers. */ - if (wait_tb_buffers_run) { - pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path) ; + if ((n_ret_value = wait_tb_buffers_until_unlocked(p_s_tb)) == CARRY_ON) { + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { + wait_tb_buffers_run = 1; + n_ret_value = REPEAT_SEARCH; + goto repeat; + } else { + return CARRY_ON; + } } else { - pathrelse (p_s_tb->tb_path); - } - /* brelse all resources collected for balancing */ - for ( i = 0; i < MAX_HEIGHT; i++ ) { - if (wait_tb_buffers_run) { - reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->L[i]); - reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->R[i]); - reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->FL[i]); - reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->FR[i]); - reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->CFL[i]); - reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->CFR[i]); - } - - brelse (p_s_tb->L[i]);p_s_tb->L[i] = NULL; - brelse (p_s_tb->R[i]);p_s_tb->R[i] = NULL; - brelse (p_s_tb->FL[i]);p_s_tb->FL[i] = NULL; - brelse (p_s_tb->FR[i]);p_s_tb->FR[i] = NULL; - brelse (p_s_tb->CFL[i]);p_s_tb->CFL[i] = NULL; - brelse (p_s_tb->CFR[i]);p_s_tb->CFR[i] = NULL; + wait_tb_buffers_run = 1; + goto repeat; } - if (wait_tb_buffers_run) { - for ( i = 0; i < MAX_FEB_SIZE; i++ ) { - if ( p_s_tb->FEB[i] ) { - reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, - p_s_tb->FEB[i]) ; + repeat: + // fix_nodes was unable to perform its calculation due to + // filesystem got changed under us, lack of free disk space or i/o + // failure. If the first is the case - the search will be + // repeated. For now - free all resources acquired so far except + // for the new allocated nodes + { + int i; + + /* Release path buffers. 
*/ + if (wait_tb_buffers_run) { + pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path); + } else { + pathrelse(p_s_tb->tb_path); + } + /* brelse all resources collected for balancing */ + for (i = 0; i < MAX_HEIGHT; i++) { + if (wait_tb_buffers_run) { + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, + p_s_tb->L[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, + p_s_tb->R[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, + p_s_tb->FL[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, + p_s_tb->FR[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, + p_s_tb-> + CFL[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, + p_s_tb-> + CFR[i]); + } + + brelse(p_s_tb->L[i]); + p_s_tb->L[i] = NULL; + brelse(p_s_tb->R[i]); + p_s_tb->R[i] = NULL; + brelse(p_s_tb->FL[i]); + p_s_tb->FL[i] = NULL; + brelse(p_s_tb->FR[i]); + p_s_tb->FR[i] = NULL; + brelse(p_s_tb->CFL[i]); + p_s_tb->CFL[i] = NULL; + brelse(p_s_tb->CFR[i]); + p_s_tb->CFR[i] = NULL; + } + + if (wait_tb_buffers_run) { + for (i = 0; i < MAX_FEB_SIZE; i++) { + if (p_s_tb->FEB[i]) { + reiserfs_restore_prepared_buffer + (p_s_tb->tb_sb, p_s_tb->FEB[i]); + } + } } - } + return n_ret_value; } - return n_ret_value; - } } - /* Anatoly will probably forgive me renaming p_s_tb to tb. I just wanted to make lines shorter */ -void unfix_nodes (struct tree_balance * tb) +void unfix_nodes(struct tree_balance *tb) { - int i; - - /* Release path buffers. 
*/ - pathrelse_and_restore (tb->tb_sb, tb->tb_path); - - /* brelse all resources collected for balancing */ - for ( i = 0; i < MAX_HEIGHT; i++ ) { - reiserfs_restore_prepared_buffer (tb->tb_sb, tb->L[i]); - reiserfs_restore_prepared_buffer (tb->tb_sb, tb->R[i]); - reiserfs_restore_prepared_buffer (tb->tb_sb, tb->FL[i]); - reiserfs_restore_prepared_buffer (tb->tb_sb, tb->FR[i]); - reiserfs_restore_prepared_buffer (tb->tb_sb, tb->CFL[i]); - reiserfs_restore_prepared_buffer (tb->tb_sb, tb->CFR[i]); - - brelse (tb->L[i]); - brelse (tb->R[i]); - brelse (tb->FL[i]); - brelse (tb->FR[i]); - brelse (tb->CFL[i]); - brelse (tb->CFR[i]); - } - - /* deal with list of allocated (used and unused) nodes */ - for ( i = 0; i < MAX_FEB_SIZE; i++ ) { - if ( tb->FEB[i] ) { - b_blocknr_t blocknr = tb->FEB[i]->b_blocknr ; - /* de-allocated block which was not used by balancing and - bforget about buffer for it */ - brelse (tb->FEB[i]); - reiserfs_free_block (tb->transaction_handle, NULL, blocknr, 0); - } - if (tb->used[i]) { - /* release used as new nodes including a new root */ - brelse (tb->used[i]); - } - } + int i; - if (tb->vn_buf) - reiserfs_kfree (tb->vn_buf, tb->vn_buf_size, tb->tb_sb); + /* Release path buffers. 
*/ + pathrelse_and_restore(tb->tb_sb, tb->tb_path); -} + /* brelse all resources collected for balancing */ + for (i = 0; i < MAX_HEIGHT; i++) { + reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]); + reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]); + reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]); + reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]); + reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]); + reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]); + + brelse(tb->L[i]); + brelse(tb->R[i]); + brelse(tb->FL[i]); + brelse(tb->FR[i]); + brelse(tb->CFL[i]); + brelse(tb->CFR[i]); + } + /* deal with list of allocated (used and unused) nodes */ + for (i = 0; i < MAX_FEB_SIZE; i++) { + if (tb->FEB[i]) { + b_blocknr_t blocknr = tb->FEB[i]->b_blocknr; + /* de-allocated block which was not used by balancing and + bforget about buffer for it */ + brelse(tb->FEB[i]); + reiserfs_free_block(tb->transaction_handle, NULL, + blocknr, 0); + } + if (tb->used[i]) { + /* release used as new nodes including a new root */ + brelse(tb->used[i]); + } + } + if (tb->vn_buf) + reiserfs_kfree(tb->vn_buf, tb->vn_buf_size, tb->tb_sb); +} diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c index 08d0508c2d3..37c1306eb9b 100644 --- a/fs/reiserfs/hashes.c +++ b/fs/reiserfs/hashes.c @@ -22,7 +22,6 @@ #include #include - #define DELTA 0x9E3779B9 #define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */ #define PARTROUNDS 6 /* 6 gets complete mixing */ @@ -48,105 +47,75 @@ h1 += b1; \ } while(0) - u32 keyed_hash(const signed char *msg, int len) { - u32 k[] = { 0x9464a485, 0x542e1a94, 0x3e846bff, 0xb75bcfc3}; + u32 k[] = { 0x9464a485, 0x542e1a94, 0x3e846bff, 0xb75bcfc3 }; u32 h0 = k[0], h1 = k[1]; u32 a, b, c, d; u32 pad; int i; - - // assert(len >= 0 && len < 256); - pad = (u32)len | ((u32)len << 8); + // assert(len >= 0 && len < 256); + + pad = (u32) len | ((u32) len << 8); pad |= pad << 16; - while(len >= 16) - { - a = (u32)msg[ 0] | - (u32)msg[ 1] << 
8 | - (u32)msg[ 2] << 16| - (u32)msg[ 3] << 24; - b = (u32)msg[ 4] | - (u32)msg[ 5] << 8 | - (u32)msg[ 6] << 16| - (u32)msg[ 7] << 24; - c = (u32)msg[ 8] | - (u32)msg[ 9] << 8 | - (u32)msg[10] << 16| - (u32)msg[11] << 24; - d = (u32)msg[12] | - (u32)msg[13] << 8 | - (u32)msg[14] << 16| - (u32)msg[15] << 24; - + while (len >= 16) { + a = (u32) msg[0] | + (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24; + b = (u32) msg[4] | + (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24; + c = (u32) msg[8] | + (u32) msg[9] << 8 | + (u32) msg[10] << 16 | (u32) msg[11] << 24; + d = (u32) msg[12] | + (u32) msg[13] << 8 | + (u32) msg[14] << 16 | (u32) msg[15] << 24; + TEACORE(PARTROUNDS); len -= 16; msg += 16; } - if (len >= 12) - { - a = (u32)msg[ 0] | - (u32)msg[ 1] << 8 | - (u32)msg[ 2] << 16| - (u32)msg[ 3] << 24; - b = (u32)msg[ 4] | - (u32)msg[ 5] << 8 | - (u32)msg[ 6] << 16| - (u32)msg[ 7] << 24; - c = (u32)msg[ 8] | - (u32)msg[ 9] << 8 | - (u32)msg[10] << 16| - (u32)msg[11] << 24; + if (len >= 12) { + a = (u32) msg[0] | + (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24; + b = (u32) msg[4] | + (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24; + c = (u32) msg[8] | + (u32) msg[9] << 8 | + (u32) msg[10] << 16 | (u32) msg[11] << 24; d = pad; - for(i = 12; i < len; i++) - { + for (i = 12; i < len; i++) { d <<= 8; d |= msg[i]; } - } - else if (len >= 8) - { - a = (u32)msg[ 0] | - (u32)msg[ 1] << 8 | - (u32)msg[ 2] << 16| - (u32)msg[ 3] << 24; - b = (u32)msg[ 4] | - (u32)msg[ 5] << 8 | - (u32)msg[ 6] << 16| - (u32)msg[ 7] << 24; + } else if (len >= 8) { + a = (u32) msg[0] | + (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24; + b = (u32) msg[4] | + (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24; c = d = pad; - for(i = 8; i < len; i++) - { + for (i = 8; i < len; i++) { c <<= 8; c |= msg[i]; } - } - else if (len >= 4) - { - a = (u32)msg[ 0] | - (u32)msg[ 1] << 8 | - (u32)msg[ 2] << 16| - (u32)msg[ 3] << 24; + } else 
if (len >= 4) { + a = (u32) msg[0] | + (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24; b = c = d = pad; - for(i = 4; i < len; i++) - { + for (i = 4; i < len; i++) { b <<= 8; b |= msg[i]; } - } - else - { + } else { a = b = c = d = pad; - for(i = 0; i < len; i++) - { + for (i = 0; i < len; i++) { a <<= 8; a |= msg[i]; } @@ -155,55 +124,59 @@ u32 keyed_hash(const signed char *msg, int len) TEACORE(FULLROUNDS); /* return 0;*/ - return h0^h1; + return h0 ^ h1; } /* What follows in this file is copyright 2000 by Hans Reiser, and the * licensing of what follows is governed by reiserfs/README */ -u32 yura_hash (const signed char *msg, int len) +u32 yura_hash(const signed char *msg, int len) { - int j, pow; - u32 a, c; - int i; - - for (pow=1,i=1; i < len; i++) pow = pow * 10; - - if (len == 1) - a = msg[0]-48; - else - a = (msg[0] - 48) * pow; - - for (i=1; i < len; i++) { - c = msg[i] - 48; - for (pow=1,j=i; j < len-1; j++) pow = pow * 10; - a = a + c * pow; - } - - for (; i < 40; i++) { - c = '0' - 48; - for (pow=1,j=i; j < len-1; j++) pow = pow * 10; - a = a + c * pow; - } - - for (; i < 256; i++) { - c = i; - for (pow=1,j=i; j < len-1; j++) pow = pow * 10; - a = a + c * pow; - } - - a = a << 7; - return a; + int j, pow; + u32 a, c; + int i; + + for (pow = 1, i = 1; i < len; i++) + pow = pow * 10; + + if (len == 1) + a = msg[0] - 48; + else + a = (msg[0] - 48) * pow; + + for (i = 1; i < len; i++) { + c = msg[i] - 48; + for (pow = 1, j = i; j < len - 1; j++) + pow = pow * 10; + a = a + c * pow; + } + + for (; i < 40; i++) { + c = '0' - 48; + for (pow = 1, j = i; j < len - 1; j++) + pow = pow * 10; + a = a + c * pow; + } + + for (; i < 256; i++) { + c = i; + for (pow = 1, j = i; j < len - 1; j++) + pow = pow * 10; + a = a + c * pow; + } + + a = a << 7; + return a; } -u32 r5_hash (const signed char *msg, int len) +u32 r5_hash(const signed char *msg, int len) { - u32 a=0; - while(*msg) { - a += *msg << 4; - a += *msg >> 4; - a *= 11; - msg++; - } - return a; + 
u32 a = 0; + while (*msg) { + a += *msg << 4; + a += *msg >> 4; + a *= 11; + msg++; + } + return a; } diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c index a362125da0d..6c5a726fd34 100644 --- a/fs/reiserfs/ibalance.c +++ b/fs/reiserfs/ibalance.c @@ -10,13 +10,8 @@ #include /* this is one and only function that is used outside (do_balance.c) */ -int balance_internal ( - struct tree_balance * , - int, - int, - struct item_head * , - struct buffer_head ** - ); +int balance_internal(struct tree_balance *, + int, int, struct item_head *, struct buffer_head **); /* modes of internal_shift_left, internal_shift_right and internal_insert_childs */ #define INTERNAL_SHIFT_FROM_S_TO_L 0 @@ -27,464 +22,474 @@ int balance_internal ( #define INTERNAL_INSERT_TO_L 5 #define INTERNAL_INSERT_TO_R 6 -static void internal_define_dest_src_infos ( - int shift_mode, - struct tree_balance * tb, - int h, - struct buffer_info * dest_bi, - struct buffer_info * src_bi, - int * d_key, - struct buffer_head ** cf - ) +static void internal_define_dest_src_infos(int shift_mode, + struct tree_balance *tb, + int h, + struct buffer_info *dest_bi, + struct buffer_info *src_bi, + int *d_key, struct buffer_head **cf) { - memset (dest_bi, 0, sizeof (struct buffer_info)); - memset (src_bi, 0, sizeof (struct buffer_info)); - /* define dest, src, dest parent, dest position */ - switch (shift_mode) { - case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */ - src_bi->tb = tb; - src_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); - src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); - src_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->L[h]; - dest_bi->bi_parent = tb->FL[h]; - dest_bi->bi_position = get_left_neighbor_position (tb, h); - *d_key = tb->lkey[h]; - *cf = tb->CFL[h]; - break; - case INTERNAL_SHIFT_FROM_L_TO_S: - src_bi->tb = tb; - src_bi->bi_bh = tb->L[h]; - src_bi->bi_parent = tb->FL[h]; - src_bi->bi_position = 
get_left_neighbor_position (tb, h); - dest_bi->tb = tb; - dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); - dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); - dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */ - *d_key = tb->lkey[h]; - *cf = tb->CFL[h]; - break; - - case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */ - src_bi->tb = tb; - src_bi->bi_bh = tb->R[h]; - src_bi->bi_parent = tb->FR[h]; - src_bi->bi_position = get_right_neighbor_position (tb, h); - dest_bi->tb = tb; - dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); - dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); - dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); - *d_key = tb->rkey[h]; - *cf = tb->CFR[h]; - break; - - case INTERNAL_SHIFT_FROM_S_TO_R: - src_bi->tb = tb; - src_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); - src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); - src_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->R[h]; - dest_bi->bi_parent = tb->FR[h]; - dest_bi->bi_position = get_right_neighbor_position (tb, h); - *d_key = tb->rkey[h]; - *cf = tb->CFR[h]; - break; - - case INTERNAL_INSERT_TO_L: - dest_bi->tb = tb; - dest_bi->bi_bh = tb->L[h]; - dest_bi->bi_parent = tb->FL[h]; - dest_bi->bi_position = get_left_neighbor_position (tb, h); - break; - - case INTERNAL_INSERT_TO_S: - dest_bi->tb = tb; - dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); - dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); - dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); - break; - - case INTERNAL_INSERT_TO_R: - dest_bi->tb = tb; - dest_bi->bi_bh = tb->R[h]; - dest_bi->bi_parent = tb->FR[h]; - dest_bi->bi_position = get_right_neighbor_position (tb, h); - break; - - default: - reiserfs_panic (tb->tb_sb, "internal_define_dest_src_infos: shift type is unknown (%d)", shift_mode); - } + memset(dest_bi, 0, sizeof(struct buffer_info)); + memset(src_bi, 
0, sizeof(struct buffer_info)); + /* define dest, src, dest parent, dest position */ + switch (shift_mode) { + case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */ + src_bi->tb = tb; + src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); + src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); + src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->L[h]; + dest_bi->bi_parent = tb->FL[h]; + dest_bi->bi_position = get_left_neighbor_position(tb, h); + *d_key = tb->lkey[h]; + *cf = tb->CFL[h]; + break; + case INTERNAL_SHIFT_FROM_L_TO_S: + src_bi->tb = tb; + src_bi->bi_bh = tb->L[h]; + src_bi->bi_parent = tb->FL[h]; + src_bi->bi_position = get_left_neighbor_position(tb, h); + dest_bi->tb = tb; + dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); + dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); + dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */ + *d_key = tb->lkey[h]; + *cf = tb->CFL[h]; + break; + + case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */ + src_bi->tb = tb; + src_bi->bi_bh = tb->R[h]; + src_bi->bi_parent = tb->FR[h]; + src_bi->bi_position = get_right_neighbor_position(tb, h); + dest_bi->tb = tb; + dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); + dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); + dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); + *d_key = tb->rkey[h]; + *cf = tb->CFR[h]; + break; + + case INTERNAL_SHIFT_FROM_S_TO_R: + src_bi->tb = tb; + src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); + src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); + src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->R[h]; + dest_bi->bi_parent = tb->FR[h]; + dest_bi->bi_position = get_right_neighbor_position(tb, h); + *d_key = tb->rkey[h]; + *cf = tb->CFR[h]; + break; + + case INTERNAL_INSERT_TO_L: + dest_bi->tb = tb; + dest_bi->bi_bh = tb->L[h]; + dest_bi->bi_parent = 
tb->FL[h]; + dest_bi->bi_position = get_left_neighbor_position(tb, h); + break; + + case INTERNAL_INSERT_TO_S: + dest_bi->tb = tb; + dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); + dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); + dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); + break; + + case INTERNAL_INSERT_TO_R: + dest_bi->tb = tb; + dest_bi->bi_bh = tb->R[h]; + dest_bi->bi_parent = tb->FR[h]; + dest_bi->bi_position = get_right_neighbor_position(tb, h); + break; + + default: + reiserfs_panic(tb->tb_sb, + "internal_define_dest_src_infos: shift type is unknown (%d)", + shift_mode); + } } - - /* Insert count node pointers into buffer cur before position to + 1. * Insert count items into buffer cur before position to. * Items and node pointers are specified by inserted and bh respectively. - */ -static void internal_insert_childs (struct buffer_info * cur_bi, - int to, int count, - struct item_head * inserted, - struct buffer_head ** bh - ) + */ +static void internal_insert_childs(struct buffer_info *cur_bi, + int to, int count, + struct item_head *inserted, + struct buffer_head **bh) { - struct buffer_head * cur = cur_bi->bi_bh; - struct block_head * blkh; - int nr; - struct reiserfs_key * ih; - struct disk_child new_dc[2]; - struct disk_child * dc; - int i; - - if (count <= 0) - return; - - blkh = B_BLK_HEAD(cur); - nr = blkh_nr_item(blkh); - - RFALSE( count > 2, - "too many children (%d) are to be inserted", count); - RFALSE( B_FREE_SPACE (cur) < count * (KEY_SIZE + DC_SIZE), - "no enough free space (%d), needed %d bytes", - B_FREE_SPACE (cur), count * (KEY_SIZE + DC_SIZE)); - - /* prepare space for count disk_child */ - dc = B_N_CHILD(cur,to+1); - - memmove (dc + count, dc, (nr+1-(to+1)) * DC_SIZE); - - /* copy to_be_insert disk children */ - for (i = 0; i < count; i ++) { - put_dc_size( &(new_dc[i]), MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i])); - put_dc_block_number( &(new_dc[i]), bh[i]->b_blocknr ); - } - memcpy (dc, new_dc, DC_SIZE * 
count); - - - /* prepare space for count items */ - ih = B_N_PDELIM_KEY (cur, ((to == -1) ? 0 : to)); - - memmove (ih + count, ih, (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE); - - /* copy item headers (keys) */ - memcpy (ih, inserted, KEY_SIZE); - if ( count > 1 ) - memcpy (ih + 1, inserted + 1, KEY_SIZE); - - /* sizes, item number */ - set_blkh_nr_item( blkh, blkh_nr_item(blkh) + count ); - set_blkh_free_space( blkh, - blkh_free_space(blkh) - count * (DC_SIZE + KEY_SIZE ) ); - - do_balance_mark_internal_dirty (cur_bi->tb, cur,0); - - /*&&&&&&&&&&&&&&&&&&&&&&&&*/ - check_internal (cur); - /*&&&&&&&&&&&&&&&&&&&&&&&&*/ - - if (cur_bi->bi_parent) { - struct disk_child *t_dc = B_N_CHILD (cur_bi->bi_parent,cur_bi->bi_position); - put_dc_size( t_dc, dc_size(t_dc) + (count * (DC_SIZE + KEY_SIZE))); - do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent, 0); - - /*&&&&&&&&&&&&&&&&&&&&&&&&*/ - check_internal (cur_bi->bi_parent); - /*&&&&&&&&&&&&&&&&&&&&&&&&*/ - } + struct buffer_head *cur = cur_bi->bi_bh; + struct block_head *blkh; + int nr; + struct reiserfs_key *ih; + struct disk_child new_dc[2]; + struct disk_child *dc; + int i; + + if (count <= 0) + return; + + blkh = B_BLK_HEAD(cur); + nr = blkh_nr_item(blkh); + + RFALSE(count > 2, "too many children (%d) are to be inserted", count); + RFALSE(B_FREE_SPACE(cur) < count * (KEY_SIZE + DC_SIZE), + "no enough free space (%d), needed %d bytes", + B_FREE_SPACE(cur), count * (KEY_SIZE + DC_SIZE)); + + /* prepare space for count disk_child */ + dc = B_N_CHILD(cur, to + 1); + + memmove(dc + count, dc, (nr + 1 - (to + 1)) * DC_SIZE); + + /* copy to_be_insert disk children */ + for (i = 0; i < count; i++) { + put_dc_size(&(new_dc[i]), + MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i])); + put_dc_block_number(&(new_dc[i]), bh[i]->b_blocknr); + } + memcpy(dc, new_dc, DC_SIZE * count); + + /* prepare space for count items */ + ih = B_N_PDELIM_KEY(cur, ((to == -1) ? 
0 : to)); + + memmove(ih + count, ih, + (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE); + + /* copy item headers (keys) */ + memcpy(ih, inserted, KEY_SIZE); + if (count > 1) + memcpy(ih + 1, inserted + 1, KEY_SIZE); + + /* sizes, item number */ + set_blkh_nr_item(blkh, blkh_nr_item(blkh) + count); + set_blkh_free_space(blkh, + blkh_free_space(blkh) - count * (DC_SIZE + + KEY_SIZE)); + + do_balance_mark_internal_dirty(cur_bi->tb, cur, 0); + + /*&&&&&&&&&&&&&&&&&&&&&&&& */ + check_internal(cur); + /*&&&&&&&&&&&&&&&&&&&&&&&& */ + + if (cur_bi->bi_parent) { + struct disk_child *t_dc = + B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position); + put_dc_size(t_dc, + dc_size(t_dc) + (count * (DC_SIZE + KEY_SIZE))); + do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent, + 0); + + /*&&&&&&&&&&&&&&&&&&&&&&&& */ + check_internal(cur_bi->bi_parent); + /*&&&&&&&&&&&&&&&&&&&&&&&& */ + } } - /* Delete del_num items and node pointers from buffer cur starting from * * the first_i'th item and first_p'th pointers respectively. 
*/ -static void internal_delete_pointers_items ( - struct buffer_info * cur_bi, - int first_p, - int first_i, - int del_num - ) +static void internal_delete_pointers_items(struct buffer_info *cur_bi, + int first_p, + int first_i, int del_num) { - struct buffer_head * cur = cur_bi->bi_bh; - int nr; - struct block_head * blkh; - struct reiserfs_key * key; - struct disk_child * dc; - - RFALSE( cur == NULL, "buffer is 0"); - RFALSE( del_num < 0, - "negative number of items (%d) can not be deleted", del_num); - RFALSE( first_p < 0 || first_p + del_num > B_NR_ITEMS (cur) + 1 || first_i < 0, - "first pointer order (%d) < 0 or " - "no so many pointers (%d), only (%d) or " - "first key order %d < 0", first_p, - first_p + del_num, B_NR_ITEMS (cur) + 1, first_i); - if ( del_num == 0 ) - return; - - blkh = B_BLK_HEAD(cur); - nr = blkh_nr_item(blkh); - - if ( first_p == 0 && del_num == nr + 1 ) { - RFALSE( first_i != 0, "1st deleted key must have order 0, not %d", first_i); - make_empty_node (cur_bi); - return; - } - - RFALSE( first_i + del_num > B_NR_ITEMS (cur), - "first_i = %d del_num = %d " - "no so many keys (%d) in the node (%b)(%z)", - first_i, del_num, first_i + del_num, cur, cur); - - - /* deleting */ - dc = B_N_CHILD (cur, first_p); - - memmove (dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE); - key = B_N_PDELIM_KEY (cur, first_i); - memmove (key, key + del_num, (nr - first_i - del_num) * KEY_SIZE + (nr + 1 - del_num) * DC_SIZE); - - - /* sizes, item number */ - set_blkh_nr_item( blkh, blkh_nr_item(blkh) - del_num ); - set_blkh_free_space( blkh, - blkh_free_space(blkh) + (del_num * (KEY_SIZE + DC_SIZE) ) ); - - do_balance_mark_internal_dirty (cur_bi->tb, cur, 0); - /*&&&&&&&&&&&&&&&&&&&&&&&*/ - check_internal (cur); - /*&&&&&&&&&&&&&&&&&&&&&&&*/ - - if (cur_bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD (cur_bi->bi_parent, cur_bi->bi_position); - put_dc_size( t_dc, dc_size(t_dc) - (del_num * (KEY_SIZE + DC_SIZE) ) ); - - 
do_balance_mark_internal_dirty (cur_bi->tb, cur_bi->bi_parent,0); - /*&&&&&&&&&&&&&&&&&&&&&&&&*/ - check_internal (cur_bi->bi_parent); - /*&&&&&&&&&&&&&&&&&&&&&&&&*/ - } -} + struct buffer_head *cur = cur_bi->bi_bh; + int nr; + struct block_head *blkh; + struct reiserfs_key *key; + struct disk_child *dc; + + RFALSE(cur == NULL, "buffer is 0"); + RFALSE(del_num < 0, + "negative number of items (%d) can not be deleted", del_num); + RFALSE(first_p < 0 || first_p + del_num > B_NR_ITEMS(cur) + 1 + || first_i < 0, + "first pointer order (%d) < 0 or " + "no so many pointers (%d), only (%d) or " + "first key order %d < 0", first_p, first_p + del_num, + B_NR_ITEMS(cur) + 1, first_i); + if (del_num == 0) + return; + + blkh = B_BLK_HEAD(cur); + nr = blkh_nr_item(blkh); + + if (first_p == 0 && del_num == nr + 1) { + RFALSE(first_i != 0, + "1st deleted key must have order 0, not %d", first_i); + make_empty_node(cur_bi); + return; + } + RFALSE(first_i + del_num > B_NR_ITEMS(cur), + "first_i = %d del_num = %d " + "no so many keys (%d) in the node (%b)(%z)", + first_i, del_num, first_i + del_num, cur, cur); + + /* deleting */ + dc = B_N_CHILD(cur, first_p); + + memmove(dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE); + key = B_N_PDELIM_KEY(cur, first_i); + memmove(key, key + del_num, + (nr - first_i - del_num) * KEY_SIZE + (nr + 1 - + del_num) * DC_SIZE); + + /* sizes, item number */ + set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num); + set_blkh_free_space(blkh, + blkh_free_space(blkh) + + (del_num * (KEY_SIZE + DC_SIZE))); + + do_balance_mark_internal_dirty(cur_bi->tb, cur, 0); + /*&&&&&&&&&&&&&&&&&&&&&&& */ + check_internal(cur); + /*&&&&&&&&&&&&&&&&&&&&&&& */ + + if (cur_bi->bi_parent) { + struct disk_child *t_dc; + t_dc = B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position); + put_dc_size(t_dc, + dc_size(t_dc) - (del_num * (KEY_SIZE + DC_SIZE))); + + do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent, + 0); + /*&&&&&&&&&&&&&&&&&&&&&&&& */ + 
check_internal(cur_bi->bi_parent); + /*&&&&&&&&&&&&&&&&&&&&&&&& */ + } +} /* delete n node pointers and items starting from given position */ -static void internal_delete_childs (struct buffer_info * cur_bi, - int from, int n) +static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n) { - int i_from; + int i_from; - i_from = (from == 0) ? from : from - 1; + i_from = (from == 0) ? from : from - 1; - /* delete n pointers starting from `from' position in CUR; - delete n keys starting from 'i_from' position in CUR; - */ - internal_delete_pointers_items (cur_bi, from, i_from, n); + /* delete n pointers starting from `from' position in CUR; + delete n keys starting from 'i_from' position in CUR; + */ + internal_delete_pointers_items(cur_bi, from, i_from, n); } - /* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest * last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest */ -static void internal_copy_pointers_items ( - struct buffer_info * dest_bi, - struct buffer_head * src, - int last_first, int cpy_num - ) +static void internal_copy_pointers_items(struct buffer_info *dest_bi, + struct buffer_head *src, + int last_first, int cpy_num) { - /* ATTENTION! 
Number of node pointers in DEST is equal to number of items in DEST * - * as delimiting key have already inserted to buffer dest.*/ - struct buffer_head * dest = dest_bi->bi_bh; - int nr_dest, nr_src; - int dest_order, src_order; - struct block_head * blkh; - struct reiserfs_key * key; - struct disk_child * dc; - - nr_src = B_NR_ITEMS (src); - - RFALSE( dest == NULL || src == NULL, - "src (%p) or dest (%p) buffer is 0", src, dest); - RFALSE( last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST, - "invalid last_first parameter (%d)", last_first); - RFALSE( nr_src < cpy_num - 1, - "no so many items (%d) in src (%d)", cpy_num, nr_src); - RFALSE( cpy_num < 0, "cpy_num less than 0 (%d)", cpy_num); - RFALSE( cpy_num - 1 + B_NR_ITEMS(dest) > (int)MAX_NR_KEY(dest), - "cpy_num (%d) + item number in dest (%d) can not be > MAX_NR_KEY(%d)", - cpy_num, B_NR_ITEMS(dest), MAX_NR_KEY(dest)); - - if ( cpy_num == 0 ) - return; + /* ATTENTION! Number of node pointers in DEST is equal to number of items in DEST * + * as delimiting key have already inserted to buffer dest.*/ + struct buffer_head *dest = dest_bi->bi_bh; + int nr_dest, nr_src; + int dest_order, src_order; + struct block_head *blkh; + struct reiserfs_key *key; + struct disk_child *dc; + + nr_src = B_NR_ITEMS(src); + + RFALSE(dest == NULL || src == NULL, + "src (%p) or dest (%p) buffer is 0", src, dest); + RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST, + "invalid last_first parameter (%d)", last_first); + RFALSE(nr_src < cpy_num - 1, + "no so many items (%d) in src (%d)", cpy_num, nr_src); + RFALSE(cpy_num < 0, "cpy_num less than 0 (%d)", cpy_num); + RFALSE(cpy_num - 1 + B_NR_ITEMS(dest) > (int)MAX_NR_KEY(dest), + "cpy_num (%d) + item number in dest (%d) can not be > MAX_NR_KEY(%d)", + cpy_num, B_NR_ITEMS(dest), MAX_NR_KEY(dest)); + + if (cpy_num == 0) + return; /* coping */ - blkh = B_BLK_HEAD(dest); - nr_dest = blkh_nr_item(blkh); + blkh = B_BLK_HEAD(dest); + nr_dest = blkh_nr_item(blkh); - 
/*dest_order = (last_first == LAST_TO_FIRST) ? 0 : nr_dest;*/ - /*src_order = (last_first == LAST_TO_FIRST) ? (nr_src - cpy_num + 1) : 0;*/ - (last_first == LAST_TO_FIRST) ? (dest_order = 0, src_order = nr_src - cpy_num + 1) : - (dest_order = nr_dest, src_order = 0); + /*dest_order = (last_first == LAST_TO_FIRST) ? 0 : nr_dest; */ + /*src_order = (last_first == LAST_TO_FIRST) ? (nr_src - cpy_num + 1) : 0; */ + (last_first == LAST_TO_FIRST) ? (dest_order = 0, src_order = + nr_src - cpy_num + 1) : (dest_order = + nr_dest, + src_order = + 0); - /* prepare space for cpy_num pointers */ - dc = B_N_CHILD (dest, dest_order); + /* prepare space for cpy_num pointers */ + dc = B_N_CHILD(dest, dest_order); - memmove (dc + cpy_num, dc, (nr_dest - dest_order) * DC_SIZE); + memmove(dc + cpy_num, dc, (nr_dest - dest_order) * DC_SIZE); /* insert pointers */ - memcpy (dc, B_N_CHILD (src, src_order), DC_SIZE * cpy_num); - - - /* prepare space for cpy_num - 1 item headers */ - key = B_N_PDELIM_KEY(dest, dest_order); - memmove (key + cpy_num - 1, key, - KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest + cpy_num)); - - - /* insert headers */ - memcpy (key, B_N_PDELIM_KEY (src, src_order), KEY_SIZE * (cpy_num - 1)); - - /* sizes, item number */ - set_blkh_nr_item( blkh, blkh_nr_item(blkh) + (cpy_num - 1 ) ); - set_blkh_free_space( blkh, - blkh_free_space(blkh) - (KEY_SIZE * (cpy_num - 1) + DC_SIZE * cpy_num ) ); - - do_balance_mark_internal_dirty (dest_bi->tb, dest, 0); - - /*&&&&&&&&&&&&&&&&&&&&&&&&*/ - check_internal (dest); - /*&&&&&&&&&&&&&&&&&&&&&&&&*/ - - if (dest_bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD(dest_bi->bi_parent,dest_bi->bi_position); - put_dc_size( t_dc, dc_size(t_dc) + (KEY_SIZE * (cpy_num - 1) + DC_SIZE * cpy_num) ); - - do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent,0); - /*&&&&&&&&&&&&&&&&&&&&&&&&*/ - check_internal (dest_bi->bi_parent); - /*&&&&&&&&&&&&&&&&&&&&&&&&*/ - } + memcpy(dc, B_N_CHILD(src, src_order), DC_SIZE * 
cpy_num); + + /* prepare space for cpy_num - 1 item headers */ + key = B_N_PDELIM_KEY(dest, dest_order); + memmove(key + cpy_num - 1, key, + KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest + + cpy_num)); + + /* insert headers */ + memcpy(key, B_N_PDELIM_KEY(src, src_order), KEY_SIZE * (cpy_num - 1)); + + /* sizes, item number */ + set_blkh_nr_item(blkh, blkh_nr_item(blkh) + (cpy_num - 1)); + set_blkh_free_space(blkh, + blkh_free_space(blkh) - (KEY_SIZE * (cpy_num - 1) + + DC_SIZE * cpy_num)); + + do_balance_mark_internal_dirty(dest_bi->tb, dest, 0); + + /*&&&&&&&&&&&&&&&&&&&&&&&& */ + check_internal(dest); + /*&&&&&&&&&&&&&&&&&&&&&&&& */ + + if (dest_bi->bi_parent) { + struct disk_child *t_dc; + t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position); + put_dc_size(t_dc, + dc_size(t_dc) + (KEY_SIZE * (cpy_num - 1) + + DC_SIZE * cpy_num)); + + do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent, + 0); + /*&&&&&&&&&&&&&&&&&&&&&&&& */ + check_internal(dest_bi->bi_parent); + /*&&&&&&&&&&&&&&&&&&&&&&&& */ + } } - /* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest. * Delete cpy_num - del_par items and node pointers from buffer src. * last_first == FIRST_TO_LAST means, that we copy/delete first items from src. * last_first == LAST_TO_FIRST means, that we copy/delete last items from src. 
*/ -static void internal_move_pointers_items (struct buffer_info * dest_bi, - struct buffer_info * src_bi, - int last_first, int cpy_num, int del_par) +static void internal_move_pointers_items(struct buffer_info *dest_bi, + struct buffer_info *src_bi, + int last_first, int cpy_num, + int del_par) { - int first_pointer; - int first_item; - - internal_copy_pointers_items (dest_bi, src_bi->bi_bh, last_first, cpy_num); - - if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ - first_pointer = 0; - first_item = 0; - /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, - for key - with first_item */ - internal_delete_pointers_items (src_bi, first_pointer, first_item, cpy_num - del_par); - } else { /* shift_right occurs */ - int i, j; - - i = ( cpy_num - del_par == ( j = B_NR_ITEMS(src_bi->bi_bh)) + 1 ) ? 0 : j - cpy_num + del_par; - - internal_delete_pointers_items (src_bi, j + 1 - cpy_num + del_par, i, cpy_num - del_par); - } + int first_pointer; + int first_item; + + internal_copy_pointers_items(dest_bi, src_bi->bi_bh, last_first, + cpy_num); + + if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ + first_pointer = 0; + first_item = 0; + /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, + for key - with first_item */ + internal_delete_pointers_items(src_bi, first_pointer, + first_item, cpy_num - del_par); + } else { /* shift_right occurs */ + int i, j; + + i = (cpy_num - del_par == + (j = + B_NR_ITEMS(src_bi->bi_bh)) + 1) ? 0 : j - cpy_num + + del_par; + + internal_delete_pointers_items(src_bi, + j + 1 - cpy_num + del_par, i, + cpy_num - del_par); + } } /* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. 
*/ -static void internal_insert_key (struct buffer_info * dest_bi, - int dest_position_before, /* insert key before key with n_dest number */ - struct buffer_head * src, - int src_position) +static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_before, /* insert key before key with n_dest number */ + struct buffer_head *src, int src_position) { - struct buffer_head * dest = dest_bi->bi_bh; - int nr; - struct block_head * blkh; - struct reiserfs_key * key; - - RFALSE( dest == NULL || src == NULL, - "source(%p) or dest(%p) buffer is 0", src, dest); - RFALSE( dest_position_before < 0 || src_position < 0, - "source(%d) or dest(%d) key number less than 0", - src_position, dest_position_before); - RFALSE( dest_position_before > B_NR_ITEMS (dest) || - src_position >= B_NR_ITEMS(src), - "invalid position in dest (%d (key number %d)) or in src (%d (key number %d))", - dest_position_before, B_NR_ITEMS (dest), - src_position, B_NR_ITEMS(src)); - RFALSE( B_FREE_SPACE (dest) < KEY_SIZE, - "no enough free space (%d) in dest buffer", B_FREE_SPACE (dest)); - - blkh = B_BLK_HEAD(dest); - nr = blkh_nr_item(blkh); - - /* prepare space for inserting key */ - key = B_N_PDELIM_KEY (dest, dest_position_before); - memmove (key + 1, key, (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE); - - /* insert key */ - memcpy (key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE); - - /* Change dirt, free space, item number fields. 
*/ - - set_blkh_nr_item( blkh, blkh_nr_item(blkh) + 1 ); - set_blkh_free_space( blkh, blkh_free_space(blkh) - KEY_SIZE ); - - do_balance_mark_internal_dirty (dest_bi->tb, dest, 0); - - if (dest_bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD(dest_bi->bi_parent,dest_bi->bi_position); - put_dc_size( t_dc, dc_size(t_dc) + KEY_SIZE ); - - do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent,0); - } + struct buffer_head *dest = dest_bi->bi_bh; + int nr; + struct block_head *blkh; + struct reiserfs_key *key; + + RFALSE(dest == NULL || src == NULL, + "source(%p) or dest(%p) buffer is 0", src, dest); + RFALSE(dest_position_before < 0 || src_position < 0, + "source(%d) or dest(%d) key number less than 0", + src_position, dest_position_before); + RFALSE(dest_position_before > B_NR_ITEMS(dest) || + src_position >= B_NR_ITEMS(src), + "invalid position in dest (%d (key number %d)) or in src (%d (key number %d))", + dest_position_before, B_NR_ITEMS(dest), + src_position, B_NR_ITEMS(src)); + RFALSE(B_FREE_SPACE(dest) < KEY_SIZE, + "no enough free space (%d) in dest buffer", B_FREE_SPACE(dest)); + + blkh = B_BLK_HEAD(dest); + nr = blkh_nr_item(blkh); + + /* prepare space for inserting key */ + key = B_N_PDELIM_KEY(dest, dest_position_before); + memmove(key + 1, key, + (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE); + + /* insert key */ + memcpy(key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE); + + /* Change dirt, free space, item number fields. */ + + set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1); + set_blkh_free_space(blkh, blkh_free_space(blkh) - KEY_SIZE); + + do_balance_mark_internal_dirty(dest_bi->tb, dest, 0); + + if (dest_bi->bi_parent) { + struct disk_child *t_dc; + t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position); + put_dc_size(t_dc, dc_size(t_dc) + KEY_SIZE); + + do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent, + 0); + } } - - /* Insert d_key'th (delimiting) key from buffer cfl to tail of dest. 
* Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest. * Replace d_key'th key in buffer cfl. * Delete pointer_amount items and node pointers from buffer src. */ /* this can be invoked both to shift from S to L and from R to S */ -static void internal_shift_left ( - int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */ - struct tree_balance * tb, - int h, - int pointer_amount - ) +static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */ + struct tree_balance *tb, + int h, int pointer_amount) { - struct buffer_info dest_bi, src_bi; - struct buffer_head * cf; - int d_key_position; - - internal_define_dest_src_infos (mode, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); - - /*printk("pointer_amount = %d\n",pointer_amount);*/ - - if (pointer_amount) { - /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */ - internal_insert_key (&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, d_key_position); - - if (B_NR_ITEMS(src_bi.bi_bh) == pointer_amount - 1) { - if (src_bi.bi_position/*src->b_item_order*/ == 0) - replace_key (tb, cf, d_key_position, src_bi.bi_parent/*src->b_parent*/, 0); - } else - replace_key (tb, cf, d_key_position, src_bi.bi_bh, pointer_amount - 1); - } - /* last parameter is del_parameter */ - internal_move_pointers_items (&dest_bi, &src_bi, FIRST_TO_LAST, pointer_amount, 0); + struct buffer_info dest_bi, src_bi; + struct buffer_head *cf; + int d_key_position; + + internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi, + &d_key_position, &cf); + + /*printk("pointer_amount = %d\n",pointer_amount); */ + + if (pointer_amount) { + /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */ + internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, + d_key_position); + + if (B_NR_ITEMS(src_bi.bi_bh) == pointer_amount - 1) { + if (src_bi.bi_position /*src->b_item_order */ == 0) + 
replace_key(tb, cf, d_key_position, + src_bi. + bi_parent /*src->b_parent */ , 0); + } else + replace_key(tb, cf, d_key_position, src_bi.bi_bh, + pointer_amount - 1); + } + /* last parameter is del_parameter */ + internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST, + pointer_amount, 0); } @@ -493,67 +498,66 @@ static void internal_shift_left ( * Delete n - 1 items and node pointers from buffer S[h]. */ /* it always shifts from S[h] to L[h] */ -static void internal_shift1_left ( - struct tree_balance * tb, - int h, - int pointer_amount - ) +static void internal_shift1_left(struct tree_balance *tb, + int h, int pointer_amount) { - struct buffer_info dest_bi, src_bi; - struct buffer_head * cf; - int d_key_position; + struct buffer_info dest_bi, src_bi; + struct buffer_head *cf; + int d_key_position; - internal_define_dest_src_infos (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); + internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, + &dest_bi, &src_bi, &d_key_position, &cf); - if ( pointer_amount > 0 ) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */ - internal_insert_key (&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, d_key_position); - /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]);*/ + if (pointer_amount > 0) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */ + internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, + d_key_position); + /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]); */ - /* last parameter is del_parameter */ - internal_move_pointers_items (&dest_bi, &src_bi, FIRST_TO_LAST, pointer_amount, 1); - /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1);*/ + /* last parameter is del_parameter */ + internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST, + pointer_amount, 1); + /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */ } - /* 
Insert d_key'th (delimiting) key from buffer cfr to head of dest. * Copy n node pointers and n - 1 items from buffer src to buffer dest. * Replace d_key'th key in buffer cfr. * Delete n items and node pointers from buffer src. */ -static void internal_shift_right ( - int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */ - struct tree_balance * tb, - int h, - int pointer_amount - ) +static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */ + struct tree_balance *tb, + int h, int pointer_amount) { - struct buffer_info dest_bi, src_bi; - struct buffer_head * cf; - int d_key_position; - int nr; - - - internal_define_dest_src_infos (mode, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); - - nr = B_NR_ITEMS (src_bi.bi_bh); - - if (pointer_amount > 0) { - /* insert delimiting key from common father of dest and src to dest node into position 0 */ - internal_insert_key (&dest_bi, 0, cf, d_key_position); - if (nr == pointer_amount - 1) { - RFALSE( src_bi.bi_bh != PATH_H_PBUFFER (tb->tb_path, h)/*tb->S[h]*/ || - dest_bi.bi_bh != tb->R[h], - "src (%p) must be == tb->S[h](%p) when it disappears", - src_bi.bi_bh, PATH_H_PBUFFER (tb->tb_path, h)); - /* when S[h] disappers replace left delemiting key as well */ - if (tb->CFL[h]) - replace_key (tb, cf, d_key_position, tb->CFL[h], tb->lkey[h]); - } else - replace_key (tb, cf, d_key_position, src_bi.bi_bh, nr - pointer_amount); - } - - /* last parameter is del_parameter */ - internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, pointer_amount, 0); + struct buffer_info dest_bi, src_bi; + struct buffer_head *cf; + int d_key_position; + int nr; + + internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi, + &d_key_position, &cf); + + nr = B_NR_ITEMS(src_bi.bi_bh); + + if (pointer_amount > 0) { + /* insert delimiting key from common father of dest and src to dest node into position 0 */ + internal_insert_key(&dest_bi, 0, cf, d_key_position); + if (nr == pointer_amount - 1) { + 
RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ || + dest_bi.bi_bh != tb->R[h], + "src (%p) must be == tb->S[h](%p) when it disappears", + src_bi.bi_bh, PATH_H_PBUFFER(tb->tb_path, h)); + /* when S[h] disappers replace left delemiting key as well */ + if (tb->CFL[h]) + replace_key(tb, cf, d_key_position, tb->CFL[h], + tb->lkey[h]); + } else + replace_key(tb, cf, d_key_position, src_bi.bi_bh, + nr - pointer_amount); + } + + /* last parameter is del_parameter */ + internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST, + pointer_amount, 0); } /* Insert delimiting key to R[h]. @@ -561,498 +565,526 @@ static void internal_shift_right ( * Delete n - 1 items and node pointers from buffer S[h]. */ /* it always shift from S[h] to R[h] */ -static void internal_shift1_right ( - struct tree_balance * tb, - int h, - int pointer_amount - ) +static void internal_shift1_right(struct tree_balance *tb, + int h, int pointer_amount) { - struct buffer_info dest_bi, src_bi; - struct buffer_head * cf; - int d_key_position; - - internal_define_dest_src_infos (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); - - if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */ - internal_insert_key (&dest_bi, 0, cf, d_key_position); - /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]);*/ - - /* last parameter is del_parameter */ - internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, pointer_amount, 1); - /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1);*/ -} + struct buffer_info dest_bi, src_bi; + struct buffer_head *cf; + int d_key_position; + + internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, + &dest_bi, &src_bi, &d_key_position, &cf); + + if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */ + internal_insert_key(&dest_bi, 0, cf, d_key_position); + /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]); */ + /* last 
parameter is del_parameter */ + internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST, + pointer_amount, 1); + /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1); */ +} /* Delete insert_num node pointers together with their left items * and balance current node.*/ -static void balance_internal_when_delete (struct tree_balance * tb, - int h, int child_pos) +static void balance_internal_when_delete(struct tree_balance *tb, + int h, int child_pos) { - int insert_num; - int n; - struct buffer_head * tbSh = PATH_H_PBUFFER (tb->tb_path, h); - struct buffer_info bi; - - insert_num = tb->insert_size[h] / ((int)(DC_SIZE + KEY_SIZE)); - - /* delete child-node-pointer(s) together with their left item(s) */ - bi.tb = tb; - bi.bi_bh = tbSh; - bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); - bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1); - - internal_delete_childs (&bi, child_pos, -insert_num); - - RFALSE( tb->blknum[h] > 1, - "tb->blknum[%d]=%d when insert_size < 0", h, tb->blknum[h]); - - n = B_NR_ITEMS(tbSh); - - if ( tb->lnum[h] == 0 && tb->rnum[h] == 0 ) { - if ( tb->blknum[h] == 0 ) { - /* node S[h] (root of the tree) is empty now */ - struct buffer_head *new_root; - - RFALSE( n || B_FREE_SPACE (tbSh) != MAX_CHILD_SIZE(tbSh) - DC_SIZE, - "buffer must have only 0 keys (%d)", n); - RFALSE( bi.bi_parent, "root has parent (%p)", bi.bi_parent); - - /* choose a new root */ - if ( ! tb->L[h-1] || ! 
B_NR_ITEMS(tb->L[h-1]) ) - new_root = tb->R[h-1]; - else - new_root = tb->L[h-1]; - /* switch super block's tree root block number to the new value */ - PUT_SB_ROOT_BLOCK( tb->tb_sb, new_root->b_blocknr ); - //REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; - PUT_SB_TREE_HEIGHT( tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) - 1 ); - - do_balance_mark_sb_dirty (tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1); - /*&&&&&&&&&&&&&&&&&&&&&&*/ - if (h > 1) - /* use check_internal if new root is an internal node */ - check_internal (new_root); - /*&&&&&&&&&&&&&&&&&&&&&&*/ - - /* do what is needed for buffer thrown from tree */ - reiserfs_invalidate_buffer(tb, tbSh); - return; + int insert_num; + int n; + struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h); + struct buffer_info bi; + + insert_num = tb->insert_size[h] / ((int)(DC_SIZE + KEY_SIZE)); + + /* delete child-node-pointer(s) together with their left item(s) */ + bi.tb = tb; + bi.bi_bh = tbSh; + bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h); + bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1); + + internal_delete_childs(&bi, child_pos, -insert_num); + + RFALSE(tb->blknum[h] > 1, + "tb->blknum[%d]=%d when insert_size < 0", h, tb->blknum[h]); + + n = B_NR_ITEMS(tbSh); + + if (tb->lnum[h] == 0 && tb->rnum[h] == 0) { + if (tb->blknum[h] == 0) { + /* node S[h] (root of the tree) is empty now */ + struct buffer_head *new_root; + + RFALSE(n + || B_FREE_SPACE(tbSh) != + MAX_CHILD_SIZE(tbSh) - DC_SIZE, + "buffer must have only 0 keys (%d)", n); + RFALSE(bi.bi_parent, "root has parent (%p)", + bi.bi_parent); + + /* choose a new root */ + if (!tb->L[h - 1] || !B_NR_ITEMS(tb->L[h - 1])) + new_root = tb->R[h - 1]; + else + new_root = tb->L[h - 1]; + /* switch super block's tree root block number to the new value */ + PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr); + //REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; + PUT_SB_TREE_HEIGHT(tb->tb_sb, + SB_TREE_HEIGHT(tb->tb_sb) - 1); + + do_balance_mark_sb_dirty(tb, + 
REISERFS_SB(tb->tb_sb)->s_sbh, + 1); + /*&&&&&&&&&&&&&&&&&&&&&& */ + if (h > 1) + /* use check_internal if new root is an internal node */ + check_internal(new_root); + /*&&&&&&&&&&&&&&&&&&&&&& */ + + /* do what is needed for buffer thrown from tree */ + reiserfs_invalidate_buffer(tb, tbSh); + return; + } + return; + } + + if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) { /* join S[h] with L[h] */ + + RFALSE(tb->rnum[h] != 0, + "invalid tb->rnum[%d]==%d when joining S[h] with L[h]", + h, tb->rnum[h]); + + internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, n + 1); + reiserfs_invalidate_buffer(tb, tbSh); + + return; + } + + if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) { /* join S[h] with R[h] */ + RFALSE(tb->lnum[h] != 0, + "invalid tb->lnum[%d]==%d when joining S[h] with R[h]", + h, tb->lnum[h]); + + internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, n + 1); + + reiserfs_invalidate_buffer(tb, tbSh); + return; } - return; - } - - if ( tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1 ) { /* join S[h] with L[h] */ - - RFALSE( tb->rnum[h] != 0, - "invalid tb->rnum[%d]==%d when joining S[h] with L[h]", - h, tb->rnum[h]); - - internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, n + 1); - reiserfs_invalidate_buffer(tb, tbSh); - - return; - } - - if ( tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1 ) { /* join S[h] with R[h] */ - RFALSE( tb->lnum[h] != 0, - "invalid tb->lnum[%d]==%d when joining S[h] with R[h]", - h, tb->lnum[h]); - - internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, n + 1); - - reiserfs_invalidate_buffer(tb,tbSh); - return; - } - - if ( tb->lnum[h] < 0 ) { /* borrow from left neighbor L[h] */ - RFALSE( tb->rnum[h] != 0, - "wrong tb->rnum[%d]==%d when borrow from L[h]", h, tb->rnum[h]); - /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]);*/ - internal_shift_right (INTERNAL_SHIFT_FROM_L_TO_S, tb, h, -tb->lnum[h]); - return; - } - - if ( tb->rnum[h] < 0 ) { /* borrow 
from right neighbor R[h] */ - RFALSE( tb->lnum[h] != 0, - "invalid tb->lnum[%d]==%d when borrow from R[h]", - h, tb->lnum[h]); - internal_shift_left (INTERNAL_SHIFT_FROM_R_TO_S, tb, h, -tb->rnum[h]);/*tb->S[h], tb->CFR[h], tb->rkey[h], tb->R[h], -tb->rnum[h]);*/ - return; - } - - if ( tb->lnum[h] > 0 ) { /* split S[h] into two parts and put them into neighbors */ - RFALSE( tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1, - "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them", - h, tb->lnum[h], h, tb->rnum[h], n); - - internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]);/*tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], tb->lnum[h]);*/ - internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h]); - - reiserfs_invalidate_buffer (tb, tbSh); - - return; - } - reiserfs_panic (tb->tb_sb, "balance_internal_when_delete: unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d", - h, tb->lnum[h], h, tb->rnum[h]); -} + if (tb->lnum[h] < 0) { /* borrow from left neighbor L[h] */ + RFALSE(tb->rnum[h] != 0, + "wrong tb->rnum[%d]==%d when borrow from L[h]", h, + tb->rnum[h]); + /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]); */ + internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h, + -tb->lnum[h]); + return; + } + + if (tb->rnum[h] < 0) { /* borrow from right neighbor R[h] */ + RFALSE(tb->lnum[h] != 0, + "invalid tb->lnum[%d]==%d when borrow from R[h]", + h, tb->lnum[h]); + internal_shift_left(INTERNAL_SHIFT_FROM_R_TO_S, tb, h, -tb->rnum[h]); /*tb->S[h], tb->CFR[h], tb->rkey[h], tb->R[h], -tb->rnum[h]); */ + return; + } + + if (tb->lnum[h] > 0) { /* split S[h] into two parts and put them into neighbors */ + RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1, + "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them", + h, tb->lnum[h], h, tb->rnum[h], n); + + internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, 
tb->lnum[h]); /*tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], tb->lnum[h]); */ + internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, + tb->rnum[h]); + + reiserfs_invalidate_buffer(tb, tbSh); + + return; + } + reiserfs_panic(tb->tb_sb, + "balance_internal_when_delete: unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d", + h, tb->lnum[h], h, tb->rnum[h]); +} /* Replace delimiting key of buffers L[h] and S[h] by the given key.*/ -static void replace_lkey ( - struct tree_balance * tb, - int h, - struct item_head * key - ) +static void replace_lkey(struct tree_balance *tb, int h, struct item_head *key) { - RFALSE( tb->L[h] == NULL || tb->CFL[h] == NULL, - "L[h](%p) and CFL[h](%p) must exist in replace_lkey", - tb->L[h], tb->CFL[h]); + RFALSE(tb->L[h] == NULL || tb->CFL[h] == NULL, + "L[h](%p) and CFL[h](%p) must exist in replace_lkey", + tb->L[h], tb->CFL[h]); - if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0) - return; + if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0) + return; - memcpy (B_N_PDELIM_KEY(tb->CFL[h],tb->lkey[h]), key, KEY_SIZE); + memcpy(B_N_PDELIM_KEY(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE); - do_balance_mark_internal_dirty (tb, tb->CFL[h],0); + do_balance_mark_internal_dirty(tb, tb->CFL[h], 0); } - /* Replace delimiting key of buffers S[h] and R[h] by the given key.*/ -static void replace_rkey ( - struct tree_balance * tb, - int h, - struct item_head * key - ) +static void replace_rkey(struct tree_balance *tb, int h, struct item_head *key) { - RFALSE( tb->R[h] == NULL || tb->CFR[h] == NULL, - "R[h](%p) and CFR[h](%p) must exist in replace_rkey", - tb->R[h], tb->CFR[h]); - RFALSE( B_NR_ITEMS(tb->R[h]) == 0, - "R[h] can not be empty if it exists (item number=%d)", - B_NR_ITEMS(tb->R[h])); + RFALSE(tb->R[h] == NULL || tb->CFR[h] == NULL, + "R[h](%p) and CFR[h](%p) must exist in replace_rkey", + tb->R[h], tb->CFR[h]); + RFALSE(B_NR_ITEMS(tb->R[h]) == 0, + "R[h] can not be empty if it exists (item number=%d)", + B_NR_ITEMS(tb->R[h])); - memcpy 
(B_N_PDELIM_KEY(tb->CFR[h],tb->rkey[h]), key, KEY_SIZE); + memcpy(B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE); - do_balance_mark_internal_dirty (tb, tb->CFR[h], 0); + do_balance_mark_internal_dirty(tb, tb->CFR[h], 0); } - -int balance_internal (struct tree_balance * tb, /* tree_balance structure */ - int h, /* level of the tree */ - int child_pos, - struct item_head * insert_key, /* key for insertion on higher level */ - struct buffer_head ** insert_ptr /* node for insertion on higher level*/ +int balance_internal(struct tree_balance *tb, /* tree_balance structure */ + int h, /* level of the tree */ + int child_pos, struct item_head *insert_key, /* key for insertion on higher level */ + struct buffer_head **insert_ptr /* node for insertion on higher level */ ) /* if inserting/pasting { - child_pos is the position of the node-pointer in S[h] that * - pointed to S[h-1] before balancing of the h-1 level; * + child_pos is the position of the node-pointer in S[h] that * + pointed to S[h-1] before balancing of the h-1 level; * this means that new pointers and items must be inserted AFTER * child_pos } else { - it is the position of the leftmost pointer that must be deleted (together with - its corresponding key to the left of the pointer) - as a result of the previous level's balancing. - } -*/ + it is the position of the leftmost pointer that must be deleted (together with + its corresponding key to the left of the pointer) + as a result of the previous level's balancing. 
+ } + */ { - struct buffer_head * tbSh = PATH_H_PBUFFER (tb->tb_path, h); - struct buffer_info bi; - int order; /* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */ - int insert_num, n, k; - struct buffer_head * S_new; - struct item_head new_insert_key; - struct buffer_head * new_insert_ptr = NULL; - struct item_head * new_insert_key_addr = insert_key; - - RFALSE( h < 1, "h (%d) can not be < 1 on internal level", h); - - PROC_INFO_INC( tb -> tb_sb, balance_at[ h ] ); - - order = ( tbSh ) ? PATH_H_POSITION (tb->tb_path, h + 1)/*tb->S[h]->b_item_order*/ : 0; - - /* Using insert_size[h] calculate the number insert_num of items - that must be inserted to or deleted from S[h]. */ - insert_num = tb->insert_size[h]/((int)(KEY_SIZE + DC_SIZE)); - - /* Check whether insert_num is proper **/ - RFALSE( insert_num < -2 || insert_num > 2, - "incorrect number of items inserted to the internal node (%d)", - insert_num); - RFALSE( h > 1 && (insert_num > 1 || insert_num < -1), - "incorrect number of items (%d) inserted to the internal node on a level (h=%d) higher than last internal level", - insert_num, h); - - /* Make balance in case insert_num < 0 */ - if ( insert_num < 0 ) { - balance_internal_when_delete (tb, h, child_pos); - return order; - } - - k = 0; - if ( tb->lnum[h] > 0 ) { - /* shift lnum[h] items from S[h] to the left neighbor L[h]. 
- check how many of new items fall into L[h] or CFL[h] after - shifting */ - n = B_NR_ITEMS (tb->L[h]); /* number of items in L[h] */ - if ( tb->lnum[h] <= child_pos ) { - /* new items don't fall into L[h] or CFL[h] */ - internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]); - /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]);*/ - child_pos -= tb->lnum[h]; - } else if ( tb->lnum[h] > child_pos + insert_num ) { - /* all new items fall into L[h] */ - internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h] - insert_num); - /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh, - tb->lnum[h]-insert_num); - */ - /* insert insert_num keys and node-pointers into L[h] */ - bi.tb = tb; - bi.bi_bh = tb->L[h]; - bi.bi_parent = tb->FL[h]; - bi.bi_position = get_left_neighbor_position (tb, h); - internal_insert_childs (&bi,/*tb->L[h], tb->S[h-1]->b_next*/ n + child_pos + 1, - insert_num,insert_key,insert_ptr); - - insert_num = 0; - } else { - struct disk_child * dc; - - /* some items fall into L[h] or CFL[h], but some don't fall */ - internal_shift1_left(tb,h,child_pos+1); - /* calculate number of new items that fall into L[h] */ - k = tb->lnum[h] - child_pos - 1; - bi.tb = tb; - bi.bi_bh = tb->L[h]; - bi.bi_parent = tb->FL[h]; - bi.bi_position = get_left_neighbor_position (tb, h); - internal_insert_childs (&bi,/*tb->L[h], tb->S[h-1]->b_next,*/ n + child_pos + 1,k, - insert_key,insert_ptr); - - replace_lkey(tb,h,insert_key + k); - - /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */ - dc = B_N_CHILD(tbSh, 0); - put_dc_size( dc, MAX_CHILD_SIZE(insert_ptr[k]) - B_FREE_SPACE (insert_ptr[k])); - put_dc_block_number( dc, insert_ptr[k]->b_blocknr ); - - do_balance_mark_internal_dirty (tb, tbSh, 0); - - k++; - insert_key += k; - insert_ptr += k; - insert_num -= k; - child_pos = 0; + struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h); + struct buffer_info bi; + int order; /* we return this: it is 0 if 
there is no S[h], else it is tb->S[h]->b_item_order */ + int insert_num, n, k; + struct buffer_head *S_new; + struct item_head new_insert_key; + struct buffer_head *new_insert_ptr = NULL; + struct item_head *new_insert_key_addr = insert_key; + + RFALSE(h < 1, "h (%d) can not be < 1 on internal level", h); + + PROC_INFO_INC(tb->tb_sb, balance_at[h]); + + order = + (tbSh) ? PATH_H_POSITION(tb->tb_path, + h + 1) /*tb->S[h]->b_item_order */ : 0; + + /* Using insert_size[h] calculate the number insert_num of items + that must be inserted to or deleted from S[h]. */ + insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE)); + + /* Check whether insert_num is proper * */ + RFALSE(insert_num < -2 || insert_num > 2, + "incorrect number of items inserted to the internal node (%d)", + insert_num); + RFALSE(h > 1 && (insert_num > 1 || insert_num < -1), + "incorrect number of items (%d) inserted to the internal node on a level (h=%d) higher than last internal level", + insert_num, h); + + /* Make balance in case insert_num < 0 */ + if (insert_num < 0) { + balance_internal_when_delete(tb, h, child_pos); + return order; } - } /* tb->lnum[h] > 0 */ - - if ( tb->rnum[h] > 0 ) { - /*shift rnum[h] items from S[h] to the right neighbor R[h]*/ - /* check how many of new items fall into R or CFR after shifting */ - n = B_NR_ITEMS (tbSh); /* number of items in S[h] */ - if ( n - tb->rnum[h] >= child_pos ) - /* new items fall into S[h] */ - /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]);*/ - internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h]); - else - if ( n + insert_num - tb->rnum[h] < child_pos ) - { - /* all new items fall into R[h] */ - /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h], - tb->rnum[h] - insert_num);*/ - internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h] - insert_num); - - /* insert insert_num keys and node-pointers into R[h] */ - bi.tb = tb; - bi.bi_bh = tb->R[h]; - bi.bi_parent = 
tb->FR[h]; - bi.bi_position = get_right_neighbor_position (tb, h); - internal_insert_childs (&bi, /*tb->R[h],tb->S[h-1]->b_next*/ child_pos - n - insert_num + tb->rnum[h] - 1, - insert_num,insert_key,insert_ptr); - insert_num = 0; - } - else - { - struct disk_child * dc; - - /* one of the items falls into CFR[h] */ - internal_shift1_right(tb,h,n - child_pos + 1); - /* calculate number of new items that fall into R[h] */ - k = tb->rnum[h] - n + child_pos - 1; - bi.tb = tb; - bi.bi_bh = tb->R[h]; - bi.bi_parent = tb->FR[h]; - bi.bi_position = get_right_neighbor_position (tb, h); - internal_insert_childs (&bi, /*tb->R[h], tb->R[h]->b_child,*/ 0, k, insert_key + 1, insert_ptr + 1); - replace_rkey(tb,h,insert_key + insert_num - k - 1); + k = 0; + if (tb->lnum[h] > 0) { + /* shift lnum[h] items from S[h] to the left neighbor L[h]. + check how many of new items fall into L[h] or CFL[h] after + shifting */ + n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */ + if (tb->lnum[h] <= child_pos) { + /* new items don't fall into L[h] or CFL[h] */ + internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, + tb->lnum[h]); + /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]); */ + child_pos -= tb->lnum[h]; + } else if (tb->lnum[h] > child_pos + insert_num) { + /* all new items fall into L[h] */ + internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, + tb->lnum[h] - insert_num); + /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh, + tb->lnum[h]-insert_num); + */ + /* insert insert_num keys and node-pointers into L[h] */ + bi.tb = tb; + bi.bi_bh = tb->L[h]; + bi.bi_parent = tb->FL[h]; + bi.bi_position = get_left_neighbor_position(tb, h); + internal_insert_childs(&bi, + /*tb->L[h], tb->S[h-1]->b_next */ + n + child_pos + 1, + insert_num, insert_key, + insert_ptr); + + insert_num = 0; + } else { + struct disk_child *dc; + + /* some items fall into L[h] or CFL[h], but some don't fall */ + internal_shift1_left(tb, h, child_pos + 1); + /* calculate 
number of new items that fall into L[h] */ + k = tb->lnum[h] - child_pos - 1; + bi.tb = tb; + bi.bi_bh = tb->L[h]; + bi.bi_parent = tb->FL[h]; + bi.bi_position = get_left_neighbor_position(tb, h); + internal_insert_childs(&bi, + /*tb->L[h], tb->S[h-1]->b_next, */ + n + child_pos + 1, k, + insert_key, insert_ptr); + + replace_lkey(tb, h, insert_key + k); + + /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */ + dc = B_N_CHILD(tbSh, 0); + put_dc_size(dc, + MAX_CHILD_SIZE(insert_ptr[k]) - + B_FREE_SPACE(insert_ptr[k])); + put_dc_block_number(dc, insert_ptr[k]->b_blocknr); + + do_balance_mark_internal_dirty(tb, tbSh, 0); + + k++; + insert_key += k; + insert_ptr += k; + insert_num -= k; + child_pos = 0; + } + } + /* tb->lnum[h] > 0 */ + if (tb->rnum[h] > 0) { + /*shift rnum[h] items from S[h] to the right neighbor R[h] */ + /* check how many of new items fall into R or CFR after shifting */ + n = B_NR_ITEMS(tbSh); /* number of items in S[h] */ + if (n - tb->rnum[h] >= child_pos) + /* new items fall into S[h] */ + /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]); */ + internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, + tb->rnum[h]); + else if (n + insert_num - tb->rnum[h] < child_pos) { + /* all new items fall into R[h] */ + /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h], + tb->rnum[h] - insert_num); */ + internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, + tb->rnum[h] - insert_num); + + /* insert insert_num keys and node-pointers into R[h] */ + bi.tb = tb; + bi.bi_bh = tb->R[h]; + bi.bi_parent = tb->FR[h]; + bi.bi_position = get_right_neighbor_position(tb, h); + internal_insert_childs(&bi, + /*tb->R[h],tb->S[h-1]->b_next */ + child_pos - n - insert_num + + tb->rnum[h] - 1, + insert_num, insert_key, + insert_ptr); + insert_num = 0; + } else { + struct disk_child *dc; + + /* one of the items falls into CFR[h] */ + internal_shift1_right(tb, h, n - child_pos + 1); + /* calculate number of new items 
that fall into R[h] */ + k = tb->rnum[h] - n + child_pos - 1; + bi.tb = tb; + bi.bi_bh = tb->R[h]; + bi.bi_parent = tb->FR[h]; + bi.bi_position = get_right_neighbor_position(tb, h); + internal_insert_childs(&bi, + /*tb->R[h], tb->R[h]->b_child, */ + 0, k, insert_key + 1, + insert_ptr + 1); + + replace_rkey(tb, h, insert_key + insert_num - k - 1); + + /* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1] */ + dc = B_N_CHILD(tb->R[h], 0); + put_dc_size(dc, + MAX_CHILD_SIZE(insert_ptr + [insert_num - k - 1]) - + B_FREE_SPACE(insert_ptr + [insert_num - k - 1])); + put_dc_block_number(dc, + insert_ptr[insert_num - k - + 1]->b_blocknr); + + do_balance_mark_internal_dirty(tb, tb->R[h], 0); + + insert_num -= (k + 1); + } + } - /* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1]*/ - dc = B_N_CHILD(tb->R[h], 0); - put_dc_size( dc, MAX_CHILD_SIZE(insert_ptr[insert_num-k-1]) - - B_FREE_SPACE (insert_ptr[insert_num-k-1])); - put_dc_block_number( dc, insert_ptr[insert_num-k-1]->b_blocknr ); + /** Fill new node that appears instead of S[h] **/ + RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level"); + RFALSE(tb->blknum[h] < 0, "blknum can not be < 0"); - do_balance_mark_internal_dirty (tb, tb->R[h],0); + if (!tb->blknum[h]) { /* node S[h] is empty now */ + RFALSE(!tbSh, "S[h] is equal NULL"); - insert_num -= (k + 1); - } - } + /* do what is needed for buffer thrown from tree */ + reiserfs_invalidate_buffer(tb, tbSh); + return order; + } - /** Fill new node that appears instead of S[h] **/ - RFALSE( tb->blknum[h] > 2, "blknum can not be > 2 for internal level"); - RFALSE( tb->blknum[h] < 0, "blknum can not be < 0"); + if (!tbSh) { + /* create new root */ + struct disk_child *dc; + struct buffer_head *tbSh_1 = PATH_H_PBUFFER(tb->tb_path, h - 1); + struct block_head *blkh; - if ( ! tb->blknum[h] ) - { /* node S[h] is empty now */ - RFALSE( ! 
tbSh, "S[h] is equal NULL"); + if (tb->blknum[h] != 1) + reiserfs_panic(NULL, + "balance_internal: One new node required for creating the new root"); + /* S[h] = empty buffer from the list FEB. */ + tbSh = get_FEB(tb); + blkh = B_BLK_HEAD(tbSh); + set_blkh_level(blkh, h + 1); - /* do what is needed for buffer thrown from tree */ - reiserfs_invalidate_buffer(tb,tbSh); - return order; - } - - if ( ! tbSh ) { - /* create new root */ - struct disk_child * dc; - struct buffer_head * tbSh_1 = PATH_H_PBUFFER (tb->tb_path, h - 1); - struct block_head * blkh; - - - if ( tb->blknum[h] != 1 ) - reiserfs_panic(NULL, "balance_internal: One new node required for creating the new root"); - /* S[h] = empty buffer from the list FEB. */ - tbSh = get_FEB (tb); - blkh = B_BLK_HEAD(tbSh); - set_blkh_level( blkh, h + 1 ); - - /* Put the unique node-pointer to S[h] that points to S[h-1]. */ - - dc = B_N_CHILD(tbSh, 0); - put_dc_block_number( dc, tbSh_1->b_blocknr ); - put_dc_size( dc, (MAX_CHILD_SIZE (tbSh_1) - B_FREE_SPACE (tbSh_1))); - - tb->insert_size[h] -= DC_SIZE; - set_blkh_free_space( blkh, blkh_free_space(blkh) - DC_SIZE ); - - do_balance_mark_internal_dirty (tb, tbSh, 0); - - /*&&&&&&&&&&&&&&&&&&&&&&&&*/ - check_internal (tbSh); - /*&&&&&&&&&&&&&&&&&&&&&&&&*/ - - /* put new root into path structure */ - PATH_OFFSET_PBUFFER(tb->tb_path, ILLEGAL_PATH_ELEMENT_OFFSET) = tbSh; - - /* Change root in structure super block. */ - PUT_SB_ROOT_BLOCK( tb->tb_sb, tbSh->b_blocknr ); - PUT_SB_TREE_HEIGHT( tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1 ); - do_balance_mark_sb_dirty (tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1); - } - - if ( tb->blknum[h] == 2 ) { - int snum; - struct buffer_info dest_bi, src_bi; + /* Put the unique node-pointer to S[h] that points to S[h-1]. 
*/ + + dc = B_N_CHILD(tbSh, 0); + put_dc_block_number(dc, tbSh_1->b_blocknr); + put_dc_size(dc, + (MAX_CHILD_SIZE(tbSh_1) - B_FREE_SPACE(tbSh_1))); + + tb->insert_size[h] -= DC_SIZE; + set_blkh_free_space(blkh, blkh_free_space(blkh) - DC_SIZE); + do_balance_mark_internal_dirty(tb, tbSh, 0); - /* S_new = free buffer from list FEB */ - S_new = get_FEB(tb); - - set_blkh_level( B_BLK_HEAD(S_new), h + 1 ); - - dest_bi.tb = tb; - dest_bi.bi_bh = S_new; - dest_bi.bi_parent = NULL; - dest_bi.bi_position = 0; - src_bi.tb = tb; - src_bi.bi_bh = tbSh; - src_bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); - src_bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1); - - n = B_NR_ITEMS (tbSh); /* number of items in S[h] */ - snum = (insert_num + n + 1)/2; - if ( n - snum >= child_pos ) { - /* new items don't fall into S_new */ - /* store the delimiting key for the next level */ - /* new_insert_key = (n - snum)'th key in S[h] */ - memcpy (&new_insert_key,B_N_PDELIM_KEY(tbSh,n - snum), - KEY_SIZE); - /* last parameter is del_par */ - internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, snum, 0); - /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0);*/ - } else if ( n + insert_num - snum < child_pos ) { - /* all new items fall into S_new */ - /* store the delimiting key for the next level */ - /* new_insert_key = (n + insert_item - snum)'th key in S[h] */ - memcpy(&new_insert_key,B_N_PDELIM_KEY(tbSh,n + insert_num - snum), - KEY_SIZE); - /* last parameter is del_par */ - internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, snum - insert_num, 0); - /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0);*/ - - /* insert insert_num keys and node-pointers into S_new */ - internal_insert_childs (&dest_bi, /*S_new,tb->S[h-1]->b_next,*/child_pos - n - insert_num + snum - 1, - insert_num,insert_key,insert_ptr); - - insert_num = 0; - } else { - struct disk_child * dc; - - /* some items fall into S_new, but some don't fall */ - /* last 
parameter is del_par */ - internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, n - child_pos + 1, 1); - /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1);*/ - /* calculate number of new items that fall into S_new */ - k = snum - n + child_pos - 1; - - internal_insert_childs (&dest_bi, /*S_new,*/ 0, k, insert_key + 1, insert_ptr+1); - - /* new_insert_key = insert_key[insert_num - k - 1] */ - memcpy(&new_insert_key,insert_key + insert_num - k - 1, - KEY_SIZE); - /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */ - - dc = B_N_CHILD(S_new,0); - put_dc_size( dc, (MAX_CHILD_SIZE(insert_ptr[insert_num-k-1]) - - B_FREE_SPACE(insert_ptr[insert_num-k-1])) ); - put_dc_block_number( dc, insert_ptr[insert_num-k-1]->b_blocknr ); - - do_balance_mark_internal_dirty (tb, S_new,0); - - insert_num -= (k + 1); + /*&&&&&&&&&&&&&&&&&&&&&&&& */ + check_internal(tbSh); + /*&&&&&&&&&&&&&&&&&&&&&&&& */ + + /* put new root into path structure */ + PATH_OFFSET_PBUFFER(tb->tb_path, ILLEGAL_PATH_ELEMENT_OFFSET) = + tbSh; + + /* Change root in structure super block. */ + PUT_SB_ROOT_BLOCK(tb->tb_sb, tbSh->b_blocknr); + PUT_SB_TREE_HEIGHT(tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1); + do_balance_mark_sb_dirty(tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1); } - /* new_insert_ptr = node_pointer to S_new */ - new_insert_ptr = S_new; - - RFALSE (!buffer_journaled(S_new) || buffer_journal_dirty(S_new) || - buffer_dirty (S_new), - "cm-00001: bad S_new (%b)", S_new); - - // S_new is released in unfix_nodes - } - - n = B_NR_ITEMS (tbSh); /*number of items in S[h] */ - - if ( 0 <= child_pos && child_pos <= n && insert_num > 0 ) { - bi.tb = tb; - bi.bi_bh = tbSh; - bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); - bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1); - internal_insert_childs ( - &bi,/*tbSh,*/ - /* ( tb->S[h-1]->b_parent == tb->S[h] ) ? 
tb->S[h-1]->b_next : tb->S[h]->b_child->b_next,*/ - child_pos,insert_num,insert_key,insert_ptr - ); + + if (tb->blknum[h] == 2) { + int snum; + struct buffer_info dest_bi, src_bi; + + /* S_new = free buffer from list FEB */ + S_new = get_FEB(tb); + + set_blkh_level(B_BLK_HEAD(S_new), h + 1); + + dest_bi.tb = tb; + dest_bi.bi_bh = S_new; + dest_bi.bi_parent = NULL; + dest_bi.bi_position = 0; + src_bi.tb = tb; + src_bi.bi_bh = tbSh; + src_bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h); + src_bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1); + + n = B_NR_ITEMS(tbSh); /* number of items in S[h] */ + snum = (insert_num + n + 1) / 2; + if (n - snum >= child_pos) { + /* new items don't fall into S_new */ + /* store the delimiting key for the next level */ + /* new_insert_key = (n - snum)'th key in S[h] */ + memcpy(&new_insert_key, B_N_PDELIM_KEY(tbSh, n - snum), + KEY_SIZE); + /* last parameter is del_par */ + internal_move_pointers_items(&dest_bi, &src_bi, + LAST_TO_FIRST, snum, 0); + /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0); */ + } else if (n + insert_num - snum < child_pos) { + /* all new items fall into S_new */ + /* store the delimiting key for the next level */ + /* new_insert_key = (n + insert_item - snum)'th key in S[h] */ + memcpy(&new_insert_key, + B_N_PDELIM_KEY(tbSh, n + insert_num - snum), + KEY_SIZE); + /* last parameter is del_par */ + internal_move_pointers_items(&dest_bi, &src_bi, + LAST_TO_FIRST, + snum - insert_num, 0); + /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0); */ + + /* insert insert_num keys and node-pointers into S_new */ + internal_insert_childs(&dest_bi, + /*S_new,tb->S[h-1]->b_next, */ + child_pos - n - insert_num + + snum - 1, + insert_num, insert_key, + insert_ptr); + + insert_num = 0; + } else { + struct disk_child *dc; + + /* some items fall into S_new, but some don't fall */ + /* last parameter is del_par */ + internal_move_pointers_items(&dest_bi, &src_bi, + LAST_TO_FIRST, + n - 
child_pos + 1, 1); + /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1); */ + /* calculate number of new items that fall into S_new */ + k = snum - n + child_pos - 1; + + internal_insert_childs(&dest_bi, /*S_new, */ 0, k, + insert_key + 1, insert_ptr + 1); + + /* new_insert_key = insert_key[insert_num - k - 1] */ + memcpy(&new_insert_key, insert_key + insert_num - k - 1, + KEY_SIZE); + /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */ + + dc = B_N_CHILD(S_new, 0); + put_dc_size(dc, + (MAX_CHILD_SIZE + (insert_ptr[insert_num - k - 1]) - + B_FREE_SPACE(insert_ptr + [insert_num - k - 1]))); + put_dc_block_number(dc, + insert_ptr[insert_num - k - + 1]->b_blocknr); + + do_balance_mark_internal_dirty(tb, S_new, 0); + + insert_num -= (k + 1); + } + /* new_insert_ptr = node_pointer to S_new */ + new_insert_ptr = S_new; + + RFALSE(!buffer_journaled(S_new) || buffer_journal_dirty(S_new) + || buffer_dirty(S_new), "cm-00001: bad S_new (%b)", + S_new); + + // S_new is released in unfix_nodes } + n = B_NR_ITEMS(tbSh); /*number of items in S[h] */ - memcpy (new_insert_key_addr,&new_insert_key,KEY_SIZE); + if (0 <= child_pos && child_pos <= n && insert_num > 0) { + bi.tb = tb; + bi.bi_bh = tbSh; + bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h); + bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1); + internal_insert_childs(&bi, /*tbSh, */ + /* ( tb->S[h-1]->b_parent == tb->S[h] ) ? 
tb->S[h-1]->b_next : tb->S[h]->b_child->b_next, */ + child_pos, insert_num, insert_key, + insert_ptr); + } + + memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE); insert_ptr[0] = new_insert_ptr; return order; - } - - - +} diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 289d864fe73..1aaf2c7d44e 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -18,107 +18,109 @@ #include #include -extern int reiserfs_default_io_size; /* default io size devuned in super.c */ +extern int reiserfs_default_io_size; /* default io size devuned in super.c */ static int reiserfs_commit_write(struct file *f, struct page *page, - unsigned from, unsigned to); + unsigned from, unsigned to); static int reiserfs_prepare_write(struct file *f, struct page *page, unsigned from, unsigned to); -void reiserfs_delete_inode (struct inode * inode) +void reiserfs_delete_inode(struct inode *inode) { - /* We need blocks for transaction + (user+group) quota update (possibly delete) */ - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); - struct reiserfs_transaction_handle th ; - - reiserfs_write_lock(inode->i_sb); + /* We need blocks for transaction + (user+group) quota update (possibly delete) */ + int jbegin_count = + JOURNAL_PER_BALANCE_CNT * 2 + + 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); + struct reiserfs_transaction_handle th; - /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ - if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ - down (&inode->i_sem); + reiserfs_write_lock(inode->i_sb); - reiserfs_delete_xattrs (inode); + /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. 
*/ + if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ + down(&inode->i_sem); - if (journal_begin(&th, inode->i_sb, jbegin_count)) { - up (&inode->i_sem); - goto out; - } - reiserfs_update_inode_transaction(inode) ; + reiserfs_delete_xattrs(inode); - if (reiserfs_delete_object (&th, inode)) { - up (&inode->i_sem); - goto out; - } + if (journal_begin(&th, inode->i_sb, jbegin_count)) { + up(&inode->i_sem); + goto out; + } + reiserfs_update_inode_transaction(inode); - /* Do quota update inside a transaction for journaled quotas. We must do that - * after delete_object so that quota updates go into the same transaction as - * stat data deletion */ - DQUOT_FREE_INODE(inode); + if (reiserfs_delete_object(&th, inode)) { + up(&inode->i_sem); + goto out; + } - if (journal_end(&th, inode->i_sb, jbegin_count)) { - up (&inode->i_sem); - goto out; - } + /* Do quota update inside a transaction for journaled quotas. We must do that + * after delete_object so that quota updates go into the same transaction as + * stat data deletion */ + DQUOT_FREE_INODE(inode); + + if (journal_end(&th, inode->i_sb, jbegin_count)) { + up(&inode->i_sem); + goto out; + } - up (&inode->i_sem); + up(&inode->i_sem); - /* all items of file are deleted, so we can remove "save" link */ - remove_save_link (inode, 0/* not truncate */); /* we can't do anything - * about an error here */ - } else { - /* no object items are in the tree */ - ; - } -out: - clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */ - inode->i_blocks = 0; - reiserfs_write_unlock(inode->i_sb); + /* all items of file are deleted, so we can remove "save" link */ + remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything + * about an error here */ + } else { + /* no object items are in the tree */ + ; + } + out: + clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ + inode->i_blocks = 0; + 
reiserfs_write_unlock(inode->i_sb); } -static void _make_cpu_key (struct cpu_key * key, int version, __u32 dirid, __u32 objectid, - loff_t offset, int type, int length ) +static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, + __u32 objectid, loff_t offset, int type, int length) { - key->version = version; + key->version = version; - key->on_disk_key.k_dir_id = dirid; - key->on_disk_key.k_objectid = objectid; - set_cpu_key_k_offset (key, offset); - set_cpu_key_k_type (key, type); - key->key_length = length; + key->on_disk_key.k_dir_id = dirid; + key->on_disk_key.k_objectid = objectid; + set_cpu_key_k_offset(key, offset); + set_cpu_key_k_type(key, type); + key->key_length = length; } - /* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set offset and type of key */ -void make_cpu_key (struct cpu_key * key, struct inode * inode, loff_t offset, - int type, int length ) +void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, + int type, int length) { - _make_cpu_key (key, get_inode_item_key_version (inode), le32_to_cpu (INODE_PKEY (inode)->k_dir_id), - le32_to_cpu (INODE_PKEY (inode)->k_objectid), - offset, type, length); + _make_cpu_key(key, get_inode_item_key_version(inode), + le32_to_cpu(INODE_PKEY(inode)->k_dir_id), + le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type, + length); } - // // when key is 0, do not set version and short key // -inline void make_le_item_head (struct item_head * ih, const struct cpu_key * key, - int version, - loff_t offset, int type, int length, - int entry_count/*or ih_free_space*/) +inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, + int version, + loff_t offset, int type, int length, + int entry_count /*or ih_free_space */ ) { - if (key) { - ih->ih_key.k_dir_id = cpu_to_le32 (key->on_disk_key.k_dir_id); - ih->ih_key.k_objectid = cpu_to_le32 (key->on_disk_key.k_objectid); - } - put_ih_version( ih, version ); - 
set_le_ih_k_offset (ih, offset); - set_le_ih_k_type (ih, type); - put_ih_item_len( ih, length ); - /* set_ih_free_space (ih, 0);*/ - // for directory items it is entry count, for directs and stat - // datas - 0xffff, for indirects - 0 - put_ih_entry_count( ih, entry_count ); + if (key) { + ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id); + ih->ih_key.k_objectid = + cpu_to_le32(key->on_disk_key.k_objectid); + } + put_ih_version(ih, version); + set_le_ih_k_offset(ih, offset); + set_le_ih_k_type(ih, type); + put_ih_item_len(ih, length); + /* set_ih_free_space (ih, 0); */ + // for directory items it is entry count, for directs and stat + // datas - 0xffff, for indirects - 0 + put_ih_entry_count(ih, entry_count); } // @@ -153,84 +155,84 @@ inline void make_le_item_head (struct item_head * ih, const struct cpu_key * key ** to be unmapped, so that block_prepare_write will correctly call ** reiserfs_get_block to convert the tail into an unformatted node */ -static inline void fix_tail_page_for_writing(struct page *page) { - struct buffer_head *head, *next, *bh ; - - if (page && page_has_buffers(page)) { - head = page_buffers(page) ; - bh = head ; - do { - next = bh->b_this_page ; - if (buffer_mapped(bh) && bh->b_blocknr == 0) { - reiserfs_unmap_buffer(bh) ; - } - bh = next ; - } while (bh != head) ; - } +static inline void fix_tail_page_for_writing(struct page *page) +{ + struct buffer_head *head, *next, *bh; + + if (page && page_has_buffers(page)) { + head = page_buffers(page); + bh = head; + do { + next = bh->b_this_page; + if (buffer_mapped(bh) && bh->b_blocknr == 0) { + reiserfs_unmap_buffer(bh); + } + bh = next; + } while (bh != head); + } } /* reiserfs_get_block does not need to allocate a block only if it has been done already or non-hole position has been found in the indirect item */ -static inline int allocation_needed (int retval, b_blocknr_t allocated, - struct item_head * ih, - __le32 * item, int pos_in_item) +static inline int 
allocation_needed(int retval, b_blocknr_t allocated, + struct item_head *ih, + __le32 * item, int pos_in_item) { - if (allocated) - return 0; - if (retval == POSITION_FOUND && is_indirect_le_ih (ih) && - get_block_num(item, pos_in_item)) - return 0; - return 1; + if (allocated) + return 0; + if (retval == POSITION_FOUND && is_indirect_le_ih(ih) && + get_block_num(item, pos_in_item)) + return 0; + return 1; } -static inline int indirect_item_found (int retval, struct item_head * ih) +static inline int indirect_item_found(int retval, struct item_head *ih) { - return (retval == POSITION_FOUND) && is_indirect_le_ih (ih); + return (retval == POSITION_FOUND) && is_indirect_le_ih(ih); } - -static inline void set_block_dev_mapped (struct buffer_head * bh, - b_blocknr_t block, struct inode * inode) +static inline void set_block_dev_mapped(struct buffer_head *bh, + b_blocknr_t block, struct inode *inode) { map_bh(bh, inode->i_sb, block); } - // // files which were created in the earlier version can not be longer, // than 2 gb // -static int file_capable (struct inode * inode, long block) +static int file_capable(struct inode *inode, long block) { - if (get_inode_item_key_version (inode) != KEY_FORMAT_3_5 || // it is new file. - block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb - return 1; + if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file. 
+ block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb + return 1; - return 0; + return 0; } /*static*/ int restart_transaction(struct reiserfs_transaction_handle *th, - struct inode *inode, struct path *path) { - struct super_block *s = th->t_super ; - int len = th->t_blocks_allocated ; - int err; - - BUG_ON (!th->t_trans_id); - BUG_ON (!th->t_refcount); - - /* we cannot restart while nested */ - if (th->t_refcount > 1) { - return 0 ; - } - pathrelse(path) ; - reiserfs_update_sd(th, inode) ; - err = journal_end(th, s, len) ; - if (!err) { - err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6) ; - if (!err) - reiserfs_update_inode_transaction(inode) ; - } - return err; + struct inode *inode, struct path *path) +{ + struct super_block *s = th->t_super; + int len = th->t_blocks_allocated; + int err; + + BUG_ON(!th->t_trans_id); + BUG_ON(!th->t_refcount); + + /* we cannot restart while nested */ + if (th->t_refcount > 1) { + return 0; + } + pathrelse(path); + reiserfs_update_sd(th, inode); + err = journal_end(th, s, len); + if (!err) { + err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6); + if (!err) + reiserfs_update_inode_transaction(inode); + } + return err; } // it is called by get_block when create == 0. Returns block number @@ -241,190 +243,192 @@ static int file_capable (struct inode * inode, long block) // Please improve the english/clarity in the comment above, as it is // hard to understand. 
-static int _get_block_create_0 (struct inode * inode, long block, - struct buffer_head * bh_result, - int args) +static int _get_block_create_0(struct inode *inode, long block, + struct buffer_head *bh_result, int args) { - INITIALIZE_PATH (path); - struct cpu_key key; - struct buffer_head * bh; - struct item_head * ih, tmp_ih; - int fs_gen ; - int blocknr; - char * p = NULL; - int chars; - int ret ; - int result ; - int done = 0 ; - unsigned long offset ; - - // prepare the key to look for the 'block'-th block of file - make_cpu_key (&key, inode, - (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3); - -research: - result = search_for_position_by_key (inode->i_sb, &key, &path) ; - if (result != POSITION_FOUND) { - pathrelse (&path); - if (p) - kunmap(bh_result->b_page) ; - if (result == IO_ERROR) - return -EIO; - // We do not return -ENOENT if there is a hole but page is uptodate, because it means - // That there is some MMAPED data associated with it that is yet to be written to disk. - if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) { - return -ENOENT ; - } - return 0 ; - } - - // - bh = get_last_bh (&path); - ih = get_ih (&path); - if (is_indirect_le_ih (ih)) { - __le32 * ind_item = (__le32 *)B_I_PITEM (bh, ih); - - /* FIXME: here we could cache indirect item or part of it in - the inode to avoid search_by_key in case of subsequent - access to file */ - blocknr = get_block_num(ind_item, path.pos_in_item) ; - ret = 0 ; - if (blocknr) { - map_bh(bh_result, inode->i_sb, blocknr); - if (path.pos_in_item == ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) { - set_buffer_boundary(bh_result); - } - } else - // We do not return -ENOENT if there is a hole but page is uptodate, because it means - // That there is some MMAPED data associated with it that is yet to be written to disk. 
- if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) { - ret = -ENOENT ; - } - - pathrelse (&path); - if (p) - kunmap(bh_result->b_page) ; - return ret ; - } - - // requested data are in direct item(s) - if (!(args & GET_BLOCK_READ_DIRECT)) { - // we are called by bmap. FIXME: we can not map block of file - // when it is stored in direct item(s) - pathrelse (&path); - if (p) - kunmap(bh_result->b_page) ; - return -ENOENT; - } - - /* if we've got a direct item, and the buffer or page was uptodate, - ** we don't want to pull data off disk again. skip to the - ** end, where we map the buffer and return - */ - if (buffer_uptodate(bh_result)) { - goto finished ; - } else - /* - ** grab_tail_page can trigger calls to reiserfs_get_block on up to date - ** pages without any buffers. If the page is up to date, we don't want - ** read old data off disk. Set the up to date bit on the buffer instead - ** and jump to the end - */ - if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { + INITIALIZE_PATH(path); + struct cpu_key key; + struct buffer_head *bh; + struct item_head *ih, tmp_ih; + int fs_gen; + int blocknr; + char *p = NULL; + int chars; + int ret; + int result; + int done = 0; + unsigned long offset; + + // prepare the key to look for the 'block'-th block of file + make_cpu_key(&key, inode, + (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, + 3); + + research: + result = search_for_position_by_key(inode->i_sb, &key, &path); + if (result != POSITION_FOUND) { + pathrelse(&path); + if (p) + kunmap(bh_result->b_page); + if (result == IO_ERROR) + return -EIO; + // We do not return -ENOENT if there is a hole but page is uptodate, because it means + // That there is some MMAPED data associated with it that is yet to be written to disk. 
+ if ((args & GET_BLOCK_NO_HOLE) + && !PageUptodate(bh_result->b_page)) { + return -ENOENT; + } + return 0; + } + // + bh = get_last_bh(&path); + ih = get_ih(&path); + if (is_indirect_le_ih(ih)) { + __le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih); + + /* FIXME: here we could cache indirect item or part of it in + the inode to avoid search_by_key in case of subsequent + access to file */ + blocknr = get_block_num(ind_item, path.pos_in_item); + ret = 0; + if (blocknr) { + map_bh(bh_result, inode->i_sb, blocknr); + if (path.pos_in_item == + ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) { + set_buffer_boundary(bh_result); + } + } else + // We do not return -ENOENT if there is a hole but page is uptodate, because it means + // That there is some MMAPED data associated with it that is yet to be written to disk. + if ((args & GET_BLOCK_NO_HOLE) + && !PageUptodate(bh_result->b_page)) { + ret = -ENOENT; + } + + pathrelse(&path); + if (p) + kunmap(bh_result->b_page); + return ret; + } + // requested data are in direct item(s) + if (!(args & GET_BLOCK_READ_DIRECT)) { + // we are called by bmap. FIXME: we can not map block of file + // when it is stored in direct item(s) + pathrelse(&path); + if (p) + kunmap(bh_result->b_page); + return -ENOENT; + } + + /* if we've got a direct item, and the buffer or page was uptodate, + ** we don't want to pull data off disk again. skip to the + ** end, where we map the buffer and return + */ + if (buffer_uptodate(bh_result)) { + goto finished; + } else + /* + ** grab_tail_page can trigger calls to reiserfs_get_block on up to date + ** pages without any buffers. If the page is up to date, we don't want + ** read old data off disk. 
Set the up to date bit on the buffer instead + ** and jump to the end + */ + if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { set_buffer_uptodate(bh_result); - goto finished ; - } - - // read file tail into part of page - offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ; - fs_gen = get_generation(inode->i_sb) ; - copy_item_head (&tmp_ih, ih); - - /* we only want to kmap if we are reading the tail into the page. - ** this is not the common case, so we don't kmap until we are - ** sure we need to. But, this means the item might move if - ** kmap schedules - */ - if (!p) { - p = (char *)kmap(bh_result->b_page) ; - if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { - goto research; - } - } - p += offset ; - memset (p, 0, inode->i_sb->s_blocksize); - do { - if (!is_direct_le_ih (ih)) { - BUG (); - } - /* make sure we don't read more bytes than actually exist in - ** the file. This can happen in odd cases where i_size isn't - ** correct, and when direct item padding results in a few - ** extra bytes at the end of the direct item - */ - if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) - break ; - if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) { - chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path.pos_in_item; - done = 1 ; - } else { - chars = ih_item_len(ih) - path.pos_in_item; - } - memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars); - - if (done) - break ; - - p += chars; - - if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1)) - // we done, if read direct item is not the last item of - // node FIXME: we could try to check right delimiting key - // to see whether direct item continues in the right - // neighbor or rely on i_size - break; - - // update key to look for the next piece - set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars); - result = search_for_position_by_key (inode->i_sb, &key, &path); - if (result != POSITION_FOUND) - // i/o error most likely - break; - bh 
= get_last_bh (&path); - ih = get_ih (&path); - } while (1); - - flush_dcache_page(bh_result->b_page) ; - kunmap(bh_result->b_page) ; - -finished: - pathrelse (&path); - - if (result == IO_ERROR) - return -EIO; - - /* this buffer has valid data, but isn't valid for io. mapping it to - * block #0 tells the rest of reiserfs it just has a tail in it - */ - map_bh(bh_result, inode->i_sb, 0); - set_buffer_uptodate (bh_result); - return 0; -} + goto finished; + } + // read file tail into part of page + offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); + fs_gen = get_generation(inode->i_sb); + copy_item_head(&tmp_ih, ih); + + /* we only want to kmap if we are reading the tail into the page. + ** this is not the common case, so we don't kmap until we are + ** sure we need to. But, this means the item might move if + ** kmap schedules + */ + if (!p) { + p = (char *)kmap(bh_result->b_page); + if (fs_changed(fs_gen, inode->i_sb) + && item_moved(&tmp_ih, &path)) { + goto research; + } + } + p += offset; + memset(p, 0, inode->i_sb->s_blocksize); + do { + if (!is_direct_le_ih(ih)) { + BUG(); + } + /* make sure we don't read more bytes than actually exist in + ** the file. 
This can happen in odd cases where i_size isn't + ** correct, and when direct item padding results in a few + ** extra bytes at the end of the direct item + */ + if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) + break; + if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) { + chars = + inode->i_size - (le_ih_k_offset(ih) - 1) - + path.pos_in_item; + done = 1; + } else { + chars = ih_item_len(ih) - path.pos_in_item; + } + memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars); + + if (done) + break; + + p += chars; + + if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1)) + // we done, if read direct item is not the last item of + // node FIXME: we could try to check right delimiting key + // to see whether direct item continues in the right + // neighbor or rely on i_size + break; + + // update key to look for the next piece + set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars); + result = search_for_position_by_key(inode->i_sb, &key, &path); + if (result != POSITION_FOUND) + // i/o error most likely + break; + bh = get_last_bh(&path); + ih = get_ih(&path); + } while (1); + + flush_dcache_page(bh_result->b_page); + kunmap(bh_result->b_page); + + finished: + pathrelse(&path); + + if (result == IO_ERROR) + return -EIO; + /* this buffer has valid data, but isn't valid for io. mapping it to + * block #0 tells the rest of reiserfs it just has a tail in it + */ + map_bh(bh_result, inode->i_sb, 0); + set_buffer_uptodate(bh_result); + return 0; +} // this is called to create file map. 
So, _get_block_create_0 will not // read direct item -static int reiserfs_bmap (struct inode * inode, sector_t block, - struct buffer_head * bh_result, int create) +static int reiserfs_bmap(struct inode *inode, sector_t block, + struct buffer_head *bh_result, int create) { - if (!file_capable (inode, block)) - return -EFBIG; - - reiserfs_write_lock(inode->i_sb); - /* do not read the direct item */ - _get_block_create_0 (inode, block, bh_result, 0) ; - reiserfs_write_unlock(inode->i_sb); - return 0; + if (!file_capable(inode, block)) + return -EFBIG; + + reiserfs_write_lock(inode->i_sb); + /* do not read the direct item */ + _get_block_create_0(inode, block, bh_result, 0); + reiserfs_write_unlock(inode->i_sb); + return 0; } /* special version of get_block that is only used by grab_tail_page right @@ -444,9 +448,11 @@ static int reiserfs_bmap (struct inode * inode, sector_t block, ** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, ** don't use this function. */ -static int reiserfs_get_block_create_0 (struct inode * inode, sector_t block, - struct buffer_head * bh_result, int create) { - return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ; +static int reiserfs_get_block_create_0(struct inode *inode, sector_t block, + struct buffer_head *bh_result, + int create) +{ + return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE); } /* This is special helper for reiserfs_get_block in case we are executing @@ -457,43 +463,42 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode, struct buffer_head *bh_result, int create) { - int ret ; - - bh_result->b_page = NULL; - - /* We set the b_size before reiserfs_get_block call since it is - referenced in convert_tail_for_hole() that may be called from - reiserfs_get_block() */ - bh_result->b_size = (1 << inode->i_blkbits); - - ret = reiserfs_get_block(inode, iblock, bh_result, - create | GET_BLOCK_NO_DANGLE) ; - if (ret) - goto out; - - /* don't allow direct io onto tail 
pages */ - if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { - /* make sure future calls to the direct io funcs for this offset - ** in the file fail by unmapping the buffer - */ - clear_buffer_mapped(bh_result); - ret = -EINVAL ; - } - /* Possible unpacked tail. Flush the data before pages have - disappeared */ - if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { - int err; - lock_kernel(); - err = reiserfs_commit_for_inode(inode); - REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; - unlock_kernel(); - if (err < 0) - ret = err; - } -out: - return ret ; -} + int ret; + + bh_result->b_page = NULL; + /* We set the b_size before reiserfs_get_block call since it is + referenced in convert_tail_for_hole() that may be called from + reiserfs_get_block() */ + bh_result->b_size = (1 << inode->i_blkbits); + + ret = reiserfs_get_block(inode, iblock, bh_result, + create | GET_BLOCK_NO_DANGLE); + if (ret) + goto out; + + /* don't allow direct io onto tail pages */ + if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { + /* make sure future calls to the direct io funcs for this offset + ** in the file fail by unmapping the buffer + */ + clear_buffer_mapped(bh_result); + ret = -EINVAL; + } + /* Possible unpacked tail. Flush the data before pages have + disappeared */ + if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { + int err; + lock_kernel(); + err = reiserfs_commit_for_inode(inode); + REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; + unlock_kernel(); + if (err < 0) + ret = err; + } + out: + return ret; +} /* ** helper function for when reiserfs_get_block is called for a hole @@ -505,490 +510,547 @@ out: ** you should not be in a transaction, or have any paths held when you ** call this. 
*/ -static int convert_tail_for_hole(struct inode *inode, - struct buffer_head *bh_result, - loff_t tail_offset) { - unsigned long index ; - unsigned long tail_end ; - unsigned long tail_start ; - struct page * tail_page ; - struct page * hole_page = bh_result->b_page ; - int retval = 0 ; - - if ((tail_offset & (bh_result->b_size - 1)) != 1) - return -EIO ; - - /* always try to read until the end of the block */ - tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ; - tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ; - - index = tail_offset >> PAGE_CACHE_SHIFT ; - /* hole_page can be zero in case of direct_io, we are sure - that we cannot get here if we write with O_DIRECT into - tail page */ - if (!hole_page || index != hole_page->index) { - tail_page = grab_cache_page(inode->i_mapping, index) ; - retval = -ENOMEM; - if (!tail_page) { - goto out ; - } - } else { - tail_page = hole_page ; - } - - /* we don't have to make sure the conversion did not happen while - ** we were locking the page because anyone that could convert - ** must first take i_sem. - ** - ** We must fix the tail page for writing because it might have buffers - ** that are mapped, but have a block number of 0. This indicates tail - ** data that has been read directly into the page, and block_prepare_write - ** won't trigger a get_block in this case. 
- */ - fix_tail_page_for_writing(tail_page) ; - retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end); - if (retval) - goto unlock ; - - /* tail conversion might change the data in the page */ - flush_dcache_page(tail_page) ; - - retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end) ; - -unlock: - if (tail_page != hole_page) { - unlock_page(tail_page) ; - page_cache_release(tail_page) ; - } -out: - return retval ; +static int convert_tail_for_hole(struct inode *inode, + struct buffer_head *bh_result, + loff_t tail_offset) +{ + unsigned long index; + unsigned long tail_end; + unsigned long tail_start; + struct page *tail_page; + struct page *hole_page = bh_result->b_page; + int retval = 0; + + if ((tail_offset & (bh_result->b_size - 1)) != 1) + return -EIO; + + /* always try to read until the end of the block */ + tail_start = tail_offset & (PAGE_CACHE_SIZE - 1); + tail_end = (tail_start | (bh_result->b_size - 1)) + 1; + + index = tail_offset >> PAGE_CACHE_SHIFT; + /* hole_page can be zero in case of direct_io, we are sure + that we cannot get here if we write with O_DIRECT into + tail page */ + if (!hole_page || index != hole_page->index) { + tail_page = grab_cache_page(inode->i_mapping, index); + retval = -ENOMEM; + if (!tail_page) { + goto out; + } + } else { + tail_page = hole_page; + } + + /* we don't have to make sure the conversion did not happen while + ** we were locking the page because anyone that could convert + ** must first take i_sem. + ** + ** We must fix the tail page for writing because it might have buffers + ** that are mapped, but have a block number of 0. This indicates tail + ** data that has been read directly into the page, and block_prepare_write + ** won't trigger a get_block in this case. 
+ */ + fix_tail_page_for_writing(tail_page); + retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end); + if (retval) + goto unlock; + + /* tail conversion might change the data in the page */ + flush_dcache_page(tail_page); + + retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end); + + unlock: + if (tail_page != hole_page) { + unlock_page(tail_page); + page_cache_release(tail_page); + } + out: + return retval; } static inline int _allocate_block(struct reiserfs_transaction_handle *th, - long block, - struct inode *inode, - b_blocknr_t *allocated_block_nr, - struct path * path, - int flags) { - BUG_ON (!th->t_trans_id); - + long block, + struct inode *inode, + b_blocknr_t * allocated_block_nr, + struct path *path, int flags) +{ + BUG_ON(!th->t_trans_id); + #ifdef REISERFS_PREALLOCATE - if (!(flags & GET_BLOCK_NO_ISEM)) { - return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, path, block); - } + if (!(flags & GET_BLOCK_NO_ISEM)) { + return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, + path, block); + } #endif - return reiserfs_new_unf_blocknrs (th, inode, allocated_block_nr, path, block); + return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path, + block); } -int reiserfs_get_block (struct inode * inode, sector_t block, - struct buffer_head * bh_result, int create) +int reiserfs_get_block(struct inode *inode, sector_t block, + struct buffer_head *bh_result, int create) { - int repeat, retval = 0; - b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is (unsigned) 32 bit int - INITIALIZE_PATH(path); - int pos_in_item; - struct cpu_key key; - struct buffer_head * bh, * unbh = NULL; - struct item_head * ih, tmp_ih; - __le32 * item; - int done; - int fs_gen; - struct reiserfs_transaction_handle *th = NULL; - /* space reserved in transaction batch: - . 3 balancings in direct->indirect conversion - . 
1 block involved into reiserfs_update_sd() - XXX in practically impossible worst case direct2indirect() - can incur (much) more than 3 balancings. - quota update for user, group */ - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); - int version; - int dangle = 1; - loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ; - - /* bad.... */ - reiserfs_write_lock(inode->i_sb); - version = get_inode_item_key_version (inode); - - if (block < 0) { - reiserfs_write_unlock(inode->i_sb); - return -EIO; - } + int repeat, retval = 0; + b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int + INITIALIZE_PATH(path); + int pos_in_item; + struct cpu_key key; + struct buffer_head *bh, *unbh = NULL; + struct item_head *ih, tmp_ih; + __le32 *item; + int done; + int fs_gen; + struct reiserfs_transaction_handle *th = NULL; + /* space reserved in transaction batch: + . 3 balancings in direct->indirect conversion + . 1 block involved into reiserfs_update_sd() + XXX in practically impossible worst case direct2indirect() + can incur (much) more than 3 balancings. + quota update for user, group */ + int jbegin_count = + JOURNAL_PER_BALANCE_CNT * 3 + 1 + + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); + int version; + int dangle = 1; + loff_t new_offset = + (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; + + /* bad.... 
*/ + reiserfs_write_lock(inode->i_sb); + version = get_inode_item_key_version(inode); - if (!file_capable (inode, block)) { - reiserfs_write_unlock(inode->i_sb); - return -EFBIG; - } - - /* if !create, we aren't changing the FS, so we don't need to - ** log anything, so we don't need to start a transaction - */ - if (!(create & GET_BLOCK_CREATE)) { - int ret ; - /* find number of block-th logical block of the file */ - ret = _get_block_create_0 (inode, block, bh_result, - create | GET_BLOCK_READ_DIRECT) ; - reiserfs_write_unlock(inode->i_sb); - return ret; - } - /* - * if we're already in a transaction, make sure to close - * any new transactions we start in this func - */ - if ((create & GET_BLOCK_NO_DANGLE) || - reiserfs_transaction_running(inode->i_sb)) - dangle = 0; - - /* If file is of such a size, that it might have a tail and tails are enabled - ** we should mark it as possibly needing tail packing on close - */ - if ( (have_large_tails (inode->i_sb) && inode->i_size < i_block_size (inode)*4) || - (have_small_tails (inode->i_sb) && inode->i_size < i_block_size(inode)) ) - REISERFS_I(inode)->i_flags |= i_pack_on_close_mask ; - - /* set the key of the first byte in the 'block'-th block of file */ - make_cpu_key (&key, inode, new_offset, - TYPE_ANY, 3/*key length*/); - if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { -start_trans: - th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count); - if (!th) { - retval = -ENOMEM; - goto failure; - } - reiserfs_update_inode_transaction(inode) ; - } - research: - - retval = search_for_position_by_key (inode->i_sb, &key, &path); - if (retval == IO_ERROR) { - retval = -EIO; - goto failure; - } - - bh = get_last_bh (&path); - ih = get_ih (&path); - item = get_item (&path); - pos_in_item = path.pos_in_item; - - fs_gen = get_generation (inode->i_sb); - copy_item_head (&tmp_ih, ih); - - if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) { - /* we have to allocate block for the 
unformatted node */ - if (!th) { - pathrelse(&path) ; - goto start_trans; - } - - repeat = _allocate_block(th, block, inode, &allocated_block_nr, &path, create); - - if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { - /* restart the transaction to give the journal a chance to free - ** some blocks. releases the path, so we have to go back to - ** research if we succeed on the second try - */ - SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; - retval = restart_transaction(th, inode, &path) ; - if (retval) - goto failure; - repeat = _allocate_block(th, block, inode, &allocated_block_nr, NULL, create); - - if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) { - goto research ; - } - if (repeat == QUOTA_EXCEEDED) - retval = -EDQUOT; - else - retval = -ENOSPC; - goto failure; - } - - if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { - goto research; - } - } - - if (indirect_item_found (retval, ih)) { - b_blocknr_t unfm_ptr; - /* 'block'-th block is in the file already (there is - corresponding cell in some indirect item). 
But it may be - zero unformatted node pointer (hole) */ - unfm_ptr = get_block_num (item, pos_in_item); - if (unfm_ptr == 0) { - /* use allocated block to plug the hole */ - reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; - if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { - reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; - goto research; - } - set_buffer_new(bh_result); - if (buffer_dirty(bh_result) && reiserfs_data_ordered(inode->i_sb)) - reiserfs_add_ordered_list(inode, bh_result); - put_block_num(item, pos_in_item, allocated_block_nr) ; - unfm_ptr = allocated_block_nr; - journal_mark_dirty (th, inode->i_sb, bh); - reiserfs_update_sd(th, inode) ; - } - set_block_dev_mapped(bh_result, unfm_ptr, inode); - pathrelse (&path); - retval = 0; - if (!dangle && th) - retval = reiserfs_end_persistent_transaction(th); + if (block < 0) { + reiserfs_write_unlock(inode->i_sb); + return -EIO; + } - reiserfs_write_unlock(inode->i_sb); - - /* the item was found, so new blocks were not added to the file - ** there is no need to make sure the inode is updated with this - ** transaction - */ - return retval; - } - - if (!th) { - pathrelse(&path) ; - goto start_trans; - } - - /* desired position is not found or is in the direct item. We have - to append file with holes up to 'block'-th block converting - direct items to indirect one if necessary */ - done = 0; - do { - if (is_statdata_le_ih (ih)) { - __le32 unp = 0; - struct cpu_key tmp_key; - - /* indirect item has to be inserted */ - make_le_item_head (&tmp_ih, &key, version, 1, TYPE_INDIRECT, - UNFM_P_SIZE, 0/* free_space */); - - if (cpu_key_k_offset (&key) == 1) { - /* we are going to add 'block'-th block to the file. 
Use - allocated block for that */ - unp = cpu_to_le32 (allocated_block_nr); - set_block_dev_mapped (bh_result, allocated_block_nr, inode); - set_buffer_new(bh_result); - done = 1; - } - tmp_key = key; // ;) - set_cpu_key_k_offset (&tmp_key, 1); - PATH_LAST_POSITION(&path) ++; - - retval = reiserfs_insert_item (th, &path, &tmp_key, &tmp_ih, inode, (char *)&unp); - if (retval) { - reiserfs_free_block (th, inode, allocated_block_nr, 1); - goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST - } - //mark_tail_converted (inode); - } else if (is_direct_le_ih (ih)) { - /* direct item has to be converted */ - loff_t tail_offset; - - tail_offset = ((le_ih_k_offset (ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; - if (tail_offset == cpu_key_k_offset (&key)) { - /* direct item we just found fits into block we have - to map. Convert it into unformatted node: use - bh_result for the conversion */ - set_block_dev_mapped (bh_result, allocated_block_nr, inode); - unbh = bh_result; - done = 1; - } else { - /* we have to padd file tail stored in direct item(s) - up to block size and convert it to unformatted - node. 
FIXME: this should also get into page cache */ - - pathrelse(&path) ; - /* - * ugly, but we can only end the transaction if - * we aren't nested - */ - BUG_ON (!th->t_refcount); - if (th->t_refcount == 1) { - retval = reiserfs_end_persistent_transaction(th); - th = NULL; - if (retval) + if (!file_capable(inode, block)) { + reiserfs_write_unlock(inode->i_sb); + return -EFBIG; + } + + /* if !create, we aren't changing the FS, so we don't need to + ** log anything, so we don't need to start a transaction + */ + if (!(create & GET_BLOCK_CREATE)) { + int ret; + /* find number of block-th logical block of the file */ + ret = _get_block_create_0(inode, block, bh_result, + create | GET_BLOCK_READ_DIRECT); + reiserfs_write_unlock(inode->i_sb); + return ret; + } + /* + * if we're already in a transaction, make sure to close + * any new transactions we start in this func + */ + if ((create & GET_BLOCK_NO_DANGLE) || + reiserfs_transaction_running(inode->i_sb)) + dangle = 0; + + /* If file is of such a size, that it might have a tail and tails are enabled + ** we should mark it as possibly needing tail packing on close + */ + if ((have_large_tails(inode->i_sb) + && inode->i_size < i_block_size(inode) * 4) + || (have_small_tails(inode->i_sb) + && inode->i_size < i_block_size(inode))) + REISERFS_I(inode)->i_flags |= i_pack_on_close_mask; + + /* set the key of the first byte in the 'block'-th block of file */ + make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ ); + if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { + start_trans: + th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count); + if (!th) { + retval = -ENOMEM; goto failure; } + reiserfs_update_inode_transaction(inode); + } + research: - retval = convert_tail_for_hole(inode, bh_result, tail_offset) ; - if (retval) { - if ( retval != -ENOSPC ) - reiserfs_warning (inode->i_sb, "clm-6004: convert tail failed inode %lu, error %d", inode->i_ino, retval) ; - if (allocated_block_nr) { - /* 
the bitmap, the super, and the stat data == 3 */ - if (!th) - th = reiserfs_persistent_transaction(inode->i_sb,3); - if (th) - reiserfs_free_block (th,inode,allocated_block_nr,1); - } - goto failure ; - } - goto research ; - } - retval = direct2indirect (th, inode, &path, unbh, tail_offset); - if (retval) { - reiserfs_unmap_buffer(unbh); - reiserfs_free_block (th, inode, allocated_block_nr, 1); - goto failure; - } - /* it is important the set_buffer_uptodate is done after - ** the direct2indirect. The buffer might contain valid - ** data newer than the data on disk (read by readpage, changed, - ** and then sent here by writepage). direct2indirect needs - ** to know if unbh was already up to date, so it can decide - ** if the data in unbh needs to be replaced with data from - ** the disk - */ - set_buffer_uptodate (unbh); - - /* unbh->b_page == NULL in case of DIRECT_IO request, this means - buffer will disappear shortly, so it should not be added to - */ - if ( unbh->b_page ) { - /* we've converted the tail, so we must - ** flush unbh before the transaction commits - */ - reiserfs_add_tail_list(inode, unbh) ; - - /* mark it dirty now to prevent commit_write from adding - ** this buffer to the inode's dirty buffer list - */ - /* - * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). 
- * It's still atomic, but it sets the page dirty too, - * which makes it eligible for writeback at any time by the - * VM (which was also the case with __mark_buffer_dirty()) - */ - mark_buffer_dirty(unbh) ; - } - } else { - /* append indirect item with holes if needed, when appending - pointer to 'block'-th block use block, which is already - allocated */ - struct cpu_key tmp_key; - unp_t unf_single=0; // We use this in case we need to allocate only - // one block which is a fastpath - unp_t *un; - __u64 max_to_insert=MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE; - __u64 blocks_needed; - - RFALSE( pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, - "vs-804: invalid position for append"); - /* indirect item has to be appended, set up key of that position */ - make_cpu_key (&tmp_key, inode, - le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize), - //pos_in_item * inode->i_sb->s_blocksize, - TYPE_INDIRECT, 3);// key type is unimportant - - blocks_needed = 1 + ((cpu_key_k_offset (&key) - cpu_key_k_offset (&tmp_key)) >> inode->i_sb->s_blocksize_bits); - RFALSE( blocks_needed < 0, "green-805: invalid offset"); - - if ( blocks_needed == 1 ) { - un = &unf_single; - } else { - un=kmalloc( min(blocks_needed,max_to_insert)*UNFM_P_SIZE, - GFP_ATOMIC); // We need to avoid scheduling. - if ( !un) { - un = &unf_single; - blocks_needed = 1; - max_to_insert = 0; - } else - memset(un, 0, UNFM_P_SIZE * min(blocks_needed,max_to_insert)); - } - if ( blocks_needed <= max_to_insert) { - /* we are going to add target block to the file. 
Use allocated - block for that */ - un[blocks_needed-1] = cpu_to_le32 (allocated_block_nr); - set_block_dev_mapped (bh_result, allocated_block_nr, inode); - set_buffer_new(bh_result); - done = 1; - } else { - /* paste hole to the indirect item */ - /* If kmalloc failed, max_to_insert becomes zero and it means we - only have space for one block */ - blocks_needed=max_to_insert?max_to_insert:1; - } - retval = reiserfs_paste_into_item (th, &path, &tmp_key, inode, (char *)un, UNFM_P_SIZE * blocks_needed); - - if (blocks_needed != 1) - kfree(un); - - if (retval) { - reiserfs_free_block (th, inode, allocated_block_nr, 1); - goto failure; - } - if (!done) { - /* We need to mark new file size in case this function will be - interrupted/aborted later on. And we may do this only for - holes. */ - inode->i_size += inode->i_sb->s_blocksize * blocks_needed; - } - } - - if (done == 1) - break; - - /* this loop could log more blocks than we had originally asked - ** for. So, we have to allow the transaction to end if it is - ** too big or too full. Update the inode so things are - ** consistent if we crash before the function returns - ** - ** release the path so that anybody waiting on the path before - ** ending their transaction will be able to continue. - */ - if (journal_transaction_should_end(th, th->t_blocks_allocated)) { - retval = restart_transaction(th, inode, &path) ; - if (retval) - goto failure; - } - /* inserting indirect pointers for a hole can take a - ** long time. 
reschedule if needed - */ - cond_resched(); - - retval = search_for_position_by_key (inode->i_sb, &key, &path); + retval = search_for_position_by_key(inode->i_sb, &key, &path); if (retval == IO_ERROR) { - retval = -EIO; - goto failure; - } - if (retval == POSITION_FOUND) { - reiserfs_warning (inode->i_sb, "vs-825: reiserfs_get_block: " - "%K should not be found", &key); - retval = -EEXIST; - if (allocated_block_nr) - reiserfs_free_block (th, inode, allocated_block_nr, 1); - pathrelse(&path) ; - goto failure; - } - bh = get_last_bh (&path); - ih = get_ih (&path); - item = get_item (&path); + retval = -EIO; + goto failure; + } + + bh = get_last_bh(&path); + ih = get_ih(&path); + item = get_item(&path); pos_in_item = path.pos_in_item; - } while (1); + fs_gen = get_generation(inode->i_sb); + copy_item_head(&tmp_ih, ih); + + if (allocation_needed + (retval, allocated_block_nr, ih, item, pos_in_item)) { + /* we have to allocate block for the unformatted node */ + if (!th) { + pathrelse(&path); + goto start_trans; + } + + repeat = + _allocate_block(th, block, inode, &allocated_block_nr, + &path, create); + + if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { + /* restart the transaction to give the journal a chance to free + ** some blocks. 
releases the path, so we have to go back to + ** research if we succeed on the second try + */ + SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; + retval = restart_transaction(th, inode, &path); + if (retval) + goto failure; + repeat = + _allocate_block(th, block, inode, + &allocated_block_nr, NULL, create); + + if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) { + goto research; + } + if (repeat == QUOTA_EXCEEDED) + retval = -EDQUOT; + else + retval = -ENOSPC; + goto failure; + } + + if (fs_changed(fs_gen, inode->i_sb) + && item_moved(&tmp_ih, &path)) { + goto research; + } + } + + if (indirect_item_found(retval, ih)) { + b_blocknr_t unfm_ptr; + /* 'block'-th block is in the file already (there is + corresponding cell in some indirect item). But it may be + zero unformatted node pointer (hole) */ + unfm_ptr = get_block_num(item, pos_in_item); + if (unfm_ptr == 0) { + /* use allocated block to plug the hole */ + reiserfs_prepare_for_journal(inode->i_sb, bh, 1); + if (fs_changed(fs_gen, inode->i_sb) + && item_moved(&tmp_ih, &path)) { + reiserfs_restore_prepared_buffer(inode->i_sb, + bh); + goto research; + } + set_buffer_new(bh_result); + if (buffer_dirty(bh_result) + && reiserfs_data_ordered(inode->i_sb)) + reiserfs_add_ordered_list(inode, bh_result); + put_block_num(item, pos_in_item, allocated_block_nr); + unfm_ptr = allocated_block_nr; + journal_mark_dirty(th, inode->i_sb, bh); + reiserfs_update_sd(th, inode); + } + set_block_dev_mapped(bh_result, unfm_ptr, inode); + pathrelse(&path); + retval = 0; + if (!dangle && th) + retval = reiserfs_end_persistent_transaction(th); + + reiserfs_write_unlock(inode->i_sb); + + /* the item was found, so new blocks were not added to the file + ** there is no need to make sure the inode is updated with this + ** transaction + */ + return retval; + } + + if (!th) { + pathrelse(&path); + goto start_trans; + } + + /* desired position is not found or is in the direct item. 
We have + to append file with holes up to 'block'-th block converting + direct items to indirect one if necessary */ + done = 0; + do { + if (is_statdata_le_ih(ih)) { + __le32 unp = 0; + struct cpu_key tmp_key; + + /* indirect item has to be inserted */ + make_le_item_head(&tmp_ih, &key, version, 1, + TYPE_INDIRECT, UNFM_P_SIZE, + 0 /* free_space */ ); + + if (cpu_key_k_offset(&key) == 1) { + /* we are going to add 'block'-th block to the file. Use + allocated block for that */ + unp = cpu_to_le32(allocated_block_nr); + set_block_dev_mapped(bh_result, + allocated_block_nr, inode); + set_buffer_new(bh_result); + done = 1; + } + tmp_key = key; // ;) + set_cpu_key_k_offset(&tmp_key, 1); + PATH_LAST_POSITION(&path)++; + + retval = + reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih, + inode, (char *)&unp); + if (retval) { + reiserfs_free_block(th, inode, + allocated_block_nr, 1); + goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST + } + //mark_tail_converted (inode); + } else if (is_direct_le_ih(ih)) { + /* direct item has to be converted */ + loff_t tail_offset; + + tail_offset = + ((le_ih_k_offset(ih) - + 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; + if (tail_offset == cpu_key_k_offset(&key)) { + /* direct item we just found fits into block we have + to map. Convert it into unformatted node: use + bh_result for the conversion */ + set_block_dev_mapped(bh_result, + allocated_block_nr, inode); + unbh = bh_result; + done = 1; + } else { + /* we have to padd file tail stored in direct item(s) + up to block size and convert it to unformatted + node. 
FIXME: this should also get into page cache */ + + pathrelse(&path); + /* + * ugly, but we can only end the transaction if + * we aren't nested + */ + BUG_ON(!th->t_refcount); + if (th->t_refcount == 1) { + retval = + reiserfs_end_persistent_transaction + (th); + th = NULL; + if (retval) + goto failure; + } + + retval = + convert_tail_for_hole(inode, bh_result, + tail_offset); + if (retval) { + if (retval != -ENOSPC) + reiserfs_warning(inode->i_sb, + "clm-6004: convert tail failed inode %lu, error %d", + inode->i_ino, + retval); + if (allocated_block_nr) { + /* the bitmap, the super, and the stat data == 3 */ + if (!th) + th = reiserfs_persistent_transaction(inode->i_sb, 3); + if (th) + reiserfs_free_block(th, + inode, + allocated_block_nr, + 1); + } + goto failure; + } + goto research; + } + retval = + direct2indirect(th, inode, &path, unbh, + tail_offset); + if (retval) { + reiserfs_unmap_buffer(unbh); + reiserfs_free_block(th, inode, + allocated_block_nr, 1); + goto failure; + } + /* it is important the set_buffer_uptodate is done after + ** the direct2indirect. The buffer might contain valid + ** data newer than the data on disk (read by readpage, changed, + ** and then sent here by writepage). direct2indirect needs + ** to know if unbh was already up to date, so it can decide + ** if the data in unbh needs to be replaced with data from + ** the disk + */ + set_buffer_uptodate(unbh); + + /* unbh->b_page == NULL in case of DIRECT_IO request, this means + buffer will disappear shortly, so it should not be added to + */ + if (unbh->b_page) { + /* we've converted the tail, so we must + ** flush unbh before the transaction commits + */ + reiserfs_add_tail_list(inode, unbh); + + /* mark it dirty now to prevent commit_write from adding + ** this buffer to the inode's dirty buffer list + */ + /* + * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). 
+ * It's still atomic, but it sets the page dirty too, + * which makes it eligible for writeback at any time by the + * VM (which was also the case with __mark_buffer_dirty()) + */ + mark_buffer_dirty(unbh); + } + } else { + /* append indirect item with holes if needed, when appending + pointer to 'block'-th block use block, which is already + allocated */ + struct cpu_key tmp_key; + unp_t unf_single = 0; // We use this in case we need to allocate only + // one block which is a fastpath + unp_t *un; + __u64 max_to_insert = + MAX_ITEM_LEN(inode->i_sb->s_blocksize) / + UNFM_P_SIZE; + __u64 blocks_needed; + + RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, + "vs-804: invalid position for append"); + /* indirect item has to be appended, set up key of that position */ + make_cpu_key(&tmp_key, inode, + le_key_k_offset(version, + &(ih->ih_key)) + + op_bytes_number(ih, + inode->i_sb->s_blocksize), + //pos_in_item * inode->i_sb->s_blocksize, + TYPE_INDIRECT, 3); // key type is unimportant + + blocks_needed = + 1 + + ((cpu_key_k_offset(&key) - + cpu_key_k_offset(&tmp_key)) >> inode->i_sb-> + s_blocksize_bits); + RFALSE(blocks_needed < 0, "green-805: invalid offset"); + + if (blocks_needed == 1) { + un = &unf_single; + } else { + un = kmalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC); // We need to avoid scheduling. + if (!un) { + un = &unf_single; + blocks_needed = 1; + max_to_insert = 0; + } else + memset(un, 0, + UNFM_P_SIZE * min(blocks_needed, + max_to_insert)); + } + if (blocks_needed <= max_to_insert) { + /* we are going to add target block to the file. Use allocated + block for that */ + un[blocks_needed - 1] = + cpu_to_le32(allocated_block_nr); + set_block_dev_mapped(bh_result, + allocated_block_nr, inode); + set_buffer_new(bh_result); + done = 1; + } else { + /* paste hole to the indirect item */ + /* If kmalloc failed, max_to_insert becomes zero and it means we + only have space for one block */ + blocks_needed = + max_to_insert ? 
max_to_insert : 1; + } + retval = + reiserfs_paste_into_item(th, &path, &tmp_key, inode, + (char *)un, + UNFM_P_SIZE * + blocks_needed); + + if (blocks_needed != 1) + kfree(un); + + if (retval) { + reiserfs_free_block(th, inode, + allocated_block_nr, 1); + goto failure; + } + if (!done) { + /* We need to mark new file size in case this function will be + interrupted/aborted later on. And we may do this only for + holes. */ + inode->i_size += + inode->i_sb->s_blocksize * blocks_needed; + } + } - retval = 0; + if (done == 1) + break; - failure: - if (th && (!dangle || (retval && !th->t_trans_id))) { - int err; - if (th->t_trans_id) - reiserfs_update_sd(th, inode); - err = reiserfs_end_persistent_transaction(th); - if (err) - retval = err; - } + /* this loop could log more blocks than we had originally asked + ** for. So, we have to allow the transaction to end if it is + ** too big or too full. Update the inode so things are + ** consistent if we crash before the function returns + ** + ** release the path so that anybody waiting on the path before + ** ending their transaction will be able to continue. + */ + if (journal_transaction_should_end(th, th->t_blocks_allocated)) { + retval = restart_transaction(th, inode, &path); + if (retval) + goto failure; + } + /* inserting indirect pointers for a hole can take a + ** long time. 
reschedule if needed + */ + cond_resched(); - reiserfs_write_unlock(inode->i_sb); - reiserfs_check_path(&path) ; - return retval; + retval = search_for_position_by_key(inode->i_sb, &key, &path); + if (retval == IO_ERROR) { + retval = -EIO; + goto failure; + } + if (retval == POSITION_FOUND) { + reiserfs_warning(inode->i_sb, + "vs-825: reiserfs_get_block: " + "%K should not be found", &key); + retval = -EEXIST; + if (allocated_block_nr) + reiserfs_free_block(th, inode, + allocated_block_nr, 1); + pathrelse(&path); + goto failure; + } + bh = get_last_bh(&path); + ih = get_ih(&path); + item = get_item(&path); + pos_in_item = path.pos_in_item; + } while (1); + + retval = 0; + + failure: + if (th && (!dangle || (retval && !th->t_trans_id))) { + int err; + if (th->t_trans_id) + reiserfs_update_sd(th, inode); + err = reiserfs_end_persistent_transaction(th); + if (err) + retval = err; + } + + reiserfs_write_unlock(inode->i_sb); + reiserfs_check_path(&path); + return retval; } static int reiserfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) + struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); + return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); } /* Compute real number of used bytes by file @@ -996,51 +1058,56 @@ reiserfs_readpages(struct file *file, struct address_space *mapping, */ static int real_space_diff(struct inode *inode, int sd_size) { - int bytes; - loff_t blocksize = inode->i_sb->s_blocksize ; - - if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) - return sd_size ; - - /* End of file is also in full block with indirect reference, so round - ** up to the next block. - ** - ** there is just no way to know if the tail is actually packed - ** on the file, so we have to assume it isn't. 
When we pack the - ** tail, we add 4 bytes to pretend there really is an unformatted - ** node pointer - */ - bytes = ((inode->i_size + (blocksize-1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + sd_size; - return bytes ; + int bytes; + loff_t blocksize = inode->i_sb->s_blocksize; + + if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) + return sd_size; + + /* End of file is also in full block with indirect reference, so round + ** up to the next block. + ** + ** there is just no way to know if the tail is actually packed + ** on the file, so we have to assume it isn't. When we pack the + ** tail, we add 4 bytes to pretend there really is an unformatted + ** node pointer + */ + bytes = + ((inode->i_size + + (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + + sd_size; + return bytes; } static inline loff_t to_real_used_space(struct inode *inode, ulong blocks, - int sd_size) + int sd_size) { - if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { - return inode->i_size + (loff_t)(real_space_diff(inode, sd_size)) ; - } - return ((loff_t)real_space_diff(inode, sd_size)) + (((loff_t)blocks) << 9); + if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { + return inode->i_size + + (loff_t) (real_space_diff(inode, sd_size)); + } + return ((loff_t) real_space_diff(inode, sd_size)) + + (((loff_t) blocks) << 9); } /* Compute number of blocks used by file in ReiserFS counting */ static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) { - loff_t bytes = inode_get_bytes(inode) ; - loff_t real_space = real_space_diff(inode, sd_size) ; - - /* keeps fsck and non-quota versions of reiserfs happy */ - if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { - bytes += (loff_t)511 ; - } - - /* files from before the quota patch might i_blocks such that - ** bytes < real_space. Deal with that here to prevent it from - ** going negative. 
- */ - if (bytes < real_space) - return 0 ; - return (bytes - real_space) >> 9; + loff_t bytes = inode_get_bytes(inode); + loff_t real_space = real_space_diff(inode, sd_size); + + /* keeps fsck and non-quota versions of reiserfs happy */ + if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { + bytes += (loff_t) 511; + } + + /* files from before the quota patch might i_blocks such that + ** bytes < real_space. Deal with that here to prevent it from + ** going negative. + */ + if (bytes < real_space) + return 0; + return (bytes - real_space) >> 9; } // @@ -1051,263 +1118,269 @@ static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) // // called by read_locked_inode -static void init_inode (struct inode * inode, struct path * path) +static void init_inode(struct inode *inode, struct path *path) { - struct buffer_head * bh; - struct item_head * ih; - __u32 rdev; - //int version = ITEM_VERSION_1; - - bh = PATH_PLAST_BUFFER (path); - ih = PATH_PITEM_HEAD (path); - - - copy_key (INODE_PKEY (inode), &(ih->ih_key)); - inode->i_blksize = reiserfs_default_io_size; - - INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list )); - REISERFS_I(inode)->i_flags = 0; - REISERFS_I(inode)->i_prealloc_block = 0; - REISERFS_I(inode)->i_prealloc_count = 0; - REISERFS_I(inode)->i_trans_id = 0; - REISERFS_I(inode)->i_jl = NULL; - REISERFS_I(inode)->i_acl_access = NULL; - REISERFS_I(inode)->i_acl_default = NULL; - init_rwsem (&REISERFS_I(inode)->xattr_sem); - - if (stat_data_v1 (ih)) { - struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih); - unsigned long blocks; - - set_inode_item_key_version (inode, KEY_FORMAT_3_5); - set_inode_sd_version (inode, STAT_DATA_V1); - inode->i_mode = sd_v1_mode(sd); - inode->i_nlink = sd_v1_nlink(sd); - inode->i_uid = sd_v1_uid(sd); - inode->i_gid = sd_v1_gid(sd); - inode->i_size = sd_v1_size(sd); - inode->i_atime.tv_sec = sd_v1_atime(sd); - inode->i_mtime.tv_sec = sd_v1_mtime(sd); - inode->i_ctime.tv_sec = sd_v1_ctime(sd); - 
inode->i_atime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; - inode->i_mtime.tv_nsec = 0; - - inode->i_blocks = sd_v1_blocks(sd); - inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id); - blocks = (inode->i_size + 511) >> 9; - blocks = _ROUND_UP (blocks, inode->i_sb->s_blocksize >> 9); - if (inode->i_blocks > blocks) { - // there was a bug in <=3.5.23 when i_blocks could take negative - // values. Starting from 3.5.17 this value could even be stored in - // stat data. For such files we set i_blocks based on file - // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be - // only updated if file's inode will ever change - inode->i_blocks = blocks; - } - - rdev = sd_v1_rdev(sd); - REISERFS_I(inode)->i_first_direct_byte = sd_v1_first_direct_byte(sd); - /* an early bug in the quota code can give us an odd number for the - ** block count. This is incorrect, fix it here. - */ - if (inode->i_blocks & 1) { - inode->i_blocks++ ; - } - inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks, - SD_V1_SIZE)); - /* nopack is initially zero for v1 objects. 
For v2 objects, - nopack is initialised from sd_attrs */ - REISERFS_I(inode)->i_flags &= ~i_nopack_mask; - } else { - // new stat data found, but object may have old items - // (directories and symlinks) - struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih); - - inode->i_mode = sd_v2_mode(sd); - inode->i_nlink = sd_v2_nlink(sd); - inode->i_uid = sd_v2_uid(sd); - inode->i_size = sd_v2_size(sd); - inode->i_gid = sd_v2_gid(sd); - inode->i_mtime.tv_sec = sd_v2_mtime(sd); - inode->i_atime.tv_sec = sd_v2_atime(sd); - inode->i_ctime.tv_sec = sd_v2_ctime(sd); - inode->i_ctime.tv_nsec = 0; - inode->i_mtime.tv_nsec = 0; - inode->i_atime.tv_nsec = 0; - inode->i_blocks = sd_v2_blocks(sd); - rdev = sd_v2_rdev(sd); - if( S_ISCHR( inode -> i_mode ) || S_ISBLK( inode -> i_mode ) ) - inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id); - else - inode->i_generation = sd_v2_generation(sd); + struct buffer_head *bh; + struct item_head *ih; + __u32 rdev; + //int version = ITEM_VERSION_1; + + bh = PATH_PLAST_BUFFER(path); + ih = PATH_PITEM_HEAD(path); + + copy_key(INODE_PKEY(inode), &(ih->ih_key)); + inode->i_blksize = reiserfs_default_io_size; + + INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); + REISERFS_I(inode)->i_flags = 0; + REISERFS_I(inode)->i_prealloc_block = 0; + REISERFS_I(inode)->i_prealloc_count = 0; + REISERFS_I(inode)->i_trans_id = 0; + REISERFS_I(inode)->i_jl = NULL; + REISERFS_I(inode)->i_acl_access = NULL; + REISERFS_I(inode)->i_acl_default = NULL; + init_rwsem(&REISERFS_I(inode)->xattr_sem); + + if (stat_data_v1(ih)) { + struct stat_data_v1 *sd = + (struct stat_data_v1 *)B_I_PITEM(bh, ih); + unsigned long blocks; + + set_inode_item_key_version(inode, KEY_FORMAT_3_5); + set_inode_sd_version(inode, STAT_DATA_V1); + inode->i_mode = sd_v1_mode(sd); + inode->i_nlink = sd_v1_nlink(sd); + inode->i_uid = sd_v1_uid(sd); + inode->i_gid = sd_v1_gid(sd); + inode->i_size = sd_v1_size(sd); + inode->i_atime.tv_sec = sd_v1_atime(sd); + 
inode->i_mtime.tv_sec = sd_v1_mtime(sd); + inode->i_ctime.tv_sec = sd_v1_ctime(sd); + inode->i_atime.tv_nsec = 0; + inode->i_ctime.tv_nsec = 0; + inode->i_mtime.tv_nsec = 0; + + inode->i_blocks = sd_v1_blocks(sd); + inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); + blocks = (inode->i_size + 511) >> 9; + blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9); + if (inode->i_blocks > blocks) { + // there was a bug in <=3.5.23 when i_blocks could take negative + // values. Starting from 3.5.17 this value could even be stored in + // stat data. For such files we set i_blocks based on file + // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be + // only updated if file's inode will ever change + inode->i_blocks = blocks; + } - if (S_ISDIR (inode->i_mode) || S_ISLNK (inode->i_mode)) - set_inode_item_key_version (inode, KEY_FORMAT_3_5); - else - set_inode_item_key_version (inode, KEY_FORMAT_3_6); - REISERFS_I(inode)->i_first_direct_byte = 0; - set_inode_sd_version (inode, STAT_DATA_V2); - inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks, - SD_V2_SIZE)); - /* read persistent inode attributes from sd and initalise - generic inode flags from them */ - REISERFS_I(inode)->i_attrs = sd_v2_attrs( sd ); - sd_attrs_to_i_attrs( sd_v2_attrs( sd ), inode ); - } - - pathrelse (path); - if (S_ISREG (inode->i_mode)) { - inode->i_op = &reiserfs_file_inode_operations; - inode->i_fop = &reiserfs_file_operations; - inode->i_mapping->a_ops = &reiserfs_address_space_operations ; - } else if (S_ISDIR (inode->i_mode)) { - inode->i_op = &reiserfs_dir_inode_operations; - inode->i_fop = &reiserfs_dir_operations; - } else if (S_ISLNK (inode->i_mode)) { - inode->i_op = &reiserfs_symlink_inode_operations; - inode->i_mapping->a_ops = &reiserfs_address_space_operations; - } else { - inode->i_blocks = 0; - inode->i_op = &reiserfs_special_inode_operations; - init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); - } -} + rdev = 
sd_v1_rdev(sd); + REISERFS_I(inode)->i_first_direct_byte = + sd_v1_first_direct_byte(sd); + /* an early bug in the quota code can give us an odd number for the + ** block count. This is incorrect, fix it here. + */ + if (inode->i_blocks & 1) { + inode->i_blocks++; + } + inode_set_bytes(inode, + to_real_used_space(inode, inode->i_blocks, + SD_V1_SIZE)); + /* nopack is initially zero for v1 objects. For v2 objects, + nopack is initialised from sd_attrs */ + REISERFS_I(inode)->i_flags &= ~i_nopack_mask; + } else { + // new stat data found, but object may have old items + // (directories and symlinks) + struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); + + inode->i_mode = sd_v2_mode(sd); + inode->i_nlink = sd_v2_nlink(sd); + inode->i_uid = sd_v2_uid(sd); + inode->i_size = sd_v2_size(sd); + inode->i_gid = sd_v2_gid(sd); + inode->i_mtime.tv_sec = sd_v2_mtime(sd); + inode->i_atime.tv_sec = sd_v2_atime(sd); + inode->i_ctime.tv_sec = sd_v2_ctime(sd); + inode->i_ctime.tv_nsec = 0; + inode->i_mtime.tv_nsec = 0; + inode->i_atime.tv_nsec = 0; + inode->i_blocks = sd_v2_blocks(sd); + rdev = sd_v2_rdev(sd); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + inode->i_generation = + le32_to_cpu(INODE_PKEY(inode)->k_dir_id); + else + inode->i_generation = sd_v2_generation(sd); + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + set_inode_item_key_version(inode, KEY_FORMAT_3_5); + else + set_inode_item_key_version(inode, KEY_FORMAT_3_6); + REISERFS_I(inode)->i_first_direct_byte = 0; + set_inode_sd_version(inode, STAT_DATA_V2); + inode_set_bytes(inode, + to_real_used_space(inode, inode->i_blocks, + SD_V2_SIZE)); + /* read persistent inode attributes from sd and initalise + generic inode flags from them */ + REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); + sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); + } + + pathrelse(path); + if (S_ISREG(inode->i_mode)) { + inode->i_op = &reiserfs_file_inode_operations; + inode->i_fop = &reiserfs_file_operations; + 
inode->i_mapping->a_ops = &reiserfs_address_space_operations; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &reiserfs_dir_inode_operations; + inode->i_fop = &reiserfs_dir_operations; + } else if (S_ISLNK(inode->i_mode)) { + inode->i_op = &reiserfs_symlink_inode_operations; + inode->i_mapping->a_ops = &reiserfs_address_space_operations; + } else { + inode->i_blocks = 0; + inode->i_op = &reiserfs_special_inode_operations; + init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); + } +} // update new stat data with inode fields -static void inode2sd (void * sd, struct inode * inode, loff_t size) +static void inode2sd(void *sd, struct inode *inode, loff_t size) { - struct stat_data * sd_v2 = (struct stat_data *)sd; - __u16 flags; - - set_sd_v2_mode(sd_v2, inode->i_mode ); - set_sd_v2_nlink(sd_v2, inode->i_nlink ); - set_sd_v2_uid(sd_v2, inode->i_uid ); - set_sd_v2_size(sd_v2, size ); - set_sd_v2_gid(sd_v2, inode->i_gid ); - set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec ); - set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec ); - set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec ); - set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE)); - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); - else - set_sd_v2_generation(sd_v2, inode->i_generation); - flags = REISERFS_I(inode)->i_attrs; - i_attrs_to_sd_attrs( inode, &flags ); - set_sd_v2_attrs( sd_v2, flags ); + struct stat_data *sd_v2 = (struct stat_data *)sd; + __u16 flags; + + set_sd_v2_mode(sd_v2, inode->i_mode); + set_sd_v2_nlink(sd_v2, inode->i_nlink); + set_sd_v2_uid(sd_v2, inode->i_uid); + set_sd_v2_size(sd_v2, size); + set_sd_v2_gid(sd_v2, inode->i_gid); + set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); + set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); + set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); + set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE)); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + 
set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); + else + set_sd_v2_generation(sd_v2, inode->i_generation); + flags = REISERFS_I(inode)->i_attrs; + i_attrs_to_sd_attrs(inode, &flags); + set_sd_v2_attrs(sd_v2, flags); } - // used to copy inode's fields to old stat data -static void inode2sd_v1 (void * sd, struct inode * inode, loff_t size) +static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) { - struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd; - - set_sd_v1_mode(sd_v1, inode->i_mode ); - set_sd_v1_uid(sd_v1, inode->i_uid ); - set_sd_v1_gid(sd_v1, inode->i_gid ); - set_sd_v1_nlink(sd_v1, inode->i_nlink ); - set_sd_v1_size(sd_v1, size ); - set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec ); - set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec ); - set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec ); - - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev)); - else - set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); - - // Sigh. i_first_direct_byte is back - set_sd_v1_first_direct_byte(sd_v1, REISERFS_I(inode)->i_first_direct_byte); -} + struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; + + set_sd_v1_mode(sd_v1, inode->i_mode); + set_sd_v1_uid(sd_v1, inode->i_uid); + set_sd_v1_gid(sd_v1, inode->i_gid); + set_sd_v1_nlink(sd_v1, inode->i_nlink); + set_sd_v1_size(sd_v1, size); + set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec); + set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec); + set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec); + + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev)); + else + set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); + // Sigh. 
i_first_direct_byte is back + set_sd_v1_first_direct_byte(sd_v1, + REISERFS_I(inode)->i_first_direct_byte); +} /* NOTE, you must prepare the buffer head before sending it here, ** and then log it after the call */ -static void update_stat_data (struct path * path, struct inode * inode, - loff_t size) +static void update_stat_data(struct path *path, struct inode *inode, + loff_t size) { - struct buffer_head * bh; - struct item_head * ih; - - bh = PATH_PLAST_BUFFER (path); - ih = PATH_PITEM_HEAD (path); - - if (!is_statdata_le_ih (ih)) - reiserfs_panic (inode->i_sb, "vs-13065: update_stat_data: key %k, found item %h", - INODE_PKEY (inode), ih); - - if (stat_data_v1 (ih)) { - // path points to old stat data - inode2sd_v1 (B_I_PITEM (bh, ih), inode, size); - } else { - inode2sd (B_I_PITEM (bh, ih), inode, size); - } - - return; -} + struct buffer_head *bh; + struct item_head *ih; + + bh = PATH_PLAST_BUFFER(path); + ih = PATH_PITEM_HEAD(path); + + if (!is_statdata_le_ih(ih)) + reiserfs_panic(inode->i_sb, + "vs-13065: update_stat_data: key %k, found item %h", + INODE_PKEY(inode), ih); + + if (stat_data_v1(ih)) { + // path points to old stat data + inode2sd_v1(B_I_PITEM(bh, ih), inode, size); + } else { + inode2sd(B_I_PITEM(bh, ih), inode, size); + } + return; +} -void reiserfs_update_sd_size (struct reiserfs_transaction_handle *th, - struct inode * inode, loff_t size) +void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, + struct inode *inode, loff_t size) { - struct cpu_key key; - INITIALIZE_PATH(path); - struct buffer_head *bh ; - int fs_gen ; - struct item_head *ih, tmp_ih ; - int retval; - - BUG_ON (!th->t_trans_id); - - make_cpu_key (&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);//key type is unimportant - - for(;;) { - int pos; - /* look for the object's stat data */ - retval = search_item (inode->i_sb, &key, &path); - if (retval == IO_ERROR) { - reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: " - "i/o failure occurred trying to update %K 
stat data", - &key); - return; - } - if (retval == ITEM_NOT_FOUND) { - pos = PATH_LAST_POSITION (&path); - pathrelse(&path) ; - if (inode->i_nlink == 0) { - /*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found");*/ - return; - } - reiserfs_warning (inode->i_sb, "vs-13060: reiserfs_update_sd: " - "stat data of object %k (nlink == %d) not found (pos %d)", - INODE_PKEY (inode), inode->i_nlink, pos); - reiserfs_check_path(&path) ; - return; - } - - /* sigh, prepare_for_journal might schedule. When it schedules the - ** FS might change. We have to detect that, and loop back to the - ** search if the stat data item has moved - */ - bh = get_last_bh(&path) ; - ih = get_ih(&path) ; - copy_item_head (&tmp_ih, ih); - fs_gen = get_generation (inode->i_sb); - reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; - if (fs_changed (fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) { - reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; - continue ; /* Stat_data item has been moved after scheduling. 
*/ - } - break; - } - update_stat_data (&path, inode, size); - journal_mark_dirty(th, th->t_super, bh) ; - pathrelse (&path); - return; + struct cpu_key key; + INITIALIZE_PATH(path); + struct buffer_head *bh; + int fs_gen; + struct item_head *ih, tmp_ih; + int retval; + + BUG_ON(!th->t_trans_id); + + make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); //key type is unimportant + + for (;;) { + int pos; + /* look for the object's stat data */ + retval = search_item(inode->i_sb, &key, &path); + if (retval == IO_ERROR) { + reiserfs_warning(inode->i_sb, + "vs-13050: reiserfs_update_sd: " + "i/o failure occurred trying to update %K stat data", + &key); + return; + } + if (retval == ITEM_NOT_FOUND) { + pos = PATH_LAST_POSITION(&path); + pathrelse(&path); + if (inode->i_nlink == 0) { + /*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */ + return; + } + reiserfs_warning(inode->i_sb, + "vs-13060: reiserfs_update_sd: " + "stat data of object %k (nlink == %d) not found (pos %d)", + INODE_PKEY(inode), inode->i_nlink, + pos); + reiserfs_check_path(&path); + return; + } + + /* sigh, prepare_for_journal might schedule. When it schedules the + ** FS might change. We have to detect that, and loop back to the + ** search if the stat data item has moved + */ + bh = get_last_bh(&path); + ih = get_ih(&path); + copy_item_head(&tmp_ih, ih); + fs_gen = get_generation(inode->i_sb); + reiserfs_prepare_for_journal(inode->i_sb, bh, 1); + if (fs_changed(fs_gen, inode->i_sb) + && item_moved(&tmp_ih, &path)) { + reiserfs_restore_prepared_buffer(inode->i_sb, bh); + continue; /* Stat_data item has been moved after scheduling. 
*/ + } + break; + } + update_stat_data(&path, inode, size); + journal_mark_dirty(th, th->t_super, bh); + pathrelse(&path); + return; } /* reiserfs_read_locked_inode is called to read the inode off disk, and it @@ -1316,9 +1389,10 @@ void reiserfs_update_sd_size (struct reiserfs_transaction_handle *th, ** corresponding iput might try to delete whatever object the inode last ** represented. */ -static void reiserfs_make_bad_inode(struct inode *inode) { - memset(INODE_PKEY(inode), 0, KEY_SIZE); - make_bad_inode(inode); +static void reiserfs_make_bad_inode(struct inode *inode) +{ + memset(INODE_PKEY(inode), 0, KEY_SIZE); + make_bad_inode(inode); } // @@ -1326,77 +1400,79 @@ static void reiserfs_make_bad_inode(struct inode *inode) { // evolved as the prototype did // -int reiserfs_init_locked_inode (struct inode * inode, void *p) +int reiserfs_init_locked_inode(struct inode *inode, void *p) { - struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p ; - inode->i_ino = args->objectid; - INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid); - return 0; + struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; + inode->i_ino = args->objectid; + INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid); + return 0; } /* looks for stat data in the tree, and fills up the fields of in-core inode stat data fields */ -void reiserfs_read_locked_inode (struct inode * inode, struct reiserfs_iget_args *args) +void reiserfs_read_locked_inode(struct inode *inode, + struct reiserfs_iget_args *args) { - INITIALIZE_PATH (path_to_sd); - struct cpu_key key; - unsigned long dirino; - int retval; - - dirino = args->dirid ; - - /* set version 1, version 2 could be used too, because stat data - key is the same in both versions */ - key.version = KEY_FORMAT_3_5; - key.on_disk_key.k_dir_id = dirino; - key.on_disk_key.k_objectid = inode->i_ino; - key.on_disk_key.k_offset = 0; - key.on_disk_key.k_type = 0; - - /* look for the object's stat data */ - retval = search_item 
(inode->i_sb, &key, &path_to_sd); - if (retval == IO_ERROR) { - reiserfs_warning (inode->i_sb, "vs-13070: reiserfs_read_locked_inode: " - "i/o failure occurred trying to find stat data of %K", - &key); - reiserfs_make_bad_inode(inode) ; - return; - } - if (retval != ITEM_FOUND) { - /* a stale NFS handle can trigger this without it being an error */ - pathrelse (&path_to_sd); - reiserfs_make_bad_inode(inode) ; - inode->i_nlink = 0; - return; - } - - init_inode (inode, &path_to_sd); - - /* It is possible that knfsd is trying to access inode of a file - that is being removed from the disk by some other thread. As we - update sd on unlink all that is required is to check for nlink - here. This bug was first found by Sizif when debugging - SquidNG/Butterfly, forgotten, and found again after Philippe - Gramoulle reproduced it. - - More logical fix would require changes in fs/inode.c:iput() to - remove inode from hash-table _after_ fs cleaned disk stuff up and - in iget() to return NULL if I_FREEING inode is found in - hash-table. */ - /* Currently there is one place where it's ok to meet inode with - nlink==0: processing of open-unlinked and half-truncated files - during mount (fs/reiserfs/super.c:finish_unfinished()). */ - if( ( inode -> i_nlink == 0 ) && - ! REISERFS_SB(inode -> i_sb) -> s_is_unlinked_ok ) { - reiserfs_warning (inode->i_sb, - "vs-13075: reiserfs_read_locked_inode: " - "dead inode read from disk %K. " - "This is likely to be race with knfsd. 
Ignore", - &key ); - reiserfs_make_bad_inode( inode ); - } - - reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */ + INITIALIZE_PATH(path_to_sd); + struct cpu_key key; + unsigned long dirino; + int retval; + + dirino = args->dirid; + + /* set version 1, version 2 could be used too, because stat data + key is the same in both versions */ + key.version = KEY_FORMAT_3_5; + key.on_disk_key.k_dir_id = dirino; + key.on_disk_key.k_objectid = inode->i_ino; + key.on_disk_key.k_offset = 0; + key.on_disk_key.k_type = 0; + + /* look for the object's stat data */ + retval = search_item(inode->i_sb, &key, &path_to_sd); + if (retval == IO_ERROR) { + reiserfs_warning(inode->i_sb, + "vs-13070: reiserfs_read_locked_inode: " + "i/o failure occurred trying to find stat data of %K", + &key); + reiserfs_make_bad_inode(inode); + return; + } + if (retval != ITEM_FOUND) { + /* a stale NFS handle can trigger this without it being an error */ + pathrelse(&path_to_sd); + reiserfs_make_bad_inode(inode); + inode->i_nlink = 0; + return; + } + + init_inode(inode, &path_to_sd); + + /* It is possible that knfsd is trying to access inode of a file + that is being removed from the disk by some other thread. As we + update sd on unlink all that is required is to check for nlink + here. This bug was first found by Sizif when debugging + SquidNG/Butterfly, forgotten, and found again after Philippe + Gramoulle reproduced it. + + More logical fix would require changes in fs/inode.c:iput() to + remove inode from hash-table _after_ fs cleaned disk stuff up and + in iget() to return NULL if I_FREEING inode is found in + hash-table. */ + /* Currently there is one place where it's ok to meet inode with + nlink==0: processing of open-unlinked and half-truncated files + during mount (fs/reiserfs/super.c:finish_unfinished()). 
*/ + if ((inode->i_nlink == 0) && + !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) { + reiserfs_warning(inode->i_sb, + "vs-13075: reiserfs_read_locked_inode: " + "dead inode read from disk %K. " + "This is likely to be race with knfsd. Ignore", + &key); + reiserfs_make_bad_inode(inode); + } + + reiserfs_check_path(&path_to_sd); /* init inode should be relsing */ } @@ -1412,140 +1488,148 @@ void reiserfs_read_locked_inode (struct inode * inode, struct reiserfs_iget_args * inode numbers (objectids) are distinguished by parent directory ids. * */ -int reiserfs_find_actor( struct inode *inode, void *opaque ) +int reiserfs_find_actor(struct inode *inode, void *opaque) { - struct reiserfs_iget_args *args; + struct reiserfs_iget_args *args; - args = opaque; - /* args is already in CPU order */ - return (inode->i_ino == args->objectid) && - (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid); + args = opaque; + /* args is already in CPU order */ + return (inode->i_ino == args->objectid) && + (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid); } -struct inode * reiserfs_iget (struct super_block * s, const struct cpu_key * key) +struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key) { - struct inode * inode; - struct reiserfs_iget_args args ; - - args.objectid = key->on_disk_key.k_objectid ; - args.dirid = key->on_disk_key.k_dir_id ; - inode = iget5_locked (s, key->on_disk_key.k_objectid, - reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args)); - if (!inode) - return ERR_PTR(-ENOMEM) ; - - if (inode->i_state & I_NEW) { - reiserfs_read_locked_inode(inode, &args); - unlock_new_inode(inode); - } - - if (comp_short_keys (INODE_PKEY (inode), key) || is_bad_inode (inode)) { - /* either due to i/o error or a stale NFS handle */ - iput (inode); - inode = NULL; - } - return inode; + struct inode *inode; + struct reiserfs_iget_args args; + + args.objectid = key->on_disk_key.k_objectid; + args.dirid = key->on_disk_key.k_dir_id; + inode = 
iget5_locked(s, key->on_disk_key.k_objectid, + reiserfs_find_actor, reiserfs_init_locked_inode, + (void *)(&args)); + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) { + reiserfs_read_locked_inode(inode, &args); + unlock_new_inode(inode); + } + + if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) { + /* either due to i/o error or a stale NFS handle */ + iput(inode); + inode = NULL; + } + return inode; } struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) { - __u32 *data = vobjp; - struct cpu_key key ; - struct dentry *result; - struct inode *inode; - - key.on_disk_key.k_objectid = data[0] ; - key.on_disk_key.k_dir_id = data[1] ; - reiserfs_write_lock(sb); - inode = reiserfs_iget(sb, &key) ; - if (inode && !IS_ERR(inode) && data[2] != 0 && - data[2] != inode->i_generation) { - iput(inode) ; - inode = NULL ; - } - reiserfs_write_unlock(sb); - if (!inode) - inode = ERR_PTR(-ESTALE); - if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); - result = d_alloc_anon(inode); - if (!result) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - return result; + __u32 *data = vobjp; + struct cpu_key key; + struct dentry *result; + struct inode *inode; + + key.on_disk_key.k_objectid = data[0]; + key.on_disk_key.k_dir_id = data[1]; + reiserfs_write_lock(sb); + inode = reiserfs_iget(sb, &key); + if (inode && !IS_ERR(inode) && data[2] != 0 && + data[2] != inode->i_generation) { + iput(inode); + inode = NULL; + } + reiserfs_write_unlock(sb); + if (!inode) + inode = ERR_PTR(-ESTALE); + if (IS_ERR(inode)) + return ERR_PTR(PTR_ERR(inode)); + result = d_alloc_anon(inode); + if (!result) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + return result; } -struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 *data, - int len, int fhtype, - int (*acceptable)(void *contect, struct dentry *de), - void *context) { - __u32 obj[3], parent[3]; - - /* fhtype happens to reflect the number of u32s encoded. 
- * due to a bug in earlier code, fhtype might indicate there - * are more u32s then actually fitted. - * so if fhtype seems to be more than len, reduce fhtype. - * Valid types are: - * 2 - objectid + dir_id - legacy support - * 3 - objectid + dir_id + generation - * 4 - objectid + dir_id + objectid and dirid of parent - legacy - * 5 - objectid + dir_id + generation + objectid and dirid of parent - * 6 - as above plus generation of directory - * 6 does not fit in NFSv2 handles - */ - if (fhtype > len) { - if (fhtype != 6 || len != 5) - reiserfs_warning (sb, "nfsd/reiserfs, fhtype=%d, len=%d - odd", - fhtype, len); - fhtype = 5; - } - - obj[0] = data[0]; - obj[1] = data[1]; - if (fhtype == 3 || fhtype >= 5) - obj[2] = data[2]; - else obj[2] = 0; /* generation number */ - - if (fhtype >= 4) { - parent[0] = data[fhtype>=5?3:2] ; - parent[1] = data[fhtype>=5?4:3] ; - if (fhtype == 6) - parent[2] = data[5]; - else parent[2] = 0; - } - return sb->s_export_op->find_exported_dentry(sb, obj, fhtype < 4 ? NULL : parent, - acceptable, context); -} +struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, + int len, int fhtype, + int (*acceptable) (void *contect, + struct dentry * de), + void *context) +{ + __u32 obj[3], parent[3]; + + /* fhtype happens to reflect the number of u32s encoded. + * due to a bug in earlier code, fhtype might indicate there + * are more u32s then actually fitted. + * so if fhtype seems to be more than len, reduce fhtype. 
+ * Valid types are: + * 2 - objectid + dir_id - legacy support + * 3 - objectid + dir_id + generation + * 4 - objectid + dir_id + objectid and dirid of parent - legacy + * 5 - objectid + dir_id + generation + objectid and dirid of parent + * 6 - as above plus generation of directory + * 6 does not fit in NFSv2 handles + */ + if (fhtype > len) { + if (fhtype != 6 || len != 5) + reiserfs_warning(sb, + "nfsd/reiserfs, fhtype=%d, len=%d - odd", + fhtype, len); + fhtype = 5; + } + + obj[0] = data[0]; + obj[1] = data[1]; + if (fhtype == 3 || fhtype >= 5) + obj[2] = data[2]; + else + obj[2] = 0; /* generation number */ -int reiserfs_encode_fh(struct dentry *dentry, __u32 *data, int *lenp, int need_parent) { - struct inode *inode = dentry->d_inode ; - int maxlen = *lenp; - - if (maxlen < 3) - return 255 ; - - data[0] = inode->i_ino ; - data[1] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ; - data[2] = inode->i_generation ; - *lenp = 3 ; - /* no room for directory info? return what we've stored so far */ - if (maxlen < 5 || ! need_parent) - return 3 ; - - spin_lock(&dentry->d_lock); - inode = dentry->d_parent->d_inode ; - data[3] = inode->i_ino ; - data[4] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ; - *lenp = 5 ; - if (maxlen >= 6) { - data[5] = inode->i_generation ; - *lenp = 6 ; - } - spin_unlock(&dentry->d_lock); - return *lenp ; + if (fhtype >= 4) { + parent[0] = data[fhtype >= 5 ? 3 : 2]; + parent[1] = data[fhtype >= 5 ? 4 : 3]; + if (fhtype == 6) + parent[2] = data[5]; + else + parent[2] = 0; + } + return sb->s_export_op->find_exported_dentry(sb, obj, + fhtype < 4 ? NULL : parent, + acceptable, context); } +int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, + int need_parent) +{ + struct inode *inode = dentry->d_inode; + int maxlen = *lenp; + + if (maxlen < 3) + return 255; + + data[0] = inode->i_ino; + data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); + data[2] = inode->i_generation; + *lenp = 3; + /* no room for directory info? 
return what we've stored so far */ + if (maxlen < 5 || !need_parent) + return 3; + + spin_lock(&dentry->d_lock); + inode = dentry->d_parent->d_inode; + data[3] = inode->i_ino; + data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); + *lenp = 5; + if (maxlen >= 6) { + data[5] = inode->i_generation; + *lenp = 6; + } + spin_unlock(&dentry->d_lock); + return *lenp; +} /* looks for stat data, then copies fields to it, marks the buffer containing stat data as dirty */ @@ -1554,120 +1638,127 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 *data, int *lenp, int need_p ** to properly mark inodes for datasync and such, but only actually ** does something when called for a synchronous update. */ -int reiserfs_write_inode (struct inode * inode, int do_sync) { - struct reiserfs_transaction_handle th ; - int jbegin_count = 1 ; - - if (inode->i_sb->s_flags & MS_RDONLY) - return -EROFS; - /* memory pressure can sometimes initiate write_inode calls with sync == 1, - ** these cases are just when the system needs ram, not when the - ** inode needs to reach disk for safety, and they can safely be - ** ignored because the altered inode has already been logged. - */ - if (do_sync && !(current->flags & PF_MEMALLOC)) { - reiserfs_write_lock(inode->i_sb); - if (!journal_begin(&th, inode->i_sb, jbegin_count)) { - reiserfs_update_sd (&th, inode); - journal_end_sync(&th, inode->i_sb, jbegin_count) ; - } - reiserfs_write_unlock(inode->i_sb); - } - return 0; +int reiserfs_write_inode(struct inode *inode, int do_sync) +{ + struct reiserfs_transaction_handle th; + int jbegin_count = 1; + + if (inode->i_sb->s_flags & MS_RDONLY) + return -EROFS; + /* memory pressure can sometimes initiate write_inode calls with sync == 1, + ** these cases are just when the system needs ram, not when the + ** inode needs to reach disk for safety, and they can safely be + ** ignored because the altered inode has already been logged. 
+ */ + if (do_sync && !(current->flags & PF_MEMALLOC)) { + reiserfs_write_lock(inode->i_sb); + if (!journal_begin(&th, inode->i_sb, jbegin_count)) { + reiserfs_update_sd(&th, inode); + journal_end_sync(&th, inode->i_sb, jbegin_count); + } + reiserfs_write_unlock(inode->i_sb); + } + return 0; } /* stat data of new object is inserted already, this inserts the item containing "." and ".." entries */ -static int reiserfs_new_directory (struct reiserfs_transaction_handle *th, - struct inode *inode, - struct item_head * ih, struct path * path, - struct inode * dir) +static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, + struct inode *inode, + struct item_head *ih, struct path *path, + struct inode *dir) { - struct super_block * sb = th->t_super; - char empty_dir [EMPTY_DIR_SIZE]; - char * body = empty_dir; - struct cpu_key key; - int retval; - - BUG_ON (!th->t_trans_id); - - _make_cpu_key (&key, KEY_FORMAT_3_5, le32_to_cpu (ih->ih_key.k_dir_id), - le32_to_cpu (ih->ih_key.k_objectid), DOT_OFFSET, TYPE_DIRENTRY, 3/*key length*/); - - /* compose item head for new item. Directories consist of items of - old type (ITEM_VERSION_1). 
Do not set key (second arg is 0), it - is done by reiserfs_new_inode */ - if (old_format_only (sb)) { - make_le_item_head (ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); - - make_empty_dir_item_v1 (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid, - INODE_PKEY (dir)->k_dir_id, - INODE_PKEY (dir)->k_objectid ); - } else { - make_le_item_head (ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2); - - make_empty_dir_item (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid, - INODE_PKEY (dir)->k_dir_id, - INODE_PKEY (dir)->k_objectid ); - } - - /* look for place in the tree for new item */ - retval = search_item (sb, &key, path); - if (retval == IO_ERROR) { - reiserfs_warning (sb, "vs-13080: reiserfs_new_directory: " - "i/o failure occurred creating new directory"); - return -EIO; - } - if (retval == ITEM_FOUND) { - pathrelse (path); - reiserfs_warning (sb, "vs-13070: reiserfs_new_directory: " - "object with this key exists (%k)", &(ih->ih_key)); - return -EEXIST; - } - - /* insert item, that is empty directory item */ - return reiserfs_insert_item (th, path, &key, ih, inode, body); -} + struct super_block *sb = th->t_super; + char empty_dir[EMPTY_DIR_SIZE]; + char *body = empty_dir; + struct cpu_key key; + int retval; + + BUG_ON(!th->t_trans_id); + + _make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id), + le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET, + TYPE_DIRENTRY, 3 /*key length */ ); + + /* compose item head for new item. Directories consist of items of + old type (ITEM_VERSION_1). 
Do not set key (second arg is 0), it + is done by reiserfs_new_inode */ + if (old_format_only(sb)) { + make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, + TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); + + make_empty_dir_item_v1(body, ih->ih_key.k_dir_id, + ih->ih_key.k_objectid, + INODE_PKEY(dir)->k_dir_id, + INODE_PKEY(dir)->k_objectid); + } else { + make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, + TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2); + + make_empty_dir_item(body, ih->ih_key.k_dir_id, + ih->ih_key.k_objectid, + INODE_PKEY(dir)->k_dir_id, + INODE_PKEY(dir)->k_objectid); + } + + /* look for place in the tree for new item */ + retval = search_item(sb, &key, path); + if (retval == IO_ERROR) { + reiserfs_warning(sb, "vs-13080: reiserfs_new_directory: " + "i/o failure occurred creating new directory"); + return -EIO; + } + if (retval == ITEM_FOUND) { + pathrelse(path); + reiserfs_warning(sb, "vs-13070: reiserfs_new_directory: " + "object with this key exists (%k)", + &(ih->ih_key)); + return -EEXIST; + } + /* insert item, that is empty directory item */ + return reiserfs_insert_item(th, path, &key, ih, inode, body); +} /* stat data of object has been inserted, this inserts the item containing the body of symlink */ -static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th, - struct inode *inode, /* Inode of symlink */ - struct item_head * ih, - struct path * path, const char * symname, int item_len) +static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */ + struct item_head *ih, + struct path *path, const char *symname, + int item_len) { - struct super_block * sb = th->t_super; - struct cpu_key key; - int retval; - - BUG_ON (!th->t_trans_id); - - _make_cpu_key (&key, KEY_FORMAT_3_5, - le32_to_cpu (ih->ih_key.k_dir_id), - le32_to_cpu (ih->ih_key.k_objectid), - 1, TYPE_DIRECT, 3/*key length*/); - - make_le_item_head (ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, 0/*free_space*/); - - /* 
look for place in the tree for new item */ - retval = search_item (sb, &key, path); - if (retval == IO_ERROR) { - reiserfs_warning (sb, "vs-13080: reiserfs_new_symlinik: " - "i/o failure occurred creating new symlink"); - return -EIO; - } - if (retval == ITEM_FOUND) { - pathrelse (path); - reiserfs_warning (sb, "vs-13080: reiserfs_new_symlink: " - "object with this key exists (%k)", &(ih->ih_key)); - return -EEXIST; - } - - /* insert item, that is body of symlink */ - return reiserfs_insert_item (th, path, &key, ih, inode, symname); -} + struct super_block *sb = th->t_super; + struct cpu_key key; + int retval; + + BUG_ON(!th->t_trans_id); + + _make_cpu_key(&key, KEY_FORMAT_3_5, + le32_to_cpu(ih->ih_key.k_dir_id), + le32_to_cpu(ih->ih_key.k_objectid), + 1, TYPE_DIRECT, 3 /*key length */ ); + + make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, + 0 /*free_space */ ); + + /* look for place in the tree for new item */ + retval = search_item(sb, &key, path); + if (retval == IO_ERROR) { + reiserfs_warning(sb, "vs-13080: reiserfs_new_symlinik: " + "i/o failure occurred creating new symlink"); + return -EIO; + } + if (retval == ITEM_FOUND) { + pathrelse(path); + reiserfs_warning(sb, "vs-13080: reiserfs_new_symlink: " + "object with this key exists (%k)", + &(ih->ih_key)); + return -EEXIST; + } + /* insert item, that is body of symlink */ + return reiserfs_insert_item(th, path, &key, ih, inode, symname); +} /* inserts the stat data into the tree, and then calls reiserfs_new_directory (to insert ".", ".." item if new object is @@ -1678,213 +1769,219 @@ static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th, non-zero due to an error, we have to drop the quota previously allocated for the fresh inode. This can only be done outside a transaction, so if we return non-zero, we also end the transaction. 
*/ -int reiserfs_new_inode (struct reiserfs_transaction_handle *th, - struct inode * dir, int mode, - const char * symname, - /* 0 for regular, EMTRY_DIR_SIZE for dirs, - strlen (symname) for symlinks)*/ - loff_t i_size, struct dentry *dentry, - struct inode *inode) +int reiserfs_new_inode(struct reiserfs_transaction_handle *th, + struct inode *dir, int mode, const char *symname, + /* 0 for regular, EMTRY_DIR_SIZE for dirs, + strlen (symname) for symlinks) */ + loff_t i_size, struct dentry *dentry, + struct inode *inode) { - struct super_block * sb; - INITIALIZE_PATH (path_to_key); - struct cpu_key key; - struct item_head ih; - struct stat_data sd; - int retval; - int err; - - BUG_ON (!th->t_trans_id); - - if (DQUOT_ALLOC_INODE(inode)) { - err = -EDQUOT; - goto out_end_trans; - } - if (!dir || !dir->i_nlink) { - err = -EPERM; - goto out_bad_inode; - } - - sb = dir->i_sb; - - /* item head of new item */ - ih.ih_key.k_dir_id = reiserfs_choose_packing(dir); - ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th)); - if (!ih.ih_key.k_objectid) { - err = -ENOMEM; - goto out_bad_inode ; - } - if (old_format_only (sb)) - /* not a perfect generation count, as object ids can be reused, but - ** this is as good as reiserfs can do right now. - ** note that the private part of inode isn't filled in yet, we have - ** to use the directory. 
- */ - inode->i_generation = le32_to_cpu (INODE_PKEY (dir)->k_objectid); - else + struct super_block *sb; + INITIALIZE_PATH(path_to_key); + struct cpu_key key; + struct item_head ih; + struct stat_data sd; + int retval; + int err; + + BUG_ON(!th->t_trans_id); + + if (DQUOT_ALLOC_INODE(inode)) { + err = -EDQUOT; + goto out_end_trans; + } + if (!dir || !dir->i_nlink) { + err = -EPERM; + goto out_bad_inode; + } + + sb = dir->i_sb; + + /* item head of new item */ + ih.ih_key.k_dir_id = reiserfs_choose_packing(dir); + ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th)); + if (!ih.ih_key.k_objectid) { + err = -ENOMEM; + goto out_bad_inode; + } + if (old_format_only(sb)) + /* not a perfect generation count, as object ids can be reused, but + ** this is as good as reiserfs can do right now. + ** note that the private part of inode isn't filled in yet, we have + ** to use the directory. + */ + inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid); + else #if defined( USE_INODE_GENERATION_COUNTER ) - inode->i_generation = le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation); + inode->i_generation = + le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation); #else - inode->i_generation = ++event; + inode->i_generation = ++event; #endif - /* fill stat data */ - inode->i_nlink = (S_ISDIR (mode) ? 2 : 1); - - /* uid and gid must already be set by the caller for quota init */ - - /* symlink cannot be immutable or append only, right? */ - if( S_ISLNK( inode -> i_mode ) ) - inode -> i_flags &= ~ ( S_IMMUTABLE | S_APPEND ); - - inode->i_mtime = inode->i_atime = inode->i_ctime = - CURRENT_TIME_SEC; - inode->i_size = i_size; - inode->i_blocks = 0; - inode->i_bytes = 0; - REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 
1 : - U32_MAX/*NO_BYTES_IN_DIRECT_ITEM*/; - - INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list )); - REISERFS_I(inode)->i_flags = 0; - REISERFS_I(inode)->i_prealloc_block = 0; - REISERFS_I(inode)->i_prealloc_count = 0; - REISERFS_I(inode)->i_trans_id = 0; - REISERFS_I(inode)->i_jl = NULL; - REISERFS_I(inode)->i_attrs = - REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; - sd_attrs_to_i_attrs( REISERFS_I(inode) -> i_attrs, inode ); - REISERFS_I(inode)->i_acl_access = NULL; - REISERFS_I(inode)->i_acl_default = NULL; - init_rwsem (&REISERFS_I(inode)->xattr_sem); - - if (old_format_only (sb)) - make_le_item_head (&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT); - else - make_le_item_head (&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT); - - /* key to search for correct place for new stat data */ - _make_cpu_key (&key, KEY_FORMAT_3_6, le32_to_cpu (ih.ih_key.k_dir_id), - le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/*key length*/); - - /* find proper place for inserting of stat data */ - retval = search_item (sb, &key, &path_to_key); - if (retval == IO_ERROR) { - err = -EIO; - goto out_bad_inode; - } - if (retval == ITEM_FOUND) { - pathrelse (&path_to_key); - err = -EEXIST; - goto out_bad_inode; - } - if (old_format_only (sb)) { - if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) { - pathrelse (&path_to_key); - /* i_uid or i_gid is too big to be stored in stat data v3.5 */ - err = -EINVAL; - goto out_bad_inode; - } - inode2sd_v1 (&sd, inode, inode->i_size); - } else { - inode2sd (&sd, inode, inode->i_size); - } - // these do not go to on-disk stat data - inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid); - inode->i_blksize = reiserfs_default_io_size; - - // store in in-core inode the key of stat data and version all - // object items will have (directory items will have old offset - // format, other new objects will consist of new items) - memcpy (INODE_PKEY (inode), &(ih.ih_key), 
KEY_SIZE); - if (old_format_only (sb) || S_ISDIR(mode) || S_ISLNK(mode)) - set_inode_item_key_version (inode, KEY_FORMAT_3_5); - else - set_inode_item_key_version (inode, KEY_FORMAT_3_6); - if (old_format_only (sb)) - set_inode_sd_version (inode, STAT_DATA_V1); - else - set_inode_sd_version (inode, STAT_DATA_V2); - - /* insert the stat data into the tree */ + /* fill stat data */ + inode->i_nlink = (S_ISDIR(mode) ? 2 : 1); + + /* uid and gid must already be set by the caller for quota init */ + + /* symlink cannot be immutable or append only, right? */ + if (S_ISLNK(inode->i_mode)) + inode->i_flags &= ~(S_IMMUTABLE | S_APPEND); + + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + inode->i_size = i_size; + inode->i_blocks = 0; + inode->i_bytes = 0; + REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 : + U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ; + + INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); + REISERFS_I(inode)->i_flags = 0; + REISERFS_I(inode)->i_prealloc_block = 0; + REISERFS_I(inode)->i_prealloc_count = 0; + REISERFS_I(inode)->i_trans_id = 0; + REISERFS_I(inode)->i_jl = NULL; + REISERFS_I(inode)->i_attrs = + REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; + sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); + REISERFS_I(inode)->i_acl_access = NULL; + REISERFS_I(inode)->i_acl_default = NULL; + init_rwsem(&REISERFS_I(inode)->xattr_sem); + + if (old_format_only(sb)) + make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET, + TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT); + else + make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET, + TYPE_STAT_DATA, SD_SIZE, MAX_US_INT); + + /* key to search for correct place for new stat data */ + _make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id), + le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET, + TYPE_STAT_DATA, 3 /*key length */ ); + + /* find proper place for inserting of stat data */ + retval = search_item(sb, &key, &path_to_key); + if (retval == IO_ERROR) { + err = 
-EIO; + goto out_bad_inode; + } + if (retval == ITEM_FOUND) { + pathrelse(&path_to_key); + err = -EEXIST; + goto out_bad_inode; + } + if (old_format_only(sb)) { + if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) { + pathrelse(&path_to_key); + /* i_uid or i_gid is too big to be stored in stat data v3.5 */ + err = -EINVAL; + goto out_bad_inode; + } + inode2sd_v1(&sd, inode, inode->i_size); + } else { + inode2sd(&sd, inode, inode->i_size); + } + // these do not go to on-disk stat data + inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid); + inode->i_blksize = reiserfs_default_io_size; + + // store in in-core inode the key of stat data and version all + // object items will have (directory items will have old offset + // format, other new objects will consist of new items) + memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); + if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) + set_inode_item_key_version(inode, KEY_FORMAT_3_5); + else + set_inode_item_key_version(inode, KEY_FORMAT_3_6); + if (old_format_only(sb)) + set_inode_sd_version(inode, STAT_DATA_V1); + else + set_inode_sd_version(inode, STAT_DATA_V2); + + /* insert the stat data into the tree */ #ifdef DISPLACE_NEW_PACKING_LOCALITIES - if (REISERFS_I(dir)->new_packing_locality) - th->displace_new_blocks = 1; + if (REISERFS_I(dir)->new_packing_locality) + th->displace_new_blocks = 1; #endif - retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, inode, (char *)(&sd)); - if (retval) { - err = retval; - reiserfs_check_path(&path_to_key) ; - goto out_bad_inode; - } - + retval = + reiserfs_insert_item(th, &path_to_key, &key, &ih, inode, + (char *)(&sd)); + if (retval) { + err = retval; + reiserfs_check_path(&path_to_key); + goto out_bad_inode; + } #ifdef DISPLACE_NEW_PACKING_LOCALITIES - if (!th->displace_new_blocks) - REISERFS_I(dir)->new_packing_locality = 0; + if (!th->displace_new_blocks) + REISERFS_I(dir)->new_packing_locality = 0; #endif - if (S_ISDIR(mode)) { - /* insert item with "." and ".." 
*/ - retval = reiserfs_new_directory (th, inode, &ih, &path_to_key, dir); - } - - if (S_ISLNK(mode)) { - /* insert body of symlink */ - if (!old_format_only (sb)) - i_size = ROUND_UP(i_size); - retval = reiserfs_new_symlink (th, inode, &ih, &path_to_key, symname, i_size); - } - if (retval) { - err = retval; - reiserfs_check_path(&path_to_key) ; - journal_end(th, th->t_super, th->t_blocks_allocated); - goto out_inserted_sd; - } - - /* XXX CHECK THIS */ - if (reiserfs_posixacl (inode->i_sb)) { - retval = reiserfs_inherit_default_acl (dir, dentry, inode); - if (retval) { - err = retval; - reiserfs_check_path(&path_to_key) ; - journal_end(th, th->t_super, th->t_blocks_allocated); - goto out_inserted_sd; - } - } else if (inode->i_sb->s_flags & MS_POSIXACL) { - reiserfs_warning (inode->i_sb, "ACLs aren't enabled in the fs, " - "but vfs thinks they are!"); - } else if (is_reiserfs_priv_object (dir)) { - reiserfs_mark_inode_private (inode); - } - - insert_inode_hash (inode); - reiserfs_update_sd(th, inode); - reiserfs_check_path(&path_to_key) ; - - return 0; + if (S_ISDIR(mode)) { + /* insert item with "." and ".." 
*/ + retval = + reiserfs_new_directory(th, inode, &ih, &path_to_key, dir); + } + + if (S_ISLNK(mode)) { + /* insert body of symlink */ + if (!old_format_only(sb)) + i_size = ROUND_UP(i_size); + retval = + reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname, + i_size); + } + if (retval) { + err = retval; + reiserfs_check_path(&path_to_key); + journal_end(th, th->t_super, th->t_blocks_allocated); + goto out_inserted_sd; + } + + /* XXX CHECK THIS */ + if (reiserfs_posixacl(inode->i_sb)) { + retval = reiserfs_inherit_default_acl(dir, dentry, inode); + if (retval) { + err = retval; + reiserfs_check_path(&path_to_key); + journal_end(th, th->t_super, th->t_blocks_allocated); + goto out_inserted_sd; + } + } else if (inode->i_sb->s_flags & MS_POSIXACL) { + reiserfs_warning(inode->i_sb, "ACLs aren't enabled in the fs, " + "but vfs thinks they are!"); + } else if (is_reiserfs_priv_object(dir)) { + reiserfs_mark_inode_private(inode); + } + + insert_inode_hash(inode); + reiserfs_update_sd(th, inode); + reiserfs_check_path(&path_to_key); + + return 0; /* it looks like you can easily compress these two goto targets into * one. Keeping it like this doesn't actually hurt anything, and they * are place holders for what the quota code actually needs. 
*/ -out_bad_inode: - /* Invalidate the object, nothing was inserted yet */ - INODE_PKEY(inode)->k_objectid = 0; - - /* Quota change must be inside a transaction for journaling */ - DQUOT_FREE_INODE(inode); - -out_end_trans: - journal_end(th, th->t_super, th->t_blocks_allocated) ; - /* Drop can be outside and it needs more credits so it's better to have it outside */ - DQUOT_DROP(inode); - inode->i_flags |= S_NOQUOTA; - make_bad_inode(inode); - -out_inserted_sd: - inode->i_nlink = 0; - th->t_trans_id = 0; /* so the caller can't use this handle later */ - iput(inode); - return err; + out_bad_inode: + /* Invalidate the object, nothing was inserted yet */ + INODE_PKEY(inode)->k_objectid = 0; + + /* Quota change must be inside a transaction for journaling */ + DQUOT_FREE_INODE(inode); + + out_end_trans: + journal_end(th, th->t_super, th->t_blocks_allocated); + /* Drop can be outside and it needs more credits so it's better to have it outside */ + DQUOT_DROP(inode); + inode->i_flags |= S_NOQUOTA; + make_bad_inode(inode); + + out_inserted_sd: + inode->i_nlink = 0; + th->t_trans_id = 0; /* so the caller can't use this handle later */ + iput(inode); + return err; } /* @@ -1900,77 +1997,78 @@ out_inserted_sd: ** ** on failure, nonzero is returned, page_result and bh_result are untouched. */ -static int grab_tail_page(struct inode *p_s_inode, - struct page **page_result, - struct buffer_head **bh_result) { - - /* we want the page with the last byte in the file, - ** not the page that will hold the next byte for appending - */ - unsigned long index = (p_s_inode->i_size-1) >> PAGE_CACHE_SHIFT ; - unsigned long pos = 0 ; - unsigned long start = 0 ; - unsigned long blocksize = p_s_inode->i_sb->s_blocksize ; - unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1) ; - struct buffer_head *bh ; - struct buffer_head *head ; - struct page * page ; - int error ; - - /* we know that we are only called with inode->i_size > 0. 
- ** we also know that a file tail can never be as big as a block - ** If i_size % blocksize == 0, our file is currently block aligned - ** and it won't need converting or zeroing after a truncate. - */ - if ((offset & (blocksize - 1)) == 0) { - return -ENOENT ; - } - page = grab_cache_page(p_s_inode->i_mapping, index) ; - error = -ENOMEM ; - if (!page) { - goto out ; - } - /* start within the page of the last block in the file */ - start = (offset / blocksize) * blocksize ; - - error = block_prepare_write(page, start, offset, - reiserfs_get_block_create_0) ; - if (error) - goto unlock ; - - head = page_buffers(page) ; - bh = head; - do { - if (pos >= start) { - break ; - } - bh = bh->b_this_page ; - pos += blocksize ; - } while(bh != head) ; - - if (!buffer_uptodate(bh)) { - /* note, this should never happen, prepare_write should - ** be taking care of this for us. If the buffer isn't up to date, - ** I've screwed up the code to find the buffer, or the code to - ** call prepare_write - */ - reiserfs_warning (p_s_inode->i_sb, - "clm-6000: error reading block %lu on dev %s", - bh->b_blocknr, - reiserfs_bdevname (p_s_inode->i_sb)) ; - error = -EIO ; - goto unlock ; - } - *bh_result = bh ; - *page_result = page ; - -out: - return error ; - -unlock: - unlock_page(page) ; - page_cache_release(page) ; - return error ; +static int grab_tail_page(struct inode *p_s_inode, + struct page **page_result, + struct buffer_head **bh_result) +{ + + /* we want the page with the last byte in the file, + ** not the page that will hold the next byte for appending + */ + unsigned long index = (p_s_inode->i_size - 1) >> PAGE_CACHE_SHIFT; + unsigned long pos = 0; + unsigned long start = 0; + unsigned long blocksize = p_s_inode->i_sb->s_blocksize; + unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1); + struct buffer_head *bh; + struct buffer_head *head; + struct page *page; + int error; + + /* we know that we are only called with inode->i_size > 0. 
+ ** we also know that a file tail can never be as big as a block + ** If i_size % blocksize == 0, our file is currently block aligned + ** and it won't need converting or zeroing after a truncate. + */ + if ((offset & (blocksize - 1)) == 0) { + return -ENOENT; + } + page = grab_cache_page(p_s_inode->i_mapping, index); + error = -ENOMEM; + if (!page) { + goto out; + } + /* start within the page of the last block in the file */ + start = (offset / blocksize) * blocksize; + + error = block_prepare_write(page, start, offset, + reiserfs_get_block_create_0); + if (error) + goto unlock; + + head = page_buffers(page); + bh = head; + do { + if (pos >= start) { + break; + } + bh = bh->b_this_page; + pos += blocksize; + } while (bh != head); + + if (!buffer_uptodate(bh)) { + /* note, this should never happen, prepare_write should + ** be taking care of this for us. If the buffer isn't up to date, + ** I've screwed up the code to find the buffer, or the code to + ** call prepare_write + */ + reiserfs_warning(p_s_inode->i_sb, + "clm-6000: error reading block %lu on dev %s", + bh->b_blocknr, + reiserfs_bdevname(p_s_inode->i_sb)); + error = -EIO; + goto unlock; + } + *bh_result = bh; + *page_result = page; + + out: + return error; + + unlock: + unlock_page(page); + page_cache_release(page); + return error; } /* @@ -1979,235 +2077,247 @@ unlock: ** ** some code taken from block_truncate_page */ -int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) { - struct reiserfs_transaction_handle th ; - /* we want the offset for the first byte after the end of the file */ - unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1) ; - unsigned blocksize = p_s_inode->i_sb->s_blocksize ; - unsigned length ; - struct page *page = NULL ; - int error ; - struct buffer_head *bh = NULL ; - - reiserfs_write_lock(p_s_inode->i_sb); - - if (p_s_inode->i_size > 0) { - if ((error = grab_tail_page(p_s_inode, &page, &bh))) { - // -ENOENT means we truncated past the end of the 
file, - // and get_block_create_0 could not find a block to read in, - // which is ok. - if (error != -ENOENT) - reiserfs_warning (p_s_inode->i_sb, - "clm-6001: grab_tail_page failed %d", - error); - page = NULL ; - bh = NULL ; - } - } - - /* so, if page != NULL, we have a buffer head for the offset at - ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, - ** then we have an unformatted node. Otherwise, we have a direct item, - ** and no zeroing is required on disk. We zero after the truncate, - ** because the truncate might pack the item anyway - ** (it will unmap bh if it packs). - */ - /* it is enough to reserve space in transaction for 2 balancings: - one for "save" link adding and another for the first - cut_from_item. 1 is for update_sd */ - error = journal_begin (&th, p_s_inode->i_sb, - JOURNAL_PER_BALANCE_CNT * 2 + 1); - if (error) - goto out; - reiserfs_update_inode_transaction(p_s_inode) ; - if (update_timestamps) - /* we are doing real truncate: if the system crashes before the last - transaction of truncating gets committed - on reboot the file - either appears truncated properly or not truncated at all */ - add_save_link (&th, p_s_inode, 1); - error = reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ; - if (error) - goto out; - error = journal_end (&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1); - if (error) - goto out; - - if (update_timestamps) { - error = remove_save_link (p_s_inode, 1/* truncate */); - if (error) - goto out; - } - - if (page) { - length = offset & (blocksize - 1) ; - /* if we are not on a block boundary */ - if (length) { - char *kaddr; - - length = blocksize - length ; - kaddr = kmap_atomic(page, KM_USER0) ; - memset(kaddr + offset, 0, length) ; - flush_dcache_page(page) ; - kunmap_atomic(kaddr, KM_USER0) ; - if (buffer_mapped(bh) && bh->b_blocknr != 0) { - mark_buffer_dirty(bh) ; - } - } - unlock_page(page) ; - page_cache_release(page) ; - } - - reiserfs_write_unlock(p_s_inode->i_sb); - 
return 0; -out: - if (page) { - unlock_page (page); - page_cache_release (page); - } - reiserfs_write_unlock(p_s_inode->i_sb); - return error; -} +int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) +{ + struct reiserfs_transaction_handle th; + /* we want the offset for the first byte after the end of the file */ + unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1); + unsigned blocksize = p_s_inode->i_sb->s_blocksize; + unsigned length; + struct page *page = NULL; + int error; + struct buffer_head *bh = NULL; + + reiserfs_write_lock(p_s_inode->i_sb); + + if (p_s_inode->i_size > 0) { + if ((error = grab_tail_page(p_s_inode, &page, &bh))) { + // -ENOENT means we truncated past the end of the file, + // and get_block_create_0 could not find a block to read in, + // which is ok. + if (error != -ENOENT) + reiserfs_warning(p_s_inode->i_sb, + "clm-6001: grab_tail_page failed %d", + error); + page = NULL; + bh = NULL; + } + } -static int map_block_for_writepage(struct inode *inode, - struct buffer_head *bh_result, - unsigned long block) { - struct reiserfs_transaction_handle th ; - int fs_gen ; - struct item_head tmp_ih ; - struct item_head *ih ; - struct buffer_head *bh ; - __le32 *item ; - struct cpu_key key ; - INITIALIZE_PATH(path) ; - int pos_in_item ; - int jbegin_count = JOURNAL_PER_BALANCE_CNT ; - loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ; - int retval ; - int use_get_block = 0 ; - int bytes_copied = 0 ; - int copy_size ; - int trans_running = 0; - - /* catch places below that try to log something without starting a trans */ - th.t_trans_id = 0; - - if (!buffer_uptodate(bh_result)) { - return -EIO; - } - - kmap(bh_result->b_page) ; -start_over: - reiserfs_write_lock(inode->i_sb); - make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ; - -research: - retval = search_for_position_by_key(inode->i_sb, &key, &path) ; - if (retval != POSITION_FOUND) { - use_get_block = 1; - goto out ; - } - - bh = 
get_last_bh(&path) ; - ih = get_ih(&path) ; - item = get_item(&path) ; - pos_in_item = path.pos_in_item ; - - /* we've found an unformatted node */ - if (indirect_item_found(retval, ih)) { - if (bytes_copied > 0) { - reiserfs_warning (inode->i_sb, "clm-6002: bytes_copied %d", - bytes_copied) ; - } - if (!get_block_num(item, pos_in_item)) { - /* crap, we are writing to a hole */ - use_get_block = 1; - goto out ; - } - set_block_dev_mapped(bh_result, get_block_num(item,pos_in_item),inode); - } else if (is_direct_le_ih(ih)) { - char *p ; - p = page_address(bh_result->b_page) ; - p += (byte_offset -1) & (PAGE_CACHE_SIZE - 1) ; - copy_size = ih_item_len(ih) - pos_in_item; - - fs_gen = get_generation(inode->i_sb) ; - copy_item_head(&tmp_ih, ih) ; - - if (!trans_running) { - /* vs-3050 is gone, no need to drop the path */ - retval = journal_begin(&th, inode->i_sb, jbegin_count) ; - if (retval) - goto out; - reiserfs_update_inode_transaction(inode) ; - trans_running = 1; - if (fs_changed(fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) { - reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; - goto research; - } - } - - reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; - - if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { - reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; - goto research; - } - - memcpy( B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, copy_size) ; - - journal_mark_dirty(&th, inode->i_sb, bh) ; - bytes_copied += copy_size ; - set_block_dev_mapped(bh_result, 0, inode); - - /* are there still bytes left? 
*/ - if (bytes_copied < bh_result->b_size && - (byte_offset + bytes_copied) < inode->i_size) { - set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + copy_size) ; - goto research ; - } - } else { - reiserfs_warning (inode->i_sb, - "clm-6003: bad item inode %lu, device %s", - inode->i_ino, reiserfs_bdevname (inode->i_sb)) ; - retval = -EIO ; - goto out ; - } - retval = 0 ; - -out: - pathrelse(&path) ; - if (trans_running) { - int err = journal_end(&th, inode->i_sb, jbegin_count) ; - if (err) - retval = err; - trans_running = 0; - } - reiserfs_write_unlock(inode->i_sb); - - /* this is where we fill in holes in the file. */ - if (use_get_block) { - retval = reiserfs_get_block(inode, block, bh_result, - GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM | - GET_BLOCK_NO_DANGLE); - if (!retval) { - if (!buffer_mapped(bh_result) || bh_result->b_blocknr == 0) { - /* get_block failed to find a mapped unformatted node. */ - use_get_block = 0 ; - goto start_over ; - } - } - } - kunmap(bh_result->b_page) ; - - if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { - /* we've copied data from the page into the direct item, so the - * buffer in the page is now clean, mark it to reflect that. + /* so, if page != NULL, we have a buffer head for the offset at + ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, + ** then we have an unformatted node. Otherwise, we have a direct item, + ** and no zeroing is required on disk. We zero after the truncate, + ** because the truncate might pack the item anyway + ** (it will unmap bh if it packs). */ - lock_buffer(bh_result); - clear_buffer_dirty(bh_result); - unlock_buffer(bh_result); - } - return retval ; + /* it is enough to reserve space in transaction for 2 balancings: + one for "save" link adding and another for the first + cut_from_item. 
1 is for update_sd */ + error = journal_begin(&th, p_s_inode->i_sb, + JOURNAL_PER_BALANCE_CNT * 2 + 1); + if (error) + goto out; + reiserfs_update_inode_transaction(p_s_inode); + if (update_timestamps) + /* we are doing real truncate: if the system crashes before the last + transaction of truncating gets committed - on reboot the file + either appears truncated properly or not truncated at all */ + add_save_link(&th, p_s_inode, 1); + error = reiserfs_do_truncate(&th, p_s_inode, page, update_timestamps); + if (error) + goto out; + error = + journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1); + if (error) + goto out; + + if (update_timestamps) { + error = remove_save_link(p_s_inode, 1 /* truncate */ ); + if (error) + goto out; + } + + if (page) { + length = offset & (blocksize - 1); + /* if we are not on a block boundary */ + if (length) { + char *kaddr; + + length = blocksize - length; + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, 0, length); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + if (buffer_mapped(bh) && bh->b_blocknr != 0) { + mark_buffer_dirty(bh); + } + } + unlock_page(page); + page_cache_release(page); + } + + reiserfs_write_unlock(p_s_inode->i_sb); + return 0; + out: + if (page) { + unlock_page(page); + page_cache_release(page); + } + reiserfs_write_unlock(p_s_inode->i_sb); + return error; +} + +static int map_block_for_writepage(struct inode *inode, + struct buffer_head *bh_result, + unsigned long block) +{ + struct reiserfs_transaction_handle th; + int fs_gen; + struct item_head tmp_ih; + struct item_head *ih; + struct buffer_head *bh; + __le32 *item; + struct cpu_key key; + INITIALIZE_PATH(path); + int pos_in_item; + int jbegin_count = JOURNAL_PER_BALANCE_CNT; + loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1; + int retval; + int use_get_block = 0; + int bytes_copied = 0; + int copy_size; + int trans_running = 0; + + /* catch places below that try to log something without starting a 
trans */ + th.t_trans_id = 0; + + if (!buffer_uptodate(bh_result)) { + return -EIO; + } + + kmap(bh_result->b_page); + start_over: + reiserfs_write_lock(inode->i_sb); + make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3); + + research: + retval = search_for_position_by_key(inode->i_sb, &key, &path); + if (retval != POSITION_FOUND) { + use_get_block = 1; + goto out; + } + + bh = get_last_bh(&path); + ih = get_ih(&path); + item = get_item(&path); + pos_in_item = path.pos_in_item; + + /* we've found an unformatted node */ + if (indirect_item_found(retval, ih)) { + if (bytes_copied > 0) { + reiserfs_warning(inode->i_sb, + "clm-6002: bytes_copied %d", + bytes_copied); + } + if (!get_block_num(item, pos_in_item)) { + /* crap, we are writing to a hole */ + use_get_block = 1; + goto out; + } + set_block_dev_mapped(bh_result, + get_block_num(item, pos_in_item), inode); + } else if (is_direct_le_ih(ih)) { + char *p; + p = page_address(bh_result->b_page); + p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1); + copy_size = ih_item_len(ih) - pos_in_item; + + fs_gen = get_generation(inode->i_sb); + copy_item_head(&tmp_ih, ih); + + if (!trans_running) { + /* vs-3050 is gone, no need to drop the path */ + retval = journal_begin(&th, inode->i_sb, jbegin_count); + if (retval) + goto out; + reiserfs_update_inode_transaction(inode); + trans_running = 1; + if (fs_changed(fs_gen, inode->i_sb) + && item_moved(&tmp_ih, &path)) { + reiserfs_restore_prepared_buffer(inode->i_sb, + bh); + goto research; + } + } + + reiserfs_prepare_for_journal(inode->i_sb, bh, 1); + + if (fs_changed(fs_gen, inode->i_sb) + && item_moved(&tmp_ih, &path)) { + reiserfs_restore_prepared_buffer(inode->i_sb, bh); + goto research; + } + + memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, + copy_size); + + journal_mark_dirty(&th, inode->i_sb, bh); + bytes_copied += copy_size; + set_block_dev_mapped(bh_result, 0, inode); + + /* are there still bytes left? 
*/ + if (bytes_copied < bh_result->b_size && + (byte_offset + bytes_copied) < inode->i_size) { + set_cpu_key_k_offset(&key, + cpu_key_k_offset(&key) + + copy_size); + goto research; + } + } else { + reiserfs_warning(inode->i_sb, + "clm-6003: bad item inode %lu, device %s", + inode->i_ino, reiserfs_bdevname(inode->i_sb)); + retval = -EIO; + goto out; + } + retval = 0; + + out: + pathrelse(&path); + if (trans_running) { + int err = journal_end(&th, inode->i_sb, jbegin_count); + if (err) + retval = err; + trans_running = 0; + } + reiserfs_write_unlock(inode->i_sb); + + /* this is where we fill in holes in the file. */ + if (use_get_block) { + retval = reiserfs_get_block(inode, block, bh_result, + GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM + | GET_BLOCK_NO_DANGLE); + if (!retval) { + if (!buffer_mapped(bh_result) + || bh_result->b_blocknr == 0) { + /* get_block failed to find a mapped unformatted node. */ + use_get_block = 0; + goto start_over; + } + } + } + kunmap(bh_result->b_page); + + if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { + /* we've copied data from the page into the direct item, so the + * buffer in the page is now clean, mark it to reflect that. + */ + lock_buffer(bh_result); + clear_buffer_dirty(bh_result); + unlock_buffer(bh_result); + } + return retval; } /* @@ -2215,383 +2325,390 @@ out: * start/recovery path as __block_write_full_page, along with special * code to handle reiserfs tails. 
*/ -static int reiserfs_write_full_page(struct page *page, struct writeback_control *wbc) { - struct inode *inode = page->mapping->host ; - unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ; - int error = 0; - unsigned long block ; - struct buffer_head *head, *bh; - int partial = 0 ; - int nr = 0; - int checked = PageChecked(page); - struct reiserfs_transaction_handle th; - struct super_block *s = inode->i_sb; - int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; - th.t_trans_id = 0; - - /* The page dirty bit is cleared before writepage is called, which - * means we have to tell create_empty_buffers to make dirty buffers - * The page really should be up to date at this point, so tossing - * in the BH_Uptodate is just a sanity check. - */ - if (!page_has_buffers(page)) { - create_empty_buffers(page, s->s_blocksize, - (1 << BH_Dirty) | (1 << BH_Uptodate)); - } - head = page_buffers(page) ; - - /* last page in the file, zero out any contents past the - ** last byte in the file - */ - if (page->index >= end_index) { - char *kaddr; - unsigned last_offset; - - last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ; - /* no file contents in this page */ - if (page->index >= end_index + 1 || !last_offset) { - unlock_page(page); - return 0; - } - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE-last_offset) ; - flush_dcache_page(page) ; - kunmap_atomic(kaddr, KM_USER0) ; - } - bh = head ; - block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits) ; - /* first map all the buffers, logging any direct items we find */ - do { - if ((checked || buffer_dirty(bh)) && (!buffer_mapped(bh) || - (buffer_mapped(bh) && bh->b_blocknr == 0))) { - /* not mapped yet, or it points to a direct item, search - * the btree for the mapping info, and log any direct - * items found - */ - if ((error = map_block_for_writepage(inode, bh, block))) { - goto fail ; - } - } - bh = bh->b_this_page; - block++; - } while(bh != head) ; - - /* - * we start 
the transaction after map_block_for_writepage, - * because it can create holes in the file (an unbounded operation). - * starting it here, we can make a reliable estimate for how many - * blocks we're going to log - */ - if (checked) { - ClearPageChecked(page); - reiserfs_write_lock(s); - error = journal_begin(&th, s, bh_per_page + 1); - if (error) { - reiserfs_write_unlock(s); - goto fail; - } - reiserfs_update_inode_transaction(inode); - } - /* now go through and lock any dirty buffers on the page */ - do { - get_bh(bh); - if (!buffer_mapped(bh)) - continue; - if (buffer_mapped(bh) && bh->b_blocknr == 0) - continue; +static int reiserfs_write_full_page(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; + int error = 0; + unsigned long block; + struct buffer_head *head, *bh; + int partial = 0; + int nr = 0; + int checked = PageChecked(page); + struct reiserfs_transaction_handle th; + struct super_block *s = inode->i_sb; + int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; + th.t_trans_id = 0; + + /* The page dirty bit is cleared before writepage is called, which + * means we have to tell create_empty_buffers to make dirty buffers + * The page really should be up to date at this point, so tossing + * in the BH_Uptodate is just a sanity check. 
+ */ + if (!page_has_buffers(page)) { + create_empty_buffers(page, s->s_blocksize, + (1 << BH_Dirty) | (1 << BH_Uptodate)); + } + head = page_buffers(page); - if (checked) { - reiserfs_prepare_for_journal(s, bh, 1); - journal_mark_dirty(&th, s, bh); - continue; + /* last page in the file, zero out any contents past the + ** last byte in the file + */ + if (page->index >= end_index) { + char *kaddr; + unsigned last_offset; + + last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1); + /* no file contents in this page */ + if (page->index >= end_index + 1 || !last_offset) { + unlock_page(page); + return 0; + } + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE - last_offset); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); } - /* from this point on, we know the buffer is mapped to a - * real block and not a direct item + bh = head; + block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); + /* first map all the buffers, logging any direct items we find */ + do { + if ((checked || buffer_dirty(bh)) && (!buffer_mapped(bh) || + (buffer_mapped(bh) + && bh->b_blocknr == + 0))) { + /* not mapped yet, or it points to a direct item, search + * the btree for the mapping info, and log any direct + * items found + */ + if ((error = map_block_for_writepage(inode, bh, block))) { + goto fail; + } + } + bh = bh->b_this_page; + block++; + } while (bh != head); + + /* + * we start the transaction after map_block_for_writepage, + * because it can create holes in the file (an unbounded operation). 
+ * starting it here, we can make a reliable estimate for how many + * blocks we're going to log */ - if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { - lock_buffer(bh); - } else { - if (test_set_buffer_locked(bh)) { - redirty_page_for_writepage(wbc, page); - continue; - } + if (checked) { + ClearPageChecked(page); + reiserfs_write_lock(s); + error = journal_begin(&th, s, bh_per_page + 1); + if (error) { + reiserfs_write_unlock(s); + goto fail; + } + reiserfs_update_inode_transaction(inode); } - if (test_clear_buffer_dirty(bh)) { - mark_buffer_async_write(bh); - } else { - unlock_buffer(bh); + /* now go through and lock any dirty buffers on the page */ + do { + get_bh(bh); + if (!buffer_mapped(bh)) + continue; + if (buffer_mapped(bh) && bh->b_blocknr == 0) + continue; + + if (checked) { + reiserfs_prepare_for_journal(s, bh, 1); + journal_mark_dirty(&th, s, bh); + continue; + } + /* from this point on, we know the buffer is mapped to a + * real block and not a direct item + */ + if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { + lock_buffer(bh); + } else { + if (test_set_buffer_locked(bh)) { + redirty_page_for_writepage(wbc, page); + continue; + } + } + if (test_clear_buffer_dirty(bh)) { + mark_buffer_async_write(bh); + } else { + unlock_buffer(bh); + } + } while ((bh = bh->b_this_page) != head); + + if (checked) { + error = journal_end(&th, s, bh_per_page + 1); + reiserfs_write_unlock(s); + if (error) + goto fail; } - } while((bh = bh->b_this_page) != head); + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + unlock_page(page); - if (checked) { - error = journal_end(&th, s, bh_per_page + 1); - reiserfs_write_unlock(s); - if (error) - goto fail; - } - BUG_ON(PageWriteback(page)); - set_page_writeback(page); - unlock_page(page); - - /* - * since any buffer might be the only dirty buffer on the page, - * the first submit_bh can bring the page out of writeback. - * be careful with the buffers. 
- */ - do { - struct buffer_head *next = bh->b_this_page; - if (buffer_async_write(bh)) { - submit_bh(WRITE, bh); - nr++; - } - put_bh(bh); - bh = next; - } while(bh != head); - - error = 0; -done: - if (nr == 0) { - /* - * if this page only had a direct item, it is very possible for - * no io to be required without there being an error. Or, - * someone else could have locked them and sent them down the - * pipe without locking the page + /* + * since any buffer might be the only dirty buffer on the page, + * the first submit_bh can bring the page out of writeback. + * be careful with the buffers. */ - bh = head ; do { - if (!buffer_uptodate(bh)) { - partial = 1; - break; - } - bh = bh->b_this_page; - } while(bh != head); - if (!partial) - SetPageUptodate(page); - end_page_writeback(page); - } - return error; - -fail: - /* catches various errors, we need to make sure any valid dirty blocks - * get to the media. The page is currently locked and not marked for - * writeback - */ - ClearPageUptodate(page); - bh = head; - do { - get_bh(bh); - if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) { - lock_buffer(bh); - mark_buffer_async_write(bh); - } else { - /* - * clear any dirty bits that might have come from getting - * attached to a dirty page - */ - clear_buffer_dirty(bh); - } - bh = bh->b_this_page; - } while(bh != head); - SetPageError(page); - BUG_ON(PageWriteback(page)); - set_page_writeback(page); - unlock_page(page); - do { - struct buffer_head *next = bh->b_this_page; - if (buffer_async_write(bh)) { - clear_buffer_dirty(bh); - submit_bh(WRITE, bh); - nr++; - } - put_bh(bh); - bh = next; - } while(bh != head); - goto done; -} + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + submit_bh(WRITE, bh); + nr++; + } + put_bh(bh); + bh = next; + } while (bh != head); + error = 0; + done: + if (nr == 0) { + /* + * if this page only had a direct item, it is very possible for + * no io to be required without there being an error. 
Or, + * someone else could have locked them and sent them down the + * pipe without locking the page + */ + bh = head; + do { + if (!buffer_uptodate(bh)) { + partial = 1; + break; + } + bh = bh->b_this_page; + } while (bh != head); + if (!partial) + SetPageUptodate(page); + end_page_writeback(page); + } + return error; -static int reiserfs_readpage (struct file *f, struct page * page) -{ - return block_read_full_page (page, reiserfs_get_block); + fail: + /* catches various errors, we need to make sure any valid dirty blocks + * get to the media. The page is currently locked and not marked for + * writeback + */ + ClearPageUptodate(page); + bh = head; + do { + get_bh(bh); + if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) { + lock_buffer(bh); + mark_buffer_async_write(bh); + } else { + /* + * clear any dirty bits that might have come from getting + * attached to a dirty page + */ + clear_buffer_dirty(bh); + } + bh = bh->b_this_page; + } while (bh != head); + SetPageError(page); + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + unlock_page(page); + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + clear_buffer_dirty(bh); + submit_bh(WRITE, bh); + nr++; + } + put_bh(bh); + bh = next; + } while (bh != head); + goto done; } +static int reiserfs_readpage(struct file *f, struct page *page) +{ + return block_read_full_page(page, reiserfs_get_block); +} -static int reiserfs_writepage (struct page * page, struct writeback_control *wbc) +static int reiserfs_writepage(struct page *page, struct writeback_control *wbc) { - struct inode *inode = page->mapping->host ; - reiserfs_wait_on_write_block(inode->i_sb) ; - return reiserfs_write_full_page(page, wbc) ; + struct inode *inode = page->mapping->host; + reiserfs_wait_on_write_block(inode->i_sb); + return reiserfs_write_full_page(page, wbc); } static int reiserfs_prepare_write(struct file *f, struct page *page, - unsigned from, unsigned to) { - struct inode *inode = 
page->mapping->host ; - int ret; - int old_ref = 0; - - reiserfs_wait_on_write_block(inode->i_sb) ; - fix_tail_page_for_writing(page) ; - if (reiserfs_transaction_running(inode->i_sb)) { - struct reiserfs_transaction_handle *th; - th = (struct reiserfs_transaction_handle *)current->journal_info; - BUG_ON (!th->t_refcount); - BUG_ON (!th->t_trans_id); - old_ref = th->t_refcount; - th->t_refcount++; - } - - ret = block_prepare_write(page, from, to, reiserfs_get_block) ; - if (ret && reiserfs_transaction_running(inode->i_sb)) { - struct reiserfs_transaction_handle *th = current->journal_info; - /* this gets a little ugly. If reiserfs_get_block returned an - * error and left a transacstion running, we've got to close it, - * and we've got to free handle if it was a persistent transaction. - * - * But, if we had nested into an existing transaction, we need - * to just drop the ref count on the handle. - * - * If old_ref == 0, the transaction is from reiserfs_get_block, - * and it was a persistent trans. Otherwise, it was nested above. - */ - if (th->t_refcount > old_ref) { - if (old_ref) - th->t_refcount--; - else { - int err; - reiserfs_write_lock(inode->i_sb); - err = reiserfs_end_persistent_transaction(th); - reiserfs_write_unlock(inode->i_sb); - if (err) - ret = err; - } + unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + int ret; + int old_ref = 0; + + reiserfs_wait_on_write_block(inode->i_sb); + fix_tail_page_for_writing(page); + if (reiserfs_transaction_running(inode->i_sb)) { + struct reiserfs_transaction_handle *th; + th = (struct reiserfs_transaction_handle *)current-> + journal_info; + BUG_ON(!th->t_refcount); + BUG_ON(!th->t_trans_id); + old_ref = th->t_refcount; + th->t_refcount++; } - } - return ret; -} + ret = block_prepare_write(page, from, to, reiserfs_get_block); + if (ret && reiserfs_transaction_running(inode->i_sb)) { + struct reiserfs_transaction_handle *th = current->journal_info; + /* this gets a little ugly. 
If reiserfs_get_block returned an + * error and left a transacstion running, we've got to close it, + * and we've got to free handle if it was a persistent transaction. + * + * But, if we had nested into an existing transaction, we need + * to just drop the ref count on the handle. + * + * If old_ref == 0, the transaction is from reiserfs_get_block, + * and it was a persistent trans. Otherwise, it was nested above. + */ + if (th->t_refcount > old_ref) { + if (old_ref) + th->t_refcount--; + else { + int err; + reiserfs_write_lock(inode->i_sb); + err = reiserfs_end_persistent_transaction(th); + reiserfs_write_unlock(inode->i_sb); + if (err) + ret = err; + } + } + } + return ret; +} -static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block) { - return generic_block_bmap(as, block, reiserfs_bmap) ; +static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block) +{ + return generic_block_bmap(as, block, reiserfs_bmap); } -static int reiserfs_commit_write(struct file *f, struct page *page, - unsigned from, unsigned to) { - struct inode *inode = page->mapping->host ; - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - int ret = 0; - int update_sd = 0; - struct reiserfs_transaction_handle *th = NULL; - - reiserfs_wait_on_write_block(inode->i_sb) ; - if (reiserfs_transaction_running(inode->i_sb)) { - th = current->journal_info; - } - reiserfs_commit_page(inode, page, from, to); - - /* generic_commit_write does this for us, but does not update the - ** transaction tracking stuff when the size changes. So, we have - ** to do the i_size updates here. 
- */ - if (pos > inode->i_size) { - struct reiserfs_transaction_handle myth ; - reiserfs_write_lock(inode->i_sb); - /* If the file have grown beyond the border where it - can have a tail, unmark it as needing a tail - packing */ - if ( (have_large_tails (inode->i_sb) && inode->i_size > i_block_size (inode)*4) || - (have_small_tails (inode->i_sb) && inode->i_size > i_block_size(inode)) ) - REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask ; - - ret = journal_begin(&myth, inode->i_sb, 1) ; - if (ret) { - reiserfs_write_unlock(inode->i_sb); - goto journal_error; - } - reiserfs_update_inode_transaction(inode) ; - inode->i_size = pos ; - reiserfs_update_sd(&myth, inode) ; - update_sd = 1; - ret = journal_end(&myth, inode->i_sb, 1) ; - reiserfs_write_unlock(inode->i_sb); - if (ret) - goto journal_error; - } - if (th) { - reiserfs_write_lock(inode->i_sb); - if (!update_sd) - reiserfs_update_sd(th, inode) ; - ret = reiserfs_end_persistent_transaction(th); - reiserfs_write_unlock(inode->i_sb); - if (ret) - goto out; - } - - /* we test for O_SYNC here so we can commit the transaction - ** for any packed tails the file might have had - */ - if (f && (f->f_flags & O_SYNC)) { - reiserfs_write_lock(inode->i_sb); - ret = reiserfs_commit_for_inode(inode) ; - reiserfs_write_unlock(inode->i_sb); - } -out: - return ret ; +static int reiserfs_commit_write(struct file *f, struct page *page, + unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to; + int ret = 0; + int update_sd = 0; + struct reiserfs_transaction_handle *th = NULL; + + reiserfs_wait_on_write_block(inode->i_sb); + if (reiserfs_transaction_running(inode->i_sb)) { + th = current->journal_info; + } + reiserfs_commit_page(inode, page, from, to); -journal_error: - if (th) { - reiserfs_write_lock(inode->i_sb); - if (!update_sd) - reiserfs_update_sd(th, inode) ; - ret = reiserfs_end_persistent_transaction(th); - 
reiserfs_write_unlock(inode->i_sb); - } + /* generic_commit_write does this for us, but does not update the + ** transaction tracking stuff when the size changes. So, we have + ** to do the i_size updates here. + */ + if (pos > inode->i_size) { + struct reiserfs_transaction_handle myth; + reiserfs_write_lock(inode->i_sb); + /* If the file have grown beyond the border where it + can have a tail, unmark it as needing a tail + packing */ + if ((have_large_tails(inode->i_sb) + && inode->i_size > i_block_size(inode) * 4) + || (have_small_tails(inode->i_sb) + && inode->i_size > i_block_size(inode))) + REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; + + ret = journal_begin(&myth, inode->i_sb, 1); + if (ret) { + reiserfs_write_unlock(inode->i_sb); + goto journal_error; + } + reiserfs_update_inode_transaction(inode); + inode->i_size = pos; + reiserfs_update_sd(&myth, inode); + update_sd = 1; + ret = journal_end(&myth, inode->i_sb, 1); + reiserfs_write_unlock(inode->i_sb); + if (ret) + goto journal_error; + } + if (th) { + reiserfs_write_lock(inode->i_sb); + if (!update_sd) + reiserfs_update_sd(th, inode); + ret = reiserfs_end_persistent_transaction(th); + reiserfs_write_unlock(inode->i_sb); + if (ret) + goto out; + } + + /* we test for O_SYNC here so we can commit the transaction + ** for any packed tails the file might have had + */ + if (f && (f->f_flags & O_SYNC)) { + reiserfs_write_lock(inode->i_sb); + ret = reiserfs_commit_for_inode(inode); + reiserfs_write_unlock(inode->i_sb); + } + out: + return ret; - return ret; + journal_error: + if (th) { + reiserfs_write_lock(inode->i_sb); + if (!update_sd) + reiserfs_update_sd(th, inode); + ret = reiserfs_end_persistent_transaction(th); + reiserfs_write_unlock(inode->i_sb); + } + + return ret; } -void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode ) +void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode) { - if( reiserfs_attrs( inode -> i_sb ) ) { - if( sd_attrs & REISERFS_SYNC_FL ) - inode -> i_flags |= 
S_SYNC; + if (reiserfs_attrs(inode->i_sb)) { + if (sd_attrs & REISERFS_SYNC_FL) + inode->i_flags |= S_SYNC; else - inode -> i_flags &= ~S_SYNC; - if( sd_attrs & REISERFS_IMMUTABLE_FL ) - inode -> i_flags |= S_IMMUTABLE; + inode->i_flags &= ~S_SYNC; + if (sd_attrs & REISERFS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; else - inode -> i_flags &= ~S_IMMUTABLE; - if( sd_attrs & REISERFS_APPEND_FL ) - inode -> i_flags |= S_APPEND; + inode->i_flags &= ~S_IMMUTABLE; + if (sd_attrs & REISERFS_APPEND_FL) + inode->i_flags |= S_APPEND; else - inode -> i_flags &= ~S_APPEND; - if( sd_attrs & REISERFS_NOATIME_FL ) - inode -> i_flags |= S_NOATIME; + inode->i_flags &= ~S_APPEND; + if (sd_attrs & REISERFS_NOATIME_FL) + inode->i_flags |= S_NOATIME; else - inode -> i_flags &= ~S_NOATIME; - if( sd_attrs & REISERFS_NOTAIL_FL ) + inode->i_flags &= ~S_NOATIME; + if (sd_attrs & REISERFS_NOTAIL_FL) REISERFS_I(inode)->i_flags |= i_nopack_mask; else REISERFS_I(inode)->i_flags &= ~i_nopack_mask; } } -void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs ) +void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs) { - if( reiserfs_attrs( inode -> i_sb ) ) { - if( inode -> i_flags & S_IMMUTABLE ) + if (reiserfs_attrs(inode->i_sb)) { + if (inode->i_flags & S_IMMUTABLE) *sd_attrs |= REISERFS_IMMUTABLE_FL; else *sd_attrs &= ~REISERFS_IMMUTABLE_FL; - if( inode -> i_flags & S_SYNC ) + if (inode->i_flags & S_SYNC) *sd_attrs |= REISERFS_SYNC_FL; else *sd_attrs &= ~REISERFS_SYNC_FL; - if( inode -> i_flags & S_NOATIME ) + if (inode->i_flags & S_NOATIME) *sd_attrs |= REISERFS_NOATIME_FL; else *sd_attrs &= ~REISERFS_NOATIME_FL; - if( REISERFS_I(inode)->i_flags & i_nopack_mask ) + if (REISERFS_I(inode)->i_flags & i_nopack_mask) *sd_attrs |= REISERFS_NOTAIL_FL; else *sd_attrs &= ~REISERFS_NOTAIL_FL; @@ -2603,106 +2720,107 @@ void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs ) */ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) { - int ret = 1 ; - 
struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ; - - spin_lock(&j->j_dirty_buffers_lock) ; - if (!buffer_mapped(bh)) { - goto free_jh; - } - /* the page is locked, and the only places that log a data buffer - * also lock the page. - */ - if (reiserfs_file_data_log(inode)) { - /* - * very conservative, leave the buffer pinned if - * anyone might need it. - */ - if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { - ret = 0 ; - } - } else - if (buffer_dirty(bh) || buffer_locked(bh)) { - struct reiserfs_journal_list *jl; - struct reiserfs_jh *jh = bh->b_private; - - /* why is this safe? - * reiserfs_setattr updates i_size in the on disk - * stat data before allowing vmtruncate to be called. - * - * If buffer was put onto the ordered list for this - * transaction, we know for sure either this transaction - * or an older one already has updated i_size on disk, - * and this ordered data won't be referenced in the file - * if we crash. - * - * if the buffer was put onto the ordered list for an older - * transaction, we need to leave it around + int ret = 1; + struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); + + spin_lock(&j->j_dirty_buffers_lock); + if (!buffer_mapped(bh)) { + goto free_jh; + } + /* the page is locked, and the only places that log a data buffer + * also lock the page. */ - if (jh && (jl = jh->jl) && jl != SB_JOURNAL(inode->i_sb)->j_current_jl) - ret = 0; - } -free_jh: - if (ret && bh->b_private) { - reiserfs_free_jh(bh); - } - spin_unlock(&j->j_dirty_buffers_lock) ; - return ret ; + if (reiserfs_file_data_log(inode)) { + /* + * very conservative, leave the buffer pinned if + * anyone might need it. + */ + if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { + ret = 0; + } + } else if (buffer_dirty(bh) || buffer_locked(bh)) { + struct reiserfs_journal_list *jl; + struct reiserfs_jh *jh = bh->b_private; + + /* why is this safe? + * reiserfs_setattr updates i_size in the on disk + * stat data before allowing vmtruncate to be called. 
+ * + * If buffer was put onto the ordered list for this + * transaction, we know for sure either this transaction + * or an older one already has updated i_size on disk, + * and this ordered data won't be referenced in the file + * if we crash. + * + * if the buffer was put onto the ordered list for an older + * transaction, we need to leave it around + */ + if (jh && (jl = jh->jl) + && jl != SB_JOURNAL(inode->i_sb)->j_current_jl) + ret = 0; + } + free_jh: + if (ret && bh->b_private) { + reiserfs_free_jh(bh); + } + spin_unlock(&j->j_dirty_buffers_lock); + return ret; } /* clm -- taken from fs/buffer.c:block_invalidate_page */ static int reiserfs_invalidatepage(struct page *page, unsigned long offset) { - struct buffer_head *head, *bh, *next; - struct inode *inode = page->mapping->host; - unsigned int curr_off = 0; - int ret = 1; + struct buffer_head *head, *bh, *next; + struct inode *inode = page->mapping->host; + unsigned int curr_off = 0; + int ret = 1; - BUG_ON(!PageLocked(page)); + BUG_ON(!PageLocked(page)); - if (offset == 0) - ClearPageChecked(page); + if (offset == 0) + ClearPageChecked(page); - if (!page_has_buffers(page)) - goto out; + if (!page_has_buffers(page)) + goto out; + + head = page_buffers(page); + bh = head; + do { + unsigned int next_off = curr_off + bh->b_size; + next = bh->b_this_page; - head = page_buffers(page); - bh = head; - do { - unsigned int next_off = curr_off + bh->b_size; - next = bh->b_this_page; + /* + * is this block fully invalidated? + */ + if (offset <= curr_off) { + if (invalidatepage_can_drop(inode, bh)) + reiserfs_unmap_buffer(bh); + else + ret = 0; + } + curr_off = next_off; + bh = next; + } while (bh != head); /* - * is this block fully invalidated? + * We release buffers only if the entire page is being invalidated. + * The get_block cached value has been unconditionally invalidated, + * so real IO is not possible anymore. 
*/ - if (offset <= curr_off) { - if (invalidatepage_can_drop(inode, bh)) - reiserfs_unmap_buffer(bh); - else - ret = 0; - } - curr_off = next_off; - bh = next; - } while (bh != head); - - /* - * We release buffers only if the entire page is being invalidated. - * The get_block cached value has been unconditionally invalidated, - * so real IO is not possible anymore. - */ - if (!offset && ret) - ret = try_to_release_page(page, 0); -out: - return ret; + if (!offset && ret) + ret = try_to_release_page(page, 0); + out: + return ret; } -static int reiserfs_set_page_dirty(struct page *page) { - struct inode *inode = page->mapping->host; - if (reiserfs_file_data_log(inode)) { - SetPageChecked(page); - return __set_page_dirty_nobuffers(page); - } - return __set_page_dirty_buffers(page); +static int reiserfs_set_page_dirty(struct page *page) +{ + struct inode *inode = page->mapping->host; + if (reiserfs_file_data_log(inode)) { + SetPageChecked(page); + return __set_page_dirty_nobuffers(page); + } + return __set_page_dirty_buffers(page); } /* @@ -2716,143 +2834,152 @@ static int reiserfs_set_page_dirty(struct page *page) { */ static int reiserfs_releasepage(struct page *page, int unused_gfp_flags) { - struct inode *inode = page->mapping->host ; - struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ; - struct buffer_head *head ; - struct buffer_head *bh ; - int ret = 1 ; - - WARN_ON(PageChecked(page)); - spin_lock(&j->j_dirty_buffers_lock) ; - head = page_buffers(page) ; - bh = head ; - do { - if (bh->b_private) { - if (!buffer_dirty(bh) && !buffer_locked(bh)) { - reiserfs_free_jh(bh); - } else { - ret = 0 ; - break ; - } - } - bh = bh->b_this_page ; - } while (bh != head) ; - if (ret) - ret = try_to_free_buffers(page) ; - spin_unlock(&j->j_dirty_buffers_lock) ; - return ret ; + struct inode *inode = page->mapping->host; + struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); + struct buffer_head *head; + struct buffer_head *bh; + int ret = 1; + + WARN_ON(PageChecked(page)); 
+ spin_lock(&j->j_dirty_buffers_lock); + head = page_buffers(page); + bh = head; + do { + if (bh->b_private) { + if (!buffer_dirty(bh) && !buffer_locked(bh)) { + reiserfs_free_jh(bh); + } else { + ret = 0; + break; + } + } + bh = bh->b_this_page; + } while (bh != head); + if (ret) + ret = try_to_free_buffers(page); + spin_unlock(&j->j_dirty_buffers_lock); + return ret; } /* We thank Mingming Cao for helping us understand in great detail what to do in this section of the code. */ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) { - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; - return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, reiserfs_get_blocks_direct_io, NULL); + return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, + reiserfs_get_blocks_direct_io, NULL); } -int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { - struct inode *inode = dentry->d_inode ; - int error ; - unsigned int ia_valid = attr->ia_valid; - reiserfs_write_lock(inode->i_sb); - if (attr->ia_valid & ATTR_SIZE) { - /* version 2 items will be caught by the s_maxbytes check - ** done for us in vmtruncate - */ - if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && - attr->ia_size > MAX_NON_LFS) { - error = -EFBIG ; - goto out; - } - /* fill in hole pointers in the expanding truncate case. 
*/ - if (attr->ia_size > inode->i_size) { - error = generic_cont_expand(inode, attr->ia_size) ; - if (REISERFS_I(inode)->i_prealloc_count > 0) { - int err; - struct reiserfs_transaction_handle th ; - /* we're changing at most 2 bitmaps, inode + super */ - err = journal_begin(&th, inode->i_sb, 4) ; - if (!err) { - reiserfs_discard_prealloc (&th, inode); - err = journal_end(&th, inode->i_sb, 4) ; +int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int error; + unsigned int ia_valid = attr->ia_valid; + reiserfs_write_lock(inode->i_sb); + if (attr->ia_valid & ATTR_SIZE) { + /* version 2 items will be caught by the s_maxbytes check + ** done for us in vmtruncate + */ + if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && + attr->ia_size > MAX_NON_LFS) { + error = -EFBIG; + goto out; + } + /* fill in hole pointers in the expanding truncate case. */ + if (attr->ia_size > inode->i_size) { + error = generic_cont_expand(inode, attr->ia_size); + if (REISERFS_I(inode)->i_prealloc_count > 0) { + int err; + struct reiserfs_transaction_handle th; + /* we're changing at most 2 bitmaps, inode + super */ + err = journal_begin(&th, inode->i_sb, 4); + if (!err) { + reiserfs_discard_prealloc(&th, inode); + err = journal_end(&th, inode->i_sb, 4); + } + if (err) + error = err; + } + if (error) + goto out; } - if (err) - error = err; - } - if (error) - goto out; } - } - if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) || - ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) && - (get_inode_sd_version (inode) == STAT_DATA_V1)) { + if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) || + ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) && + (get_inode_sd_version(inode) == STAT_DATA_V1)) { /* stat data of format v3.5 has 16 bit uid and gid */ - error = -EINVAL; - goto out; - } - - error = inode_change_ok(inode, attr) ; - if (!error) { - if ((ia_valid & ATTR_UID && attr->ia_uid != 
inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { - error = reiserfs_chown_xattrs (inode, attr); - - if (!error) { - struct reiserfs_transaction_handle th; - int jbegin_count = 2*(REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)+REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb))+2; - - /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ - error = journal_begin(&th, inode->i_sb, jbegin_count); - if (error) - goto out; - error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; - if (error) { - journal_end(&th, inode->i_sb, jbegin_count); - goto out; - } - /* Update corresponding info in inode so that everything is in - * one transaction */ - if (attr->ia_valid & ATTR_UID) - inode->i_uid = attr->ia_uid; - if (attr->ia_valid & ATTR_GID) - inode->i_gid = attr->ia_gid; - mark_inode_dirty(inode); - error = journal_end(&th, inode->i_sb, jbegin_count); - } - } - if (!error) - error = inode_setattr(inode, attr) ; - } + error = -EINVAL; + goto out; + } + error = inode_change_ok(inode, attr); + if (!error) { + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + error = reiserfs_chown_xattrs(inode, attr); + + if (!error) { + struct reiserfs_transaction_handle th; + int jbegin_count = + 2 * + (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) + + REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) + + 2; + + /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ + error = + journal_begin(&th, inode->i_sb, + jbegin_count); + if (error) + goto out; + error = + DQUOT_TRANSFER(inode, attr) ? 
-EDQUOT : 0; + if (error) { + journal_end(&th, inode->i_sb, + jbegin_count); + goto out; + } + /* Update corresponding info in inode so that everything is in + * one transaction */ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + mark_inode_dirty(inode); + error = + journal_end(&th, inode->i_sb, jbegin_count); + } + } + if (!error) + error = inode_setattr(inode, attr); + } - if (!error && reiserfs_posixacl (inode->i_sb)) { - if (attr->ia_valid & ATTR_MODE) - error = reiserfs_acl_chmod (inode); - } + if (!error && reiserfs_posixacl(inode->i_sb)) { + if (attr->ia_valid & ATTR_MODE) + error = reiserfs_acl_chmod(inode); + } -out: - reiserfs_write_unlock(inode->i_sb); - return error ; + out: + reiserfs_write_unlock(inode->i_sb); + return error; } - - struct address_space_operations reiserfs_address_space_operations = { - .writepage = reiserfs_writepage, - .readpage = reiserfs_readpage, - .readpages = reiserfs_readpages, - .releasepage = reiserfs_releasepage, - .invalidatepage = reiserfs_invalidatepage, - .sync_page = block_sync_page, - .prepare_write = reiserfs_prepare_write, - .commit_write = reiserfs_commit_write, - .bmap = reiserfs_aop_bmap, - .direct_IO = reiserfs_direct_IO, - .set_page_dirty = reiserfs_set_page_dirty, -} ; + .writepage = reiserfs_writepage, + .readpage = reiserfs_readpage, + .readpages = reiserfs_readpages, + .releasepage = reiserfs_releasepage, + .invalidatepage = reiserfs_invalidatepage, + .sync_page = block_sync_page, + .prepare_write = reiserfs_prepare_write, + .commit_write = reiserfs_commit_write, + .bmap = reiserfs_aop_bmap, + .direct_IO = reiserfs_direct_IO, + .set_page_dirty = reiserfs_set_page_dirty, +}; diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 76caedf737f..81fc00285f6 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -9,7 +9,7 @@ #include #include -static int reiserfs_unpack (struct inode * inode, struct file * filp); +static 
int reiserfs_unpack(struct inode *inode, struct file *filp); /* ** reiserfs_ioctl - handler for ioctl for inode @@ -19,69 +19,72 @@ static int reiserfs_unpack (struct inode * inode, struct file * filp); ** 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION ** 3) That's all for a while ... */ -int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) +int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, + unsigned long arg) { unsigned int flags; switch (cmd) { - case REISERFS_IOC_UNPACK: - if( S_ISREG( inode -> i_mode ) ) { - if (arg) - return reiserfs_unpack (inode, filp); + case REISERFS_IOC_UNPACK: + if (S_ISREG(inode->i_mode)) { + if (arg) + return reiserfs_unpack(inode, filp); else return 0; } else return -ENOTTY; - /* following two cases are taken from fs/ext2/ioctl.c by Remy - Card (card@masi.ibp.fr) */ + /* following two cases are taken from fs/ext2/ioctl.c by Remy + Card (card@masi.ibp.fr) */ case REISERFS_IOC_GETFLAGS: - if (!reiserfs_attrs (inode->i_sb)) + if (!reiserfs_attrs(inode->i_sb)) return -ENOTTY; - flags = REISERFS_I(inode) -> i_attrs; - i_attrs_to_sd_attrs( inode, ( __u16 * ) &flags ); - return put_user(flags, (int __user *) arg); - case REISERFS_IOC_SETFLAGS: { - if (!reiserfs_attrs (inode->i_sb)) - return -ENOTTY; + flags = REISERFS_I(inode)->i_attrs; + i_attrs_to_sd_attrs(inode, (__u16 *) & flags); + return put_user(flags, (int __user *)arg); + case REISERFS_IOC_SETFLAGS:{ + if (!reiserfs_attrs(inode->i_sb)) + return -ENOTTY; - if (IS_RDONLY(inode)) - return -EROFS; + if (IS_RDONLY(inode)) + return -EROFS; - if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) - return -EPERM; + if ((current->fsuid != inode->i_uid) + && !capable(CAP_FOWNER)) + return -EPERM; - if (get_user(flags, (int __user *) arg)) - return -EFAULT; + if (get_user(flags, (int __user *)arg)) + return -EFAULT; - if ( ( ( flags ^ REISERFS_I(inode) -> i_attrs) & ( REISERFS_IMMUTABLE_FL | 
REISERFS_APPEND_FL)) && - !capable( CAP_LINUX_IMMUTABLE ) ) - return -EPERM; - - if( ( flags & REISERFS_NOTAIL_FL ) && - S_ISREG( inode -> i_mode ) ) { + if (((flags ^ REISERFS_I(inode)-> + i_attrs) & (REISERFS_IMMUTABLE_FL | + REISERFS_APPEND_FL)) + && !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + if ((flags & REISERFS_NOTAIL_FL) && + S_ISREG(inode->i_mode)) { int result; - result = reiserfs_unpack( inode, filp ); - if( result ) + result = reiserfs_unpack(inode, filp); + if (result) return result; + } + sd_attrs_to_i_attrs(flags, inode); + REISERFS_I(inode)->i_attrs = flags; + inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + return 0; } - sd_attrs_to_i_attrs( flags, inode ); - REISERFS_I(inode) -> i_attrs = flags; - inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); - return 0; - } case REISERFS_IOC_GETVERSION: - return put_user(inode->i_generation, (int __user *) arg); + return put_user(inode->i_generation, (int __user *)arg); case REISERFS_IOC_SETVERSION: if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; if (IS_RDONLY(inode)) return -EROFS; - if (get_user(inode->i_generation, (int __user *) arg)) - return -EFAULT; + if (get_user(inode->i_generation, (int __user *)arg)) + return -EFAULT; inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); return 0; @@ -95,63 +98,65 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, ** Function try to convert tail from direct item into indirect. 
** It set up nopack attribute in the REISERFS_I(inode)->nopack */ -static int reiserfs_unpack (struct inode * inode, struct file * filp) +static int reiserfs_unpack(struct inode *inode, struct file *filp) { - int retval = 0; - int index ; - struct page *page ; - struct address_space *mapping ; - unsigned long write_from ; - unsigned long blocksize = inode->i_sb->s_blocksize ; - - if (inode->i_size == 0) { - REISERFS_I(inode)->i_flags |= i_nopack_mask; - return 0 ; - } - /* ioctl already done */ - if (REISERFS_I(inode)->i_flags & i_nopack_mask) { - return 0 ; - } - reiserfs_write_lock(inode->i_sb); - - /* we need to make sure nobody is changing the file size beneath - ** us - */ - down(&inode->i_sem) ; - - write_from = inode->i_size & (blocksize - 1) ; - /* if we are on a block boundary, we are already unpacked. */ - if ( write_from == 0) { + int retval = 0; + int index; + struct page *page; + struct address_space *mapping; + unsigned long write_from; + unsigned long blocksize = inode->i_sb->s_blocksize; + + if (inode->i_size == 0) { + REISERFS_I(inode)->i_flags |= i_nopack_mask; + return 0; + } + /* ioctl already done */ + if (REISERFS_I(inode)->i_flags & i_nopack_mask) { + return 0; + } + reiserfs_write_lock(inode->i_sb); + + /* we need to make sure nobody is changing the file size beneath + ** us + */ + down(&inode->i_sem); + + write_from = inode->i_size & (blocksize - 1); + /* if we are on a block boundary, we are already unpacked. */ + if (write_from == 0) { + REISERFS_I(inode)->i_flags |= i_nopack_mask; + goto out; + } + + /* we unpack by finding the page with the tail, and calling + ** reiserfs_prepare_write on that page. This will force a + ** reiserfs_get_block to unpack the tail for us. 
+ */ + index = inode->i_size >> PAGE_CACHE_SHIFT; + mapping = inode->i_mapping; + page = grab_cache_page(mapping, index); + retval = -ENOMEM; + if (!page) { + goto out; + } + retval = + mapping->a_ops->prepare_write(NULL, page, write_from, write_from); + if (retval) + goto out_unlock; + + /* conversion can change page contents, must flush */ + flush_dcache_page(page); + retval = + mapping->a_ops->commit_write(NULL, page, write_from, write_from); REISERFS_I(inode)->i_flags |= i_nopack_mask; - goto out ; - } - - /* we unpack by finding the page with the tail, and calling - ** reiserfs_prepare_write on that page. This will force a - ** reiserfs_get_block to unpack the tail for us. - */ - index = inode->i_size >> PAGE_CACHE_SHIFT ; - mapping = inode->i_mapping ; - page = grab_cache_page(mapping, index) ; - retval = -ENOMEM; - if (!page) { - goto out ; - } - retval = mapping->a_ops->prepare_write(NULL, page, write_from, write_from) ; - if (retval) - goto out_unlock ; - - /* conversion can change page contents, must flush */ - flush_dcache_page(page) ; - retval = mapping->a_ops->commit_write(NULL, page, write_from, write_from) ; - REISERFS_I(inode)->i_flags |= i_nopack_mask; - -out_unlock: - unlock_page(page) ; - page_cache_release(page) ; - -out: - up(&inode->i_sem) ; - reiserfs_write_unlock(inode->i_sb); - return retval; + + out_unlock: + unlock_page(page); + page_cache_release(page); + + out: + up(&inode->i_sem); + reiserfs_write_unlock(inode->i_sb); + return retval; } diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c index e477aeba8c9..e237cd668e5 100644 --- a/fs/reiserfs/item_ops.c +++ b/fs/reiserfs/item_ops.c @@ -14,760 +14,729 @@ ////////////////////////////////////////////////////////////////////////////// // stat data functions // -static int sd_bytes_number (struct item_head * ih, int block_size) +static int sd_bytes_number(struct item_head *ih, int block_size) { - return 0; + return 0; } -static void sd_decrement_key (struct cpu_key * key) +static 
void sd_decrement_key(struct cpu_key *key) { - key->on_disk_key.k_objectid --; - set_cpu_key_k_type (key, TYPE_ANY); - set_cpu_key_k_offset(key, (loff_t)(-1)); + key->on_disk_key.k_objectid--; + set_cpu_key_k_type(key, TYPE_ANY); + set_cpu_key_k_offset(key, (loff_t) (-1)); } -static int sd_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize) +static int sd_is_left_mergeable(struct reiserfs_key *key, unsigned long bsize) { - return 0; + return 0; } - - -static char * print_time (time_t t) +static char *print_time(time_t t) { - static char timebuf[256]; + static char timebuf[256]; - sprintf (timebuf, "%ld", t); - return timebuf; + sprintf(timebuf, "%ld", t); + return timebuf; } - -static void sd_print_item (struct item_head * ih, char * item) +static void sd_print_item(struct item_head *ih, char *item) { - printk ("\tmode | size | nlinks | first direct | mtime\n"); - if (stat_data_v1 (ih)) { - struct stat_data_v1 * sd = (struct stat_data_v1 *)item; + printk("\tmode | size | nlinks | first direct | mtime\n"); + if (stat_data_v1(ih)) { + struct stat_data_v1 *sd = (struct stat_data_v1 *)item; - printk ("\t0%-6o | %6u | %2u | %d | %s\n", sd_v1_mode(sd), - sd_v1_size(sd), sd_v1_nlink(sd), sd_v1_first_direct_byte(sd), - print_time( sd_v1_mtime(sd) ) ); - } else { - struct stat_data * sd = (struct stat_data *)item; + printk("\t0%-6o | %6u | %2u | %d | %s\n", sd_v1_mode(sd), + sd_v1_size(sd), sd_v1_nlink(sd), + sd_v1_first_direct_byte(sd), + print_time(sd_v1_mtime(sd))); + } else { + struct stat_data *sd = (struct stat_data *)item; - printk ("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd), - (unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd), - sd_v2_rdev(sd), print_time(sd_v2_mtime(sd))); - } + printk("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd), + (unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd), + sd_v2_rdev(sd), print_time(sd_v2_mtime(sd))); + } } -static void sd_check_item (struct item_head * ih, char * item) +static void sd_check_item(struct 
item_head *ih, char *item) { - // FIXME: type something here! + // FIXME: type something here! } - -static int sd_create_vi (struct virtual_node * vn, - struct virtual_item * vi, - int is_affected, - int insert_size) +static int sd_create_vi(struct virtual_node *vn, + struct virtual_item *vi, + int is_affected, int insert_size) { - vi->vi_index = TYPE_STAT_DATA; - //vi->vi_type |= VI_TYPE_STAT_DATA;// not needed? - return 0; + vi->vi_index = TYPE_STAT_DATA; + //vi->vi_type |= VI_TYPE_STAT_DATA;// not needed? + return 0; } - -static int sd_check_left (struct virtual_item * vi, int free, - int start_skip, int end_skip) +static int sd_check_left(struct virtual_item *vi, int free, + int start_skip, int end_skip) { - if (start_skip || end_skip) - BUG (); - return -1; + if (start_skip || end_skip) + BUG(); + return -1; } - -static int sd_check_right (struct virtual_item * vi, int free) +static int sd_check_right(struct virtual_item *vi, int free) { - return -1; + return -1; } -static int sd_part_size (struct virtual_item * vi, int first, int count) +static int sd_part_size(struct virtual_item *vi, int first, int count) { - if (count) - BUG (); - return 0; + if (count) + BUG(); + return 0; } -static int sd_unit_num (struct virtual_item * vi) +static int sd_unit_num(struct virtual_item *vi) { - return vi->vi_item_len - IH_SIZE; + return vi->vi_item_len - IH_SIZE; } - -static void sd_print_vi (struct virtual_item * vi) +static void sd_print_vi(struct virtual_item *vi) { - reiserfs_warning (NULL, "STATDATA, index %d, type 0x%x, %h", - vi->vi_index, vi->vi_type, vi->vi_ih); + reiserfs_warning(NULL, "STATDATA, index %d, type 0x%x, %h", + vi->vi_index, vi->vi_type, vi->vi_ih); } static struct item_operations stat_data_ops = { - .bytes_number = sd_bytes_number, - .decrement_key = sd_decrement_key, - .is_left_mergeable = sd_is_left_mergeable, - .print_item = sd_print_item, - .check_item = sd_check_item, - - .create_vi = sd_create_vi, - .check_left = sd_check_left, - .check_right 
= sd_check_right, - .part_size = sd_part_size, - .unit_num = sd_unit_num, - .print_vi = sd_print_vi + .bytes_number = sd_bytes_number, + .decrement_key = sd_decrement_key, + .is_left_mergeable = sd_is_left_mergeable, + .print_item = sd_print_item, + .check_item = sd_check_item, + + .create_vi = sd_create_vi, + .check_left = sd_check_left, + .check_right = sd_check_right, + .part_size = sd_part_size, + .unit_num = sd_unit_num, + .print_vi = sd_print_vi }; - - ////////////////////////////////////////////////////////////////////////////// // direct item functions // -static int direct_bytes_number (struct item_head * ih, int block_size) +static int direct_bytes_number(struct item_head *ih, int block_size) { - return ih_item_len(ih); + return ih_item_len(ih); } - // FIXME: this should probably switch to indirect as well -static void direct_decrement_key (struct cpu_key * key) +static void direct_decrement_key(struct cpu_key *key) { - cpu_key_k_offset_dec (key); - if (cpu_key_k_offset (key) == 0) - set_cpu_key_k_type (key, TYPE_STAT_DATA); + cpu_key_k_offset_dec(key); + if (cpu_key_k_offset(key) == 0) + set_cpu_key_k_type(key, TYPE_STAT_DATA); } - -static int direct_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize) +static int direct_is_left_mergeable(struct reiserfs_key *key, + unsigned long bsize) { - int version = le_key_version (key); - return ((le_key_k_offset (version, key) & (bsize - 1)) != 1); + int version = le_key_version(key); + return ((le_key_k_offset(version, key) & (bsize - 1)) != 1); } - -static void direct_print_item (struct item_head * ih, char * item) +static void direct_print_item(struct item_head *ih, char *item) { - int j = 0; + int j = 0; // return; - printk ("\""); - while (j < ih_item_len(ih)) - printk ("%c", item[j++]); - printk ("\"\n"); + printk("\""); + while (j < ih_item_len(ih)) + printk("%c", item[j++]); + printk("\"\n"); } - -static void direct_check_item (struct item_head * ih, char * item) +static void 
direct_check_item(struct item_head *ih, char *item) { - // FIXME: type something here! + // FIXME: type something here! } - -static int direct_create_vi (struct virtual_node * vn, - struct virtual_item * vi, - int is_affected, - int insert_size) +static int direct_create_vi(struct virtual_node *vn, + struct virtual_item *vi, + int is_affected, int insert_size) { - vi->vi_index = TYPE_DIRECT; - //vi->vi_type |= VI_TYPE_DIRECT; - return 0; + vi->vi_index = TYPE_DIRECT; + //vi->vi_type |= VI_TYPE_DIRECT; + return 0; } -static int direct_check_left (struct virtual_item * vi, int free, - int start_skip, int end_skip) +static int direct_check_left(struct virtual_item *vi, int free, + int start_skip, int end_skip) { - int bytes; + int bytes; - bytes = free - free % 8; - return bytes ?: -1; + bytes = free - free % 8; + return bytes ? : -1; } - -static int direct_check_right (struct virtual_item * vi, int free) +static int direct_check_right(struct virtual_item *vi, int free) { - return direct_check_left (vi, free, 0, 0); + return direct_check_left(vi, free, 0, 0); } -static int direct_part_size (struct virtual_item * vi, int first, int count) +static int direct_part_size(struct virtual_item *vi, int first, int count) { - return count; + return count; } - -static int direct_unit_num (struct virtual_item * vi) +static int direct_unit_num(struct virtual_item *vi) { - return vi->vi_item_len - IH_SIZE; + return vi->vi_item_len - IH_SIZE; } - -static void direct_print_vi (struct virtual_item * vi) +static void direct_print_vi(struct virtual_item *vi) { - reiserfs_warning (NULL, "DIRECT, index %d, type 0x%x, %h", - vi->vi_index, vi->vi_type, vi->vi_ih); + reiserfs_warning(NULL, "DIRECT, index %d, type 0x%x, %h", + vi->vi_index, vi->vi_type, vi->vi_ih); } static struct item_operations direct_ops = { - .bytes_number = direct_bytes_number, - .decrement_key = direct_decrement_key, - .is_left_mergeable = direct_is_left_mergeable, - .print_item = direct_print_item, - .check_item = 
direct_check_item, - - .create_vi = direct_create_vi, - .check_left = direct_check_left, - .check_right = direct_check_right, - .part_size = direct_part_size, - .unit_num = direct_unit_num, - .print_vi = direct_print_vi + .bytes_number = direct_bytes_number, + .decrement_key = direct_decrement_key, + .is_left_mergeable = direct_is_left_mergeable, + .print_item = direct_print_item, + .check_item = direct_check_item, + + .create_vi = direct_create_vi, + .check_left = direct_check_left, + .check_right = direct_check_right, + .part_size = direct_part_size, + .unit_num = direct_unit_num, + .print_vi = direct_print_vi }; - - ////////////////////////////////////////////////////////////////////////////// // indirect item functions // -static int indirect_bytes_number (struct item_head * ih, int block_size) +static int indirect_bytes_number(struct item_head *ih, int block_size) { - return ih_item_len(ih) / UNFM_P_SIZE * block_size; //- get_ih_free_space (ih); + return ih_item_len(ih) / UNFM_P_SIZE * block_size; //- get_ih_free_space (ih); } - // decrease offset, if it becomes 0, change type to stat data -static void indirect_decrement_key (struct cpu_key * key) +static void indirect_decrement_key(struct cpu_key *key) { - cpu_key_k_offset_dec (key); - if (cpu_key_k_offset (key) == 0) - set_cpu_key_k_type (key, TYPE_STAT_DATA); + cpu_key_k_offset_dec(key); + if (cpu_key_k_offset(key) == 0) + set_cpu_key_k_type(key, TYPE_STAT_DATA); } - // if it is not first item of the body, then it is mergeable -static int indirect_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize) +static int indirect_is_left_mergeable(struct reiserfs_key *key, + unsigned long bsize) { - int version = le_key_version (key); - return (le_key_k_offset (version, key) != 1); + int version = le_key_version(key); + return (le_key_k_offset(version, key) != 1); } - // printing of indirect item -static void start_new_sequence (__u32 * start, int * len, __u32 new) +static void start_new_sequence(__u32 
* start, int *len, __u32 new) { - *start = new; - *len = 1; + *start = new; + *len = 1; } - -static int sequence_finished (__u32 start, int * len, __u32 new) +static int sequence_finished(__u32 start, int *len, __u32 new) { - if (start == INT_MAX) - return 1; + if (start == INT_MAX) + return 1; - if (start == 0 && new == 0) { - (*len) ++; - return 0; - } - if (start != 0 && (start + *len) == new) { - (*len) ++; - return 0; - } - return 1; + if (start == 0 && new == 0) { + (*len)++; + return 0; + } + if (start != 0 && (start + *len) == new) { + (*len)++; + return 0; + } + return 1; } -static void print_sequence (__u32 start, int len) +static void print_sequence(__u32 start, int len) { - if (start == INT_MAX) - return; + if (start == INT_MAX) + return; - if (len == 1) - printk (" %d", start); - else - printk (" %d(%d)", start, len); + if (len == 1) + printk(" %d", start); + else + printk(" %d(%d)", start, len); } - -static void indirect_print_item (struct item_head * ih, char * item) +static void indirect_print_item(struct item_head *ih, char *item) { - int j; - __le32 * unp; - __u32 prev = INT_MAX; - int num; + int j; + __le32 *unp; + __u32 prev = INT_MAX; + int num; - unp = (__le32 *)item; + unp = (__le32 *) item; - if (ih_item_len(ih) % UNFM_P_SIZE) - reiserfs_warning (NULL, "indirect_print_item: invalid item len"); + if (ih_item_len(ih) % UNFM_P_SIZE) + reiserfs_warning(NULL, "indirect_print_item: invalid item len"); - printk ("%d pointers\n[ ", (int)I_UNFM_NUM (ih)); - for (j = 0; j < I_UNFM_NUM (ih); j ++) { - if (sequence_finished (prev, &num, get_block_num(unp, j))) { - print_sequence (prev, num); - start_new_sequence (&prev, &num, get_block_num(unp, j)); + printk("%d pointers\n[ ", (int)I_UNFM_NUM(ih)); + for (j = 0; j < I_UNFM_NUM(ih); j++) { + if (sequence_finished(prev, &num, get_block_num(unp, j))) { + print_sequence(prev, num); + start_new_sequence(&prev, &num, get_block_num(unp, j)); + } } - } - print_sequence (prev, num); - printk ("]\n"); + 
print_sequence(prev, num); + printk("]\n"); } -static void indirect_check_item (struct item_head * ih, char * item) +static void indirect_check_item(struct item_head *ih, char *item) { - // FIXME: type something here! + // FIXME: type something here! } - -static int indirect_create_vi (struct virtual_node * vn, - struct virtual_item * vi, - int is_affected, - int insert_size) +static int indirect_create_vi(struct virtual_node *vn, + struct virtual_item *vi, + int is_affected, int insert_size) { - vi->vi_index = TYPE_INDIRECT; - //vi->vi_type |= VI_TYPE_INDIRECT; - return 0; + vi->vi_index = TYPE_INDIRECT; + //vi->vi_type |= VI_TYPE_INDIRECT; + return 0; } -static int indirect_check_left (struct virtual_item * vi, int free, - int start_skip, int end_skip) +static int indirect_check_left(struct virtual_item *vi, int free, + int start_skip, int end_skip) { - int bytes; + int bytes; - bytes = free - free % UNFM_P_SIZE; - return bytes ?: -1; + bytes = free - free % UNFM_P_SIZE; + return bytes ? : -1; } - -static int indirect_check_right (struct virtual_item * vi, int free) +static int indirect_check_right(struct virtual_item *vi, int free) { - return indirect_check_left (vi, free, 0, 0); + return indirect_check_left(vi, free, 0, 0); } - - // return size in bytes of 'units' units. 
If first == 0 - calculate from the head (left), otherwise - from tail (right) -static int indirect_part_size (struct virtual_item * vi, int first, int units) +static int indirect_part_size(struct virtual_item *vi, int first, int units) { - // unit of indirect item is byte (yet) - return units; + // unit of indirect item is byte (yet) + return units; } -static int indirect_unit_num (struct virtual_item * vi) +static int indirect_unit_num(struct virtual_item *vi) { - // unit of indirect item is byte (yet) - return vi->vi_item_len - IH_SIZE; + // unit of indirect item is byte (yet) + return vi->vi_item_len - IH_SIZE; } -static void indirect_print_vi (struct virtual_item * vi) +static void indirect_print_vi(struct virtual_item *vi) { - reiserfs_warning (NULL, "INDIRECT, index %d, type 0x%x, %h", - vi->vi_index, vi->vi_type, vi->vi_ih); + reiserfs_warning(NULL, "INDIRECT, index %d, type 0x%x, %h", + vi->vi_index, vi->vi_type, vi->vi_ih); } static struct item_operations indirect_ops = { - .bytes_number = indirect_bytes_number, - .decrement_key = indirect_decrement_key, - .is_left_mergeable = indirect_is_left_mergeable, - .print_item = indirect_print_item, - .check_item = indirect_check_item, - - .create_vi = indirect_create_vi, - .check_left = indirect_check_left, - .check_right = indirect_check_right, - .part_size = indirect_part_size, - .unit_num = indirect_unit_num, - .print_vi = indirect_print_vi + .bytes_number = indirect_bytes_number, + .decrement_key = indirect_decrement_key, + .is_left_mergeable = indirect_is_left_mergeable, + .print_item = indirect_print_item, + .check_item = indirect_check_item, + + .create_vi = indirect_create_vi, + .check_left = indirect_check_left, + .check_right = indirect_check_right, + .part_size = indirect_part_size, + .unit_num = indirect_unit_num, + .print_vi = indirect_print_vi }; - ////////////////////////////////////////////////////////////////////////////// // direntry functions // - -static int direntry_bytes_number (struct 
item_head * ih, int block_size) +static int direntry_bytes_number(struct item_head *ih, int block_size) { - reiserfs_warning (NULL, "vs-16090: direntry_bytes_number: " - "bytes number is asked for direntry"); - return 0; -} - -static void direntry_decrement_key (struct cpu_key * key) -{ - cpu_key_k_offset_dec (key); - if (cpu_key_k_offset (key) == 0) - set_cpu_key_k_type (key, TYPE_STAT_DATA); + reiserfs_warning(NULL, "vs-16090: direntry_bytes_number: " + "bytes number is asked for direntry"); + return 0; } - -static int direntry_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize) +static void direntry_decrement_key(struct cpu_key *key) { - if (le32_to_cpu (key->u.k_offset_v1.k_offset) == DOT_OFFSET) - return 0; - return 1; - + cpu_key_k_offset_dec(key); + if (cpu_key_k_offset(key) == 0) + set_cpu_key_k_type(key, TYPE_STAT_DATA); } - -static void direntry_print_item (struct item_head * ih, char * item) +static int direntry_is_left_mergeable(struct reiserfs_key *key, + unsigned long bsize) { - int i; - int namelen; - struct reiserfs_de_head * deh; - char * name; - static char namebuf [80]; - - - printk ("\n # %-15s%-30s%-15s%-15s%-15s\n", "Name", "Key of pointed object", "Hash", "Gen number", "Status"); + if (le32_to_cpu(key->u.k_offset_v1.k_offset) == DOT_OFFSET) + return 0; + return 1; - deh = (struct reiserfs_de_head *)item; +} - for (i = 0; i < I_ENTRY_COUNT (ih); i ++, deh ++) { - namelen = (i ? 
(deh_location(deh - 1)) : ih_item_len(ih)) - deh_location(deh); - name = item + deh_location(deh); - if (name[namelen-1] == 0) - namelen = strlen (name); - namebuf[0] = '"'; - if (namelen > sizeof (namebuf) - 3) { - strncpy (namebuf + 1, name, sizeof (namebuf) - 3); - namebuf[sizeof (namebuf) - 2] = '"'; - namebuf[sizeof (namebuf) - 1] = 0; - } else { - memcpy (namebuf + 1, name, namelen); - namebuf[namelen + 1] = '"'; - namebuf[namelen + 2] = 0; +static void direntry_print_item(struct item_head *ih, char *item) +{ + int i; + int namelen; + struct reiserfs_de_head *deh; + char *name; + static char namebuf[80]; + + printk("\n # %-15s%-30s%-15s%-15s%-15s\n", "Name", + "Key of pointed object", "Hash", "Gen number", "Status"); + + deh = (struct reiserfs_de_head *)item; + + for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) { + namelen = + (i ? (deh_location(deh - 1)) : ih_item_len(ih)) - + deh_location(deh); + name = item + deh_location(deh); + if (name[namelen - 1] == 0) + namelen = strlen(name); + namebuf[0] = '"'; + if (namelen > sizeof(namebuf) - 3) { + strncpy(namebuf + 1, name, sizeof(namebuf) - 3); + namebuf[sizeof(namebuf) - 2] = '"'; + namebuf[sizeof(namebuf) - 1] = 0; + } else { + memcpy(namebuf + 1, name, namelen); + namebuf[namelen + 1] = '"'; + namebuf[namelen + 2] = 0; + } + + printk("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n", + i, namebuf, + deh_dir_id(deh), deh_objectid(deh), + GET_HASH_VALUE(deh_offset(deh)), + GET_GENERATION_NUMBER((deh_offset(deh))), + (de_hidden(deh)) ? "HIDDEN" : "VISIBLE"); } - - printk ("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n", - i, namebuf, - deh_dir_id(deh), deh_objectid(deh), - GET_HASH_VALUE (deh_offset (deh)), GET_GENERATION_NUMBER ((deh_offset (deh))), - (de_hidden (deh)) ? 
"HIDDEN" : "VISIBLE"); - } } - -static void direntry_check_item (struct item_head * ih, char * item) +static void direntry_check_item(struct item_head *ih, char *item) { - int i; - struct reiserfs_de_head * deh; + int i; + struct reiserfs_de_head *deh; - // FIXME: type something here! - deh = (struct reiserfs_de_head *)item; - for (i = 0; i < I_ENTRY_COUNT (ih); i ++, deh ++) { - ; - } + // FIXME: type something here! + deh = (struct reiserfs_de_head *)item; + for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) { + ; + } } - - #define DIRENTRY_VI_FIRST_DIRENTRY_ITEM 1 /* * function returns old entry number in directory item in real node * using new entry number in virtual item in virtual node */ -static inline int old_entry_num (int is_affected, int virtual_entry_num, int pos_in_item, int mode) +static inline int old_entry_num(int is_affected, int virtual_entry_num, + int pos_in_item, int mode) { - if ( mode == M_INSERT || mode == M_DELETE) - return virtual_entry_num; - - if (!is_affected) - /* cut or paste is applied to another item */ - return virtual_entry_num; - - if (virtual_entry_num < pos_in_item) - return virtual_entry_num; + if (mode == M_INSERT || mode == M_DELETE) + return virtual_entry_num; - if (mode == M_CUT) - return virtual_entry_num + 1; + if (!is_affected) + /* cut or paste is applied to another item */ + return virtual_entry_num; - RFALSE( mode != M_PASTE || virtual_entry_num == 0, - "vs-8015: old_entry_num: mode must be M_PASTE (mode = \'%c\'", mode); - - return virtual_entry_num - 1; -} + if (virtual_entry_num < pos_in_item) + return virtual_entry_num; + if (mode == M_CUT) + return virtual_entry_num + 1; + RFALSE(mode != M_PASTE || virtual_entry_num == 0, + "vs-8015: old_entry_num: mode must be M_PASTE (mode = \'%c\'", + mode); + return virtual_entry_num - 1; +} /* Create an array of sizes of directory entries for virtual item. Return space used by an item. 
FIXME: no control over consuming of space used by this item handler */ -static int direntry_create_vi (struct virtual_node * vn, - struct virtual_item * vi, - int is_affected, - int insert_size) -{ - struct direntry_uarea * dir_u = vi->vi_uarea; - int i, j; - int size = sizeof (struct direntry_uarea); - struct reiserfs_de_head * deh; - - vi->vi_index = TYPE_DIRENTRY; - - if (!(vi->vi_ih) || !vi->vi_item) - BUG (); - - - dir_u->flags = 0; - if (le_ih_k_offset (vi->vi_ih) == DOT_OFFSET) - dir_u->flags |= DIRENTRY_VI_FIRST_DIRENTRY_ITEM; - - deh = (struct reiserfs_de_head *)(vi->vi_item); - - - /* virtual directory item have this amount of entry after */ - dir_u->entry_count = ih_entry_count (vi->vi_ih) + - ((is_affected) ? ((vn->vn_mode == M_CUT) ? -1 : - (vn->vn_mode == M_PASTE ? 1 : 0)) : 0); - - for (i = 0; i < dir_u->entry_count; i ++) { - j = old_entry_num (is_affected, i, vn->vn_pos_in_item, vn->vn_mode); - dir_u->entry_sizes[i] = (j ? deh_location( &(deh[j - 1]) ) : - ih_item_len (vi->vi_ih)) - - deh_location( &(deh[j])) + DEH_SIZE; - } - - size += (dir_u->entry_count * sizeof (short)); - - /* set size of pasted entry */ - if (is_affected && vn->vn_mode == M_PASTE) - dir_u->entry_sizes[vn->vn_pos_in_item] = insert_size; +static int direntry_create_vi(struct virtual_node *vn, + struct virtual_item *vi, + int is_affected, int insert_size) +{ + struct direntry_uarea *dir_u = vi->vi_uarea; + int i, j; + int size = sizeof(struct direntry_uarea); + struct reiserfs_de_head *deh; + vi->vi_index = TYPE_DIRENTRY; + + if (!(vi->vi_ih) || !vi->vi_item) + BUG(); + + dir_u->flags = 0; + if (le_ih_k_offset(vi->vi_ih) == DOT_OFFSET) + dir_u->flags |= DIRENTRY_VI_FIRST_DIRENTRY_ITEM; + + deh = (struct reiserfs_de_head *)(vi->vi_item); + + /* virtual directory item have this amount of entry after */ + dir_u->entry_count = ih_entry_count(vi->vi_ih) + + ((is_affected) ? ((vn->vn_mode == M_CUT) ? -1 : + (vn->vn_mode == M_PASTE ? 
1 : 0)) : 0); + + for (i = 0; i < dir_u->entry_count; i++) { + j = old_entry_num(is_affected, i, vn->vn_pos_in_item, + vn->vn_mode); + dir_u->entry_sizes[i] = + (j ? deh_location(&(deh[j - 1])) : ih_item_len(vi->vi_ih)) - + deh_location(&(deh[j])) + DEH_SIZE; + } + + size += (dir_u->entry_count * sizeof(short)); + + /* set size of pasted entry */ + if (is_affected && vn->vn_mode == M_PASTE) + dir_u->entry_sizes[vn->vn_pos_in_item] = insert_size; #ifdef CONFIG_REISERFS_CHECK - /* compare total size of entries with item length */ - { - int k, l; - - l = 0; - for (k = 0; k < dir_u->entry_count; k ++) - l += dir_u->entry_sizes[k]; - - if (l + IH_SIZE != vi->vi_item_len + - ((is_affected && (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT)) ? insert_size : 0) ) { - reiserfs_panic (NULL, "vs-8025: set_entry_sizes: (mode==%c, insert_size==%d), invalid length of directory item", - vn->vn_mode, insert_size); + /* compare total size of entries with item length */ + { + int k, l; + + l = 0; + for (k = 0; k < dir_u->entry_count; k++) + l += dir_u->entry_sizes[k]; + + if (l + IH_SIZE != vi->vi_item_len + + ((is_affected + && (vn->vn_mode == M_PASTE + || vn->vn_mode == M_CUT)) ? 
insert_size : 0)) { + reiserfs_panic(NULL, + "vs-8025: set_entry_sizes: (mode==%c, insert_size==%d), invalid length of directory item", + vn->vn_mode, insert_size); + } } - } #endif - return size; - + return size; } - // // return number of entries which may fit into specified amount of // free space, or -1 if free space is not enough even for 1 entry // -static int direntry_check_left (struct virtual_item * vi, int free, - int start_skip, int end_skip) +static int direntry_check_left(struct virtual_item *vi, int free, + int start_skip, int end_skip) { - int i; - int entries = 0; - struct direntry_uarea * dir_u = vi->vi_uarea; + int i; + int entries = 0; + struct direntry_uarea *dir_u = vi->vi_uarea; - for (i = start_skip; i < dir_u->entry_count - end_skip; i ++) { - if (dir_u->entry_sizes[i] > free) - /* i-th entry doesn't fit into the remaining free space */ - break; - - free -= dir_u->entry_sizes[i]; - entries ++; - } + for (i = start_skip; i < dir_u->entry_count - end_skip; i++) { + if (dir_u->entry_sizes[i] > free) + /* i-th entry doesn't fit into the remaining free space */ + break; - if (entries == dir_u->entry_count) { - reiserfs_panic (NULL, "free space %d, entry_count %d\n", free, dir_u->entry_count); - } + free -= dir_u->entry_sizes[i]; + entries++; + } - /* "." and ".." can not be separated from each other */ - if (start_skip == 0 && (dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) && entries < 2) - entries = 0; - - return entries ?: -1; -} + if (entries == dir_u->entry_count) { + reiserfs_panic(NULL, "free space %d, entry_count %d\n", free, + dir_u->entry_count); + } + /* "." and ".." can not be separated from each other */ + if (start_skip == 0 && (dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) + && entries < 2) + entries = 0; -static int direntry_check_right (struct virtual_item * vi, int free) + return entries ? 
: -1; +} + +static int direntry_check_right(struct virtual_item *vi, int free) { - int i; - int entries = 0; - struct direntry_uarea * dir_u = vi->vi_uarea; - - for (i = dir_u->entry_count - 1; i >= 0; i --) { - if (dir_u->entry_sizes[i] > free) - /* i-th entry doesn't fit into the remaining free space */ - break; - - free -= dir_u->entry_sizes[i]; - entries ++; - } - if (entries == dir_u->entry_count) - BUG (); + int i; + int entries = 0; + struct direntry_uarea *dir_u = vi->vi_uarea; - /* "." and ".." can not be separated from each other */ - if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) && entries > dir_u->entry_count - 2) - entries = dir_u->entry_count - 2; + for (i = dir_u->entry_count - 1; i >= 0; i--) { + if (dir_u->entry_sizes[i] > free) + /* i-th entry doesn't fit into the remaining free space */ + break; - return entries ?: -1; -} + free -= dir_u->entry_sizes[i]; + entries++; + } + if (entries == dir_u->entry_count) + BUG(); + /* "." and ".." can not be separated from each other */ + if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) + && entries > dir_u->entry_count - 2) + entries = dir_u->entry_count - 2; + + return entries ? 
: -1; +} /* sum of entry sizes between from-th and to-th entries including both edges */ -static int direntry_part_size (struct virtual_item * vi, int first, int count) +static int direntry_part_size(struct virtual_item *vi, int first, int count) { - int i, retval; - int from, to; - struct direntry_uarea * dir_u = vi->vi_uarea; - - retval = 0; - if (first == 0) - from = 0; - else - from = dir_u->entry_count - count; - to = from + count - 1; + int i, retval; + int from, to; + struct direntry_uarea *dir_u = vi->vi_uarea; - for (i = from; i <= to; i ++) - retval += dir_u->entry_sizes[i]; + retval = 0; + if (first == 0) + from = 0; + else + from = dir_u->entry_count - count; + to = from + count - 1; - return retval; -} + for (i = from; i <= to; i++) + retval += dir_u->entry_sizes[i]; -static int direntry_unit_num (struct virtual_item * vi) -{ - struct direntry_uarea * dir_u = vi->vi_uarea; - - return dir_u->entry_count; + return retval; } +static int direntry_unit_num(struct virtual_item *vi) +{ + struct direntry_uarea *dir_u = vi->vi_uarea; + return dir_u->entry_count; +} -static void direntry_print_vi (struct virtual_item * vi) +static void direntry_print_vi(struct virtual_item *vi) { - int i; - struct direntry_uarea * dir_u = vi->vi_uarea; + int i; + struct direntry_uarea *dir_u = vi->vi_uarea; - reiserfs_warning (NULL, "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x", - vi->vi_index, vi->vi_type, vi->vi_ih, dir_u->flags); - printk ("%d entries: ", dir_u->entry_count); - for (i = 0; i < dir_u->entry_count; i ++) - printk ("%d ", dir_u->entry_sizes[i]); - printk ("\n"); + reiserfs_warning(NULL, "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x", + vi->vi_index, vi->vi_type, vi->vi_ih, dir_u->flags); + printk("%d entries: ", dir_u->entry_count); + for (i = 0; i < dir_u->entry_count; i++) + printk("%d ", dir_u->entry_sizes[i]); + printk("\n"); } static struct item_operations direntry_ops = { - .bytes_number = direntry_bytes_number, - .decrement_key = 
direntry_decrement_key, - .is_left_mergeable = direntry_is_left_mergeable, - .print_item = direntry_print_item, - .check_item = direntry_check_item, - - .create_vi = direntry_create_vi, - .check_left = direntry_check_left, - .check_right = direntry_check_right, - .part_size = direntry_part_size, - .unit_num = direntry_unit_num, - .print_vi = direntry_print_vi + .bytes_number = direntry_bytes_number, + .decrement_key = direntry_decrement_key, + .is_left_mergeable = direntry_is_left_mergeable, + .print_item = direntry_print_item, + .check_item = direntry_check_item, + + .create_vi = direntry_create_vi, + .check_left = direntry_check_left, + .check_right = direntry_check_right, + .part_size = direntry_part_size, + .unit_num = direntry_unit_num, + .print_vi = direntry_print_vi }; - ////////////////////////////////////////////////////////////////////////////// // Error catching functions to catch errors caused by incorrect item types. // -static int errcatch_bytes_number (struct item_head * ih, int block_size) +static int errcatch_bytes_number(struct item_head *ih, int block_size) { - reiserfs_warning (NULL, "green-16001: Invalid item type observed, run fsck ASAP"); - return 0; + reiserfs_warning(NULL, + "green-16001: Invalid item type observed, run fsck ASAP"); + return 0; } -static void errcatch_decrement_key (struct cpu_key * key) +static void errcatch_decrement_key(struct cpu_key *key) { - reiserfs_warning (NULL, "green-16002: Invalid item type observed, run fsck ASAP"); + reiserfs_warning(NULL, + "green-16002: Invalid item type observed, run fsck ASAP"); } - -static int errcatch_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize) +static int errcatch_is_left_mergeable(struct reiserfs_key *key, + unsigned long bsize) { - reiserfs_warning (NULL, "green-16003: Invalid item type observed, run fsck ASAP"); - return 0; + reiserfs_warning(NULL, + "green-16003: Invalid item type observed, run fsck ASAP"); + return 0; } - -static void errcatch_print_item 
(struct item_head * ih, char * item) +static void errcatch_print_item(struct item_head *ih, char *item) { - reiserfs_warning (NULL, "green-16004: Invalid item type observed, run fsck ASAP"); + reiserfs_warning(NULL, + "green-16004: Invalid item type observed, run fsck ASAP"); } - -static void errcatch_check_item (struct item_head * ih, char * item) +static void errcatch_check_item(struct item_head *ih, char *item) { - reiserfs_warning (NULL, "green-16005: Invalid item type observed, run fsck ASAP"); + reiserfs_warning(NULL, + "green-16005: Invalid item type observed, run fsck ASAP"); } -static int errcatch_create_vi (struct virtual_node * vn, - struct virtual_item * vi, - int is_affected, - int insert_size) +static int errcatch_create_vi(struct virtual_node *vn, + struct virtual_item *vi, + int is_affected, int insert_size) { - reiserfs_warning (NULL, "green-16006: Invalid item type observed, run fsck ASAP"); - return 0; // We might return -1 here as well, but it won't help as create_virtual_node() from where - // this operation is called from is of return type void. + reiserfs_warning(NULL, + "green-16006: Invalid item type observed, run fsck ASAP"); + return 0; // We might return -1 here as well, but it won't help as create_virtual_node() from where + // this operation is called from is of return type void. 
} -static int errcatch_check_left (struct virtual_item * vi, int free, - int start_skip, int end_skip) +static int errcatch_check_left(struct virtual_item *vi, int free, + int start_skip, int end_skip) { - reiserfs_warning (NULL, "green-16007: Invalid item type observed, run fsck ASAP"); - return -1; + reiserfs_warning(NULL, + "green-16007: Invalid item type observed, run fsck ASAP"); + return -1; } - -static int errcatch_check_right (struct virtual_item * vi, int free) +static int errcatch_check_right(struct virtual_item *vi, int free) { - reiserfs_warning (NULL, "green-16008: Invalid item type observed, run fsck ASAP"); - return -1; + reiserfs_warning(NULL, + "green-16008: Invalid item type observed, run fsck ASAP"); + return -1; } -static int errcatch_part_size (struct virtual_item * vi, int first, int count) +static int errcatch_part_size(struct virtual_item *vi, int first, int count) { - reiserfs_warning (NULL, "green-16009: Invalid item type observed, run fsck ASAP"); - return 0; + reiserfs_warning(NULL, + "green-16009: Invalid item type observed, run fsck ASAP"); + return 0; } -static int errcatch_unit_num (struct virtual_item * vi) +static int errcatch_unit_num(struct virtual_item *vi) { - reiserfs_warning (NULL, "green-16010: Invalid item type observed, run fsck ASAP"); - return 0; + reiserfs_warning(NULL, + "green-16010: Invalid item type observed, run fsck ASAP"); + return 0; } -static void errcatch_print_vi (struct virtual_item * vi) +static void errcatch_print_vi(struct virtual_item *vi) { - reiserfs_warning (NULL, "green-16011: Invalid item type observed, run fsck ASAP"); + reiserfs_warning(NULL, + "green-16011: Invalid item type observed, run fsck ASAP"); } static struct item_operations errcatch_ops = { - errcatch_bytes_number, - errcatch_decrement_key, - errcatch_is_left_mergeable, - errcatch_print_item, - errcatch_check_item, - - errcatch_create_vi, - errcatch_check_left, - errcatch_check_right, - errcatch_part_size, - errcatch_unit_num, - 
errcatch_print_vi + errcatch_bytes_number, + errcatch_decrement_key, + errcatch_is_left_mergeable, + errcatch_print_item, + errcatch_check_item, + + errcatch_create_vi, + errcatch_check_left, + errcatch_check_right, + errcatch_part_size, + errcatch_unit_num, + errcatch_print_vi }; - - ////////////////////////////////////////////////////////////////////////////// // // @@ -775,15 +744,11 @@ static struct item_operations errcatch_ops = { #error Item types must use disk-format assigned values. #endif -struct item_operations * item_ops [TYPE_ANY + 1] = { - &stat_data_ops, - &indirect_ops, - &direct_ops, - &direntry_ops, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - &errcatch_ops /* This is to catch errors with invalid type (15th entry for TYPE_ANY) */ +struct item_operations *item_ops[TYPE_ANY + 1] = { + &stat_data_ops, + &indirect_ops, + &direct_ops, + &direntry_ops, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + &errcatch_ops /* This is to catch errors with invalid type (15th entry for TYPE_ANY) */ }; - - - - diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index d1bcf0da672..c66c27ec410 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -55,7 +55,6 @@ #include #include - /* gets a struct reiserfs_journal_list * from a list head */ #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ j_list)) @@ -69,55 +68,61 @@ static int reiserfs_mounted_fs_count; static struct workqueue_struct *commit_wq; -#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit - structs at 4k */ -#define BUFNR 64 /*read ahead */ +#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit + structs at 4k */ +#define BUFNR 64 /*read ahead */ /* cnode stat bits. Move these into reiserfs_fs.h */ #define BLOCK_FREED 2 /* this block was freed, and can't be written. 
*/ -#define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ +#define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ #define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ #define BLOCK_DIRTIED 5 - /* journal list state bits */ #define LIST_TOUCHED 1 #define LIST_DIRTY 2 -#define LIST_COMMIT_PENDING 4 /* someone will commit this list */ +#define LIST_COMMIT_PENDING 4 /* someone will commit this list */ /* flags for do_journal_end */ #define FLUSH_ALL 1 /* flush commit and real blocks */ #define COMMIT_NOW 2 /* end and commit this transaction */ -#define WAIT 4 /* wait for the log blocks to hit the disk*/ - -static int do_journal_end(struct reiserfs_transaction_handle *,struct super_block *,unsigned long nblocks,int flags) ; -static int flush_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ; -static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ; -static int can_dirty(struct reiserfs_journal_cnode *cn) ; -static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks); -static int release_journal_dev( struct super_block *super, - struct reiserfs_journal *journal ); +#define WAIT 4 /* wait for the log blocks to hit the disk */ + +static int do_journal_end(struct reiserfs_transaction_handle *, + struct super_block *, unsigned long nblocks, + int flags); +static int flush_journal_list(struct super_block *s, + struct reiserfs_journal_list *jl, int flushall); +static int flush_commit_list(struct super_block *s, + struct reiserfs_journal_list *jl, int flushall); +static int can_dirty(struct reiserfs_journal_cnode *cn); +static int journal_join(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb, unsigned long nblocks); +static int release_journal_dev(struct super_block *super, + struct reiserfs_journal *journal); static int 
dirty_one_transaction(struct super_block *s, - struct reiserfs_journal_list *jl); + struct reiserfs_journal_list *jl); static void flush_async_commits(void *p); static void queue_log_writer(struct super_block *s); /* values for join in do_journal_begin_r */ enum { - JBEGIN_REG = 0, /* regular journal begin */ - JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ - JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ + JBEGIN_REG = 0, /* regular journal begin */ + JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ + JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ }; static int do_journal_begin_r(struct reiserfs_transaction_handle *th, - struct super_block * p_s_sb, - unsigned long nblocks,int join); + struct super_block *p_s_sb, + unsigned long nblocks, int join); -static void init_journal_hash(struct super_block *p_s_sb) { - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - memset(journal->j_hash_table, 0, JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ; +static void init_journal_hash(struct super_block *p_s_sb) +{ + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + memset(journal->j_hash_table, 0, + JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); } /* @@ -125,149 +130,159 @@ static void init_journal_hash(struct super_block *p_s_sb) { ** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for ** more details. 
*/ -static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) { - if (bh) { - clear_buffer_dirty(bh); - clear_buffer_journal_test(bh); - } - return 0 ; +static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) +{ + if (bh) { + clear_buffer_dirty(bh); + clear_buffer_journal_test(bh); + } + return 0; } static void disable_barrier(struct super_block *s) { - REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH); - printk("reiserfs: disabling flush barriers on %s\n", reiserfs_bdevname(s)); -} - -static struct reiserfs_bitmap_node * -allocate_bitmap_node(struct super_block *p_s_sb) { - struct reiserfs_bitmap_node *bn ; - static int id; - - bn = reiserfs_kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS, p_s_sb) ; - if (!bn) { - return NULL ; - } - bn->data = reiserfs_kmalloc(p_s_sb->s_blocksize, GFP_NOFS, p_s_sb) ; - if (!bn->data) { - reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ; - return NULL ; - } - bn->id = id++ ; - memset(bn->data, 0, p_s_sb->s_blocksize) ; - INIT_LIST_HEAD(&bn->list) ; - return bn ; -} - -static struct reiserfs_bitmap_node * -get_bitmap_node(struct super_block *p_s_sb) { - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - struct reiserfs_bitmap_node *bn = NULL; - struct list_head *entry = journal->j_bitmap_nodes.next ; - - journal->j_used_bitmap_nodes++ ; -repeat: - - if(entry != &journal->j_bitmap_nodes) { - bn = list_entry(entry, struct reiserfs_bitmap_node, list) ; - list_del(entry) ; - memset(bn->data, 0, p_s_sb->s_blocksize) ; - journal->j_free_bitmap_nodes-- ; - return bn ; - } - bn = allocate_bitmap_node(p_s_sb) ; - if (!bn) { - yield(); - goto repeat ; - } - return bn ; + REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH); + printk("reiserfs: disabling flush barriers on %s\n", + reiserfs_bdevname(s)); +} + +static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block + *p_s_sb) +{ + struct reiserfs_bitmap_node *bn; + static int id; + + bn = 
reiserfs_kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS, + p_s_sb); + if (!bn) { + return NULL; + } + bn->data = reiserfs_kmalloc(p_s_sb->s_blocksize, GFP_NOFS, p_s_sb); + if (!bn->data) { + reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb); + return NULL; + } + bn->id = id++; + memset(bn->data, 0, p_s_sb->s_blocksize); + INIT_LIST_HEAD(&bn->list); + return bn; +} + +static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *p_s_sb) +{ + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + struct reiserfs_bitmap_node *bn = NULL; + struct list_head *entry = journal->j_bitmap_nodes.next; + + journal->j_used_bitmap_nodes++; + repeat: + + if (entry != &journal->j_bitmap_nodes) { + bn = list_entry(entry, struct reiserfs_bitmap_node, list); + list_del(entry); + memset(bn->data, 0, p_s_sb->s_blocksize); + journal->j_free_bitmap_nodes--; + return bn; + } + bn = allocate_bitmap_node(p_s_sb); + if (!bn) { + yield(); + goto repeat; + } + return bn; } static inline void free_bitmap_node(struct super_block *p_s_sb, - struct reiserfs_bitmap_node *bn) { - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - journal->j_used_bitmap_nodes-- ; - if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { - reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb) ; - reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ; - } else { - list_add(&bn->list, &journal->j_bitmap_nodes) ; - journal->j_free_bitmap_nodes++ ; - } -} - -static void allocate_bitmap_nodes(struct super_block *p_s_sb) { - int i ; - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - struct reiserfs_bitmap_node *bn = NULL ; - for (i = 0 ; i < REISERFS_MIN_BITMAP_NODES ; i++) { - bn = allocate_bitmap_node(p_s_sb) ; - if (bn) { - list_add(&bn->list, &journal->j_bitmap_nodes) ; - journal->j_free_bitmap_nodes++ ; - } else { - break ; // this is ok, we'll try again when more are needed - } - } + struct reiserfs_bitmap_node *bn) +{ + struct reiserfs_journal 
*journal = SB_JOURNAL(p_s_sb); + journal->j_used_bitmap_nodes--; + if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { + reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb); + reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb); + } else { + list_add(&bn->list, &journal->j_bitmap_nodes); + journal->j_free_bitmap_nodes++; + } +} + +static void allocate_bitmap_nodes(struct super_block *p_s_sb) +{ + int i; + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + struct reiserfs_bitmap_node *bn = NULL; + for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) { + bn = allocate_bitmap_node(p_s_sb); + if (bn) { + list_add(&bn->list, &journal->j_bitmap_nodes); + journal->j_free_bitmap_nodes++; + } else { + break; // this is ok, we'll try again when more are needed + } + } } static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block, - struct reiserfs_list_bitmap *jb) { - int bmap_nr = block / (p_s_sb->s_blocksize << 3) ; - int bit_nr = block % (p_s_sb->s_blocksize << 3) ; + struct reiserfs_list_bitmap *jb) +{ + int bmap_nr = block / (p_s_sb->s_blocksize << 3); + int bit_nr = block % (p_s_sb->s_blocksize << 3); - if (!jb->bitmaps[bmap_nr]) { - jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb) ; - } - set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data) ; - return 0 ; + if (!jb->bitmaps[bmap_nr]) { + jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb); + } + set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data); + return 0; } static void cleanup_bitmap_list(struct super_block *p_s_sb, - struct reiserfs_list_bitmap *jb) { - int i; - if (jb->bitmaps == NULL) - return; - - for (i = 0 ; i < SB_BMAP_NR(p_s_sb) ; i++) { - if (jb->bitmaps[i]) { - free_bitmap_node(p_s_sb, jb->bitmaps[i]) ; - jb->bitmaps[i] = NULL ; - } - } + struct reiserfs_list_bitmap *jb) +{ + int i; + if (jb->bitmaps == NULL) + return; + + for (i = 0; i < SB_BMAP_NR(p_s_sb); i++) { + if (jb->bitmaps[i]) { + free_bitmap_node(p_s_sb, jb->bitmaps[i]); + jb->bitmaps[i] = NULL; 
+ } + } } /* ** only call this on FS unmount. */ static int free_list_bitmaps(struct super_block *p_s_sb, - struct reiserfs_list_bitmap *jb_array) { - int i ; - struct reiserfs_list_bitmap *jb ; - for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { - jb = jb_array + i ; - jb->journal_list = NULL ; - cleanup_bitmap_list(p_s_sb, jb) ; - vfree(jb->bitmaps) ; - jb->bitmaps = NULL ; - } - return 0; -} - -static int free_bitmap_nodes(struct super_block *p_s_sb) { - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - struct list_head *next = journal->j_bitmap_nodes.next ; - struct reiserfs_bitmap_node *bn ; - - while(next != &journal->j_bitmap_nodes) { - bn = list_entry(next, struct reiserfs_bitmap_node, list) ; - list_del(next) ; - reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb) ; - reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ; - next = journal->j_bitmap_nodes.next ; - journal->j_free_bitmap_nodes-- ; - } - - return 0 ; + struct reiserfs_list_bitmap *jb_array) +{ + int i; + struct reiserfs_list_bitmap *jb; + for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { + jb = jb_array + i; + jb->journal_list = NULL; + cleanup_bitmap_list(p_s_sb, jb); + vfree(jb->bitmaps); + jb->bitmaps = NULL; + } + return 0; +} + +static int free_bitmap_nodes(struct super_block *p_s_sb) +{ + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + struct list_head *next = journal->j_bitmap_nodes.next; + struct reiserfs_bitmap_node *bn; + + while (next != &journal->j_bitmap_nodes) { + bn = list_entry(next, struct reiserfs_bitmap_node, list); + list_del(next); + reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb); + reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb); + next = journal->j_bitmap_nodes.next; + journal->j_free_bitmap_nodes--; + } + + return 0; } /* @@ -275,59 +290,65 @@ static int free_bitmap_nodes(struct super_block *p_s_sb) { ** jb_array is the array to be filled in. 
*/ int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, - struct reiserfs_list_bitmap *jb_array, - int bmap_nr) { - int i ; - int failed = 0 ; - struct reiserfs_list_bitmap *jb ; - int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *) ; - - for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { - jb = jb_array + i ; - jb->journal_list = NULL ; - jb->bitmaps = vmalloc( mem ) ; - if (!jb->bitmaps) { - reiserfs_warning(p_s_sb, "clm-2000, unable to allocate bitmaps for journal lists") ; - failed = 1; - break ; - } - memset(jb->bitmaps, 0, mem) ; - } - if (failed) { - free_list_bitmaps(p_s_sb, jb_array) ; - return -1 ; - } - return 0 ; + struct reiserfs_list_bitmap *jb_array, + int bmap_nr) +{ + int i; + int failed = 0; + struct reiserfs_list_bitmap *jb; + int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *); + + for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { + jb = jb_array + i; + jb->journal_list = NULL; + jb->bitmaps = vmalloc(mem); + if (!jb->bitmaps) { + reiserfs_warning(p_s_sb, + "clm-2000, unable to allocate bitmaps for journal lists"); + failed = 1; + break; + } + memset(jb->bitmaps, 0, mem); + } + if (failed) { + free_list_bitmaps(p_s_sb, jb_array); + return -1; + } + return 0; } /* ** find an available list bitmap. 
If you can't find one, flush a commit list ** and try again */ -static struct reiserfs_list_bitmap * -get_list_bitmap(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) { - int i,j ; - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - struct reiserfs_list_bitmap *jb = NULL ; - - for (j = 0 ; j < (JOURNAL_NUM_BITMAPS * 3) ; j++) { - i = journal->j_list_bitmap_index ; - journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS ; - jb = journal->j_list_bitmap + i ; - if (journal->j_list_bitmap[i].journal_list) { - flush_commit_list(p_s_sb, journal->j_list_bitmap[i].journal_list, 1) ; - if (!journal->j_list_bitmap[i].journal_list) { - break ; - } - } else { - break ; - } - } - if (jb->journal_list) { /* double check to make sure if flushed correctly */ - return NULL ; - } - jb->journal_list = jl ; - return jb ; +static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb, + struct reiserfs_journal_list + *jl) +{ + int i, j; + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + struct reiserfs_list_bitmap *jb = NULL; + + for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) { + i = journal->j_list_bitmap_index; + journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS; + jb = journal->j_list_bitmap + i; + if (journal->j_list_bitmap[i].journal_list) { + flush_commit_list(p_s_sb, + journal->j_list_bitmap[i]. 
+ journal_list, 1); + if (!journal->j_list_bitmap[i].journal_list) { + break; + } + } else { + break; + } + } + if (jb->journal_list) { /* double check to make sure if flushed correctly */ + return NULL; + } + jb->journal_list = jl; + return jb; } /* @@ -335,104 +356,114 @@ get_list_bitmap(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) { ** Uses the cnode->next and cnode->prev pointers ** returns NULL on failure */ -static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) { - struct reiserfs_journal_cnode *head ; - int i ; - if (num_cnodes <= 0) { - return NULL ; - } - head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)) ; - if (!head) { - return NULL ; - } - memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)) ; - head[0].prev = NULL ; - head[0].next = head + 1 ; - for (i = 1 ; i < num_cnodes; i++) { - head[i].prev = head + (i - 1) ; - head[i].next = head + (i + 1) ; /* if last one, overwrite it after the if */ - } - head[num_cnodes -1].next = NULL ; - return head ; +static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) +{ + struct reiserfs_journal_cnode *head; + int i; + if (num_cnodes <= 0) { + return NULL; + } + head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)); + if (!head) { + return NULL; + } + memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)); + head[0].prev = NULL; + head[0].next = head + 1; + for (i = 1; i < num_cnodes; i++) { + head[i].prev = head + (i - 1); + head[i].next = head + (i + 1); /* if last one, overwrite it after the if */ + } + head[num_cnodes - 1].next = NULL; + return head; } /* ** pulls a cnode off the free list, or returns NULL on failure */ -static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) { - struct reiserfs_journal_cnode *cn ; - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - - reiserfs_check_lock_depth(p_s_sb, "get_cnode") ; - - if (journal->j_cnode_free <= 0) { - return NULL ; - } - 
journal->j_cnode_used++ ; - journal->j_cnode_free-- ; - cn = journal->j_cnode_free_list ; - if (!cn) { - return cn ; - } - if (cn->next) { - cn->next->prev = NULL ; - } - journal->j_cnode_free_list = cn->next ; - memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; - return cn ; +static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) +{ + struct reiserfs_journal_cnode *cn; + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + + reiserfs_check_lock_depth(p_s_sb, "get_cnode"); + + if (journal->j_cnode_free <= 0) { + return NULL; + } + journal->j_cnode_used++; + journal->j_cnode_free--; + cn = journal->j_cnode_free_list; + if (!cn) { + return cn; + } + if (cn->next) { + cn->next->prev = NULL; + } + journal->j_cnode_free_list = cn->next; + memset(cn, 0, sizeof(struct reiserfs_journal_cnode)); + return cn; } /* ** returns a cnode to the free list */ -static void free_cnode(struct super_block *p_s_sb, struct reiserfs_journal_cnode *cn) { - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); +static void free_cnode(struct super_block *p_s_sb, + struct reiserfs_journal_cnode *cn) +{ + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); - reiserfs_check_lock_depth(p_s_sb, "free_cnode") ; + reiserfs_check_lock_depth(p_s_sb, "free_cnode"); - journal->j_cnode_used-- ; - journal->j_cnode_free++ ; - /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ - cn->next = journal->j_cnode_free_list ; - if (journal->j_cnode_free_list) { - journal->j_cnode_free_list->prev = cn ; - } - cn->prev = NULL ; /* not needed with the memset, but I might kill the memset, and forget to do this */ - journal->j_cnode_free_list = cn ; + journal->j_cnode_used--; + journal->j_cnode_free++; + /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ + cn->next = journal->j_cnode_free_list; + if (journal->j_cnode_free_list) { + journal->j_cnode_free_list->prev = cn; + } + cn->prev = NULL; /* not needed with the memset, but I might kill the memset, and forget to 
do this */ + journal->j_cnode_free_list = cn; } -static void clear_prepared_bits(struct buffer_head *bh) { - clear_buffer_journal_prepared (bh); - clear_buffer_journal_restore_dirty (bh); +static void clear_prepared_bits(struct buffer_head *bh) +{ + clear_buffer_journal_prepared(bh); + clear_buffer_journal_restore_dirty(bh); } /* utility function to force a BUG if it is called without the big ** kernel lock held. caller is the string printed just before calling BUG() */ -void reiserfs_check_lock_depth(struct super_block *sb, char *caller) { +void reiserfs_check_lock_depth(struct super_block *sb, char *caller) +{ #ifdef CONFIG_SMP - if (current->lock_depth < 0) { - reiserfs_panic (sb, "%s called without kernel lock held", caller) ; - } + if (current->lock_depth < 0) { + reiserfs_panic(sb, "%s called without kernel lock held", + caller); + } #else - ; + ; #endif } /* return a cnode with same dev, block number and size in table, or null if not found */ -static inline struct reiserfs_journal_cnode * -get_journal_hash_dev(struct super_block *sb, - struct reiserfs_journal_cnode **table, - long bl) +static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct + super_block + *sb, + struct + reiserfs_journal_cnode + **table, + long bl) { - struct reiserfs_journal_cnode *cn ; - cn = journal_hash(table, sb, bl) ; - while(cn) { - if (cn->blocknr == bl && cn->sb == sb) - return cn ; - cn = cn->hnext ; - } - return (struct reiserfs_journal_cnode *)0 ; + struct reiserfs_journal_cnode *cn; + cn = journal_hash(table, sb, bl); + while (cn) { + if (cn->blocknr == bl && cn->sb == sb) + return cn; + cn = cn->hnext; + } + return (struct reiserfs_journal_cnode *)0; } /* @@ -454,91 +485,103 @@ get_journal_hash_dev(struct super_block *sb, ** */ int reiserfs_in_journal(struct super_block *p_s_sb, - int bmap_nr, int bit_nr, int search_all, - b_blocknr_t *next_zero_bit) { - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - struct reiserfs_journal_cnode *cn ; - struct 
reiserfs_list_bitmap *jb ; - int i ; - unsigned long bl; - - *next_zero_bit = 0 ; /* always start this at zero. */ - - PROC_INFO_INC( p_s_sb, journal.in_journal ); - /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. - ** if we crash before the transaction that freed it commits, this transaction won't - ** have committed either, and the block will never be written - */ - if (search_all) { - for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { - PROC_INFO_INC( p_s_sb, journal.in_journal_bitmap ); - jb = journal->j_list_bitmap + i ; - if (jb->journal_list && jb->bitmaps[bmap_nr] && - test_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data)) { - *next_zero_bit = find_next_zero_bit((unsigned long *) - (jb->bitmaps[bmap_nr]->data), - p_s_sb->s_blocksize << 3, bit_nr+1) ; - return 1 ; - } - } - } - - bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr; - /* is it in any old transactions? */ - if (search_all && (cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) { - return 1; - } - - /* is it in the current transaction. This should never happen */ - if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) { - BUG(); - return 1; - } - - PROC_INFO_INC( p_s_sb, journal.in_journal_reusable ); - /* safe for reuse */ - return 0 ; + int bmap_nr, int bit_nr, int search_all, + b_blocknr_t * next_zero_bit) +{ + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + struct reiserfs_journal_cnode *cn; + struct reiserfs_list_bitmap *jb; + int i; + unsigned long bl; + + *next_zero_bit = 0; /* always start this at zero. */ + + PROC_INFO_INC(p_s_sb, journal.in_journal); + /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. 
+ ** if we crash before the transaction that freed it commits, this transaction won't + ** have committed either, and the block will never be written + */ + if (search_all) { + for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { + PROC_INFO_INC(p_s_sb, journal.in_journal_bitmap); + jb = journal->j_list_bitmap + i; + if (jb->journal_list && jb->bitmaps[bmap_nr] && + test_bit(bit_nr, + (unsigned long *)jb->bitmaps[bmap_nr]-> + data)) { + *next_zero_bit = + find_next_zero_bit((unsigned long *) + (jb->bitmaps[bmap_nr]-> + data), + p_s_sb->s_blocksize << 3, + bit_nr + 1); + return 1; + } + } + } + + bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr; + /* is it in any old transactions? */ + if (search_all + && (cn = + get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) { + return 1; + } + + /* is it in the current transaction. This should never happen */ + if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) { + BUG(); + return 1; + } + + PROC_INFO_INC(p_s_sb, journal.in_journal_reusable); + /* safe for reuse */ + return 0; } /* insert cn into table */ -static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, struct reiserfs_journal_cnode *cn) { - struct reiserfs_journal_cnode *cn_orig ; +static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, + struct reiserfs_journal_cnode *cn) +{ + struct reiserfs_journal_cnode *cn_orig; - cn_orig = journal_hash(table, cn->sb, cn->blocknr) ; - cn->hnext = cn_orig ; - cn->hprev = NULL ; - if (cn_orig) { - cn_orig->hprev = cn ; - } - journal_hash(table, cn->sb, cn->blocknr) = cn ; + cn_orig = journal_hash(table, cn->sb, cn->blocknr); + cn->hnext = cn_orig; + cn->hprev = NULL; + if (cn_orig) { + cn_orig->hprev = cn; + } + journal_hash(table, cn->sb, cn->blocknr) = cn; } /* lock the current transaction */ -inline static void lock_journal(struct super_block *p_s_sb) { - PROC_INFO_INC( p_s_sb, journal.lock_journal ); - down(&SB_JOURNAL(p_s_sb)->j_lock); +inline static void 
lock_journal(struct super_block *p_s_sb) +{ + PROC_INFO_INC(p_s_sb, journal.lock_journal); + down(&SB_JOURNAL(p_s_sb)->j_lock); } /* unlock the current transaction */ -inline static void unlock_journal(struct super_block *p_s_sb) { - up(&SB_JOURNAL(p_s_sb)->j_lock); +inline static void unlock_journal(struct super_block *p_s_sb) +{ + up(&SB_JOURNAL(p_s_sb)->j_lock); } static inline void get_journal_list(struct reiserfs_journal_list *jl) { - jl->j_refcount++; + jl->j_refcount++; } static inline void put_journal_list(struct super_block *s, - struct reiserfs_journal_list *jl) + struct reiserfs_journal_list *jl) { - if (jl->j_refcount < 1) { - reiserfs_panic (s, "trans id %lu, refcount at %d", jl->j_trans_id, - jl->j_refcount); - } - if (--jl->j_refcount == 0) - reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s); + if (jl->j_refcount < 1) { + reiserfs_panic(s, "trans id %lu, refcount at %d", + jl->j_trans_id, jl->j_refcount); + } + if (--jl->j_refcount == 0) + reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s); } /* @@ -546,358 +589,375 @@ static inline void put_journal_list(struct super_block *s, ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a ** transaction. 
*/ -static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) { +static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, + struct reiserfs_journal_list *jl) +{ - struct reiserfs_list_bitmap *jb = jl->j_list_bitmap ; - if (jb) { - cleanup_bitmap_list(p_s_sb, jb) ; - } - jl->j_list_bitmap->journal_list = NULL ; - jl->j_list_bitmap = NULL ; + struct reiserfs_list_bitmap *jb = jl->j_list_bitmap; + if (jb) { + cleanup_bitmap_list(p_s_sb, jb); + } + jl->j_list_bitmap->journal_list = NULL; + jl->j_list_bitmap = NULL; } static int journal_list_still_alive(struct super_block *s, - unsigned long trans_id) -{ - struct reiserfs_journal *journal = SB_JOURNAL (s); - struct list_head *entry = &journal->j_journal_list; - struct reiserfs_journal_list *jl; - - if (!list_empty(entry)) { - jl = JOURNAL_LIST_ENTRY(entry->next); - if (jl->j_trans_id <= trans_id) { - return 1; - } - } - return 0; -} - -static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) { - char b[BDEVNAME_SIZE]; - - if (buffer_journaled(bh)) { - reiserfs_warning(NULL, "clm-2084: pinned buffer %lu:%s sent to disk", - bh->b_blocknr, bdevname(bh->b_bdev, b)) ; - } - if (uptodate) - set_buffer_uptodate(bh) ; - else - clear_buffer_uptodate(bh) ; - unlock_buffer(bh) ; - put_bh(bh) ; -} - -static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) { - if (uptodate) - set_buffer_uptodate(bh) ; - else - clear_buffer_uptodate(bh) ; - unlock_buffer(bh) ; - put_bh(bh) ; -} - -static void submit_logged_buffer(struct buffer_head *bh) { - get_bh(bh) ; - bh->b_end_io = reiserfs_end_buffer_io_sync ; - clear_buffer_journal_new (bh); - clear_buffer_dirty(bh) ; - if (!test_clear_buffer_journal_test (bh)) - BUG(); - if (!buffer_uptodate(bh)) - BUG(); - submit_bh(WRITE, bh) ; -} - -static void submit_ordered_buffer(struct buffer_head *bh) { - get_bh(bh) ; - bh->b_end_io = reiserfs_end_ordered_io; - clear_buffer_dirty(bh) ; - if 
(!buffer_uptodate(bh)) - BUG(); - submit_bh(WRITE, bh) ; -} - -static int submit_barrier_buffer(struct buffer_head *bh) { - get_bh(bh) ; - bh->b_end_io = reiserfs_end_ordered_io; - clear_buffer_dirty(bh) ; - if (!buffer_uptodate(bh)) - BUG(); - return submit_bh(WRITE_BARRIER, bh) ; + unsigned long trans_id) +{ + struct reiserfs_journal *journal = SB_JOURNAL(s); + struct list_head *entry = &journal->j_journal_list; + struct reiserfs_journal_list *jl; + + if (!list_empty(entry)) { + jl = JOURNAL_LIST_ENTRY(entry->next); + if (jl->j_trans_id <= trans_id) { + return 1; + } + } + return 0; +} + +static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) +{ + char b[BDEVNAME_SIZE]; + + if (buffer_journaled(bh)) { + reiserfs_warning(NULL, + "clm-2084: pinned buffer %lu:%s sent to disk", + bh->b_blocknr, bdevname(bh->b_bdev, b)); + } + if (uptodate) + set_buffer_uptodate(bh); + else + clear_buffer_uptodate(bh); + unlock_buffer(bh); + put_bh(bh); +} + +static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) +{ + if (uptodate) + set_buffer_uptodate(bh); + else + clear_buffer_uptodate(bh); + unlock_buffer(bh); + put_bh(bh); +} + +static void submit_logged_buffer(struct buffer_head *bh) +{ + get_bh(bh); + bh->b_end_io = reiserfs_end_buffer_io_sync; + clear_buffer_journal_new(bh); + clear_buffer_dirty(bh); + if (!test_clear_buffer_journal_test(bh)) + BUG(); + if (!buffer_uptodate(bh)) + BUG(); + submit_bh(WRITE, bh); +} + +static void submit_ordered_buffer(struct buffer_head *bh) +{ + get_bh(bh); + bh->b_end_io = reiserfs_end_ordered_io; + clear_buffer_dirty(bh); + if (!buffer_uptodate(bh)) + BUG(); + submit_bh(WRITE, bh); +} + +static int submit_barrier_buffer(struct buffer_head *bh) +{ + get_bh(bh); + bh->b_end_io = reiserfs_end_ordered_io; + clear_buffer_dirty(bh); + if (!buffer_uptodate(bh)) + BUG(); + return submit_bh(WRITE_BARRIER, bh); } static void check_barrier_completion(struct super_block *s, - struct buffer_head *bh) { - if 
(buffer_eopnotsupp(bh)) { - clear_buffer_eopnotsupp(bh); - disable_barrier(s); - set_buffer_uptodate(bh); - set_buffer_dirty(bh); - sync_dirty_buffer(bh); - } + struct buffer_head *bh) +{ + if (buffer_eopnotsupp(bh)) { + clear_buffer_eopnotsupp(bh); + disable_barrier(s); + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + sync_dirty_buffer(bh); + } } #define CHUNK_SIZE 32 struct buffer_chunk { - struct buffer_head *bh[CHUNK_SIZE]; - int nr; + struct buffer_head *bh[CHUNK_SIZE]; + int nr; }; -static void write_chunk(struct buffer_chunk *chunk) { - int i; - get_fs_excl(); - for (i = 0; i < chunk->nr ; i++) { - submit_logged_buffer(chunk->bh[i]) ; - } - chunk->nr = 0; - put_fs_excl(); +static void write_chunk(struct buffer_chunk *chunk) +{ + int i; + get_fs_excl(); + for (i = 0; i < chunk->nr; i++) { + submit_logged_buffer(chunk->bh[i]); + } + chunk->nr = 0; + put_fs_excl(); } -static void write_ordered_chunk(struct buffer_chunk *chunk) { - int i; - get_fs_excl(); - for (i = 0; i < chunk->nr ; i++) { - submit_ordered_buffer(chunk->bh[i]) ; - } - chunk->nr = 0; - put_fs_excl(); +static void write_ordered_chunk(struct buffer_chunk *chunk) +{ + int i; + get_fs_excl(); + for (i = 0; i < chunk->nr; i++) { + submit_ordered_buffer(chunk->bh[i]); + } + chunk->nr = 0; + put_fs_excl(); } static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, - spinlock_t *lock, - void (fn)(struct buffer_chunk *)) + spinlock_t * lock, void (fn) (struct buffer_chunk *)) { - int ret = 0; - if (chunk->nr >= CHUNK_SIZE) - BUG(); - chunk->bh[chunk->nr++] = bh; - if (chunk->nr >= CHUNK_SIZE) { - ret = 1; - if (lock) - spin_unlock(lock); - fn(chunk); - if (lock) - spin_lock(lock); - } - return ret; + int ret = 0; + if (chunk->nr >= CHUNK_SIZE) + BUG(); + chunk->bh[chunk->nr++] = bh; + if (chunk->nr >= CHUNK_SIZE) { + ret = 1; + if (lock) + spin_unlock(lock); + fn(chunk); + if (lock) + spin_lock(lock); + } + return ret; } - static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0); -static 
struct reiserfs_jh *alloc_jh(void) { - struct reiserfs_jh *jh; - while(1) { - jh = kmalloc(sizeof(*jh), GFP_NOFS); - if (jh) { - atomic_inc(&nr_reiserfs_jh); - return jh; +static struct reiserfs_jh *alloc_jh(void) +{ + struct reiserfs_jh *jh; + while (1) { + jh = kmalloc(sizeof(*jh), GFP_NOFS); + if (jh) { + atomic_inc(&nr_reiserfs_jh); + return jh; + } + yield(); } - yield(); - } } /* * we want to free the jh when the buffer has been written * and waited on */ -void reiserfs_free_jh(struct buffer_head *bh) { - struct reiserfs_jh *jh; - - jh = bh->b_private; - if (jh) { - bh->b_private = NULL; - jh->bh = NULL; - list_del_init(&jh->list); - kfree(jh); - if (atomic_read(&nr_reiserfs_jh) <= 0) - BUG(); - atomic_dec(&nr_reiserfs_jh); - put_bh(bh); - } +void reiserfs_free_jh(struct buffer_head *bh) +{ + struct reiserfs_jh *jh; + + jh = bh->b_private; + if (jh) { + bh->b_private = NULL; + jh->bh = NULL; + list_del_init(&jh->list); + kfree(jh); + if (atomic_read(&nr_reiserfs_jh) <= 0) + BUG(); + atomic_dec(&nr_reiserfs_jh); + put_bh(bh); + } } static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh, - int tail) + int tail) { - struct reiserfs_jh *jh; + struct reiserfs_jh *jh; - if (bh->b_private) { - spin_lock(&j->j_dirty_buffers_lock); - if (!bh->b_private) { - spin_unlock(&j->j_dirty_buffers_lock); - goto no_jh; + if (bh->b_private) { + spin_lock(&j->j_dirty_buffers_lock); + if (!bh->b_private) { + spin_unlock(&j->j_dirty_buffers_lock); + goto no_jh; + } + jh = bh->b_private; + list_del_init(&jh->list); + } else { + no_jh: + get_bh(bh); + jh = alloc_jh(); + spin_lock(&j->j_dirty_buffers_lock); + /* buffer must be locked for __add_jh, should be able to have + * two adds at the same time + */ + if (bh->b_private) + BUG(); + jh->bh = bh; + bh->b_private = jh; } - jh = bh->b_private; - list_del_init(&jh->list); - } else { -no_jh: - get_bh(bh); - jh = alloc_jh(); - spin_lock(&j->j_dirty_buffers_lock); - /* buffer must be locked for __add_jh, should be 
able to have - * two adds at the same time - */ - if (bh->b_private) - BUG(); - jh->bh = bh; - bh->b_private = jh; - } - jh->jl = j->j_current_jl; - if (tail) - list_add_tail(&jh->list, &jh->jl->j_tail_bh_list); - else { - list_add_tail(&jh->list, &jh->jl->j_bh_list); - } - spin_unlock(&j->j_dirty_buffers_lock); - return 0; + jh->jl = j->j_current_jl; + if (tail) + list_add_tail(&jh->list, &jh->jl->j_tail_bh_list); + else { + list_add_tail(&jh->list, &jh->jl->j_bh_list); + } + spin_unlock(&j->j_dirty_buffers_lock); + return 0; } -int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) { - return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1); +int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) +{ + return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1); } -int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) { - return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0); +int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) +{ + return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0); } #define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list) -static int write_ordered_buffers(spinlock_t *lock, +static int write_ordered_buffers(spinlock_t * lock, struct reiserfs_journal *j, - struct reiserfs_journal_list *jl, + struct reiserfs_journal_list *jl, struct list_head *list) { - struct buffer_head *bh; - struct reiserfs_jh *jh; - int ret = j->j_errno; - struct buffer_chunk chunk; - struct list_head tmp; - INIT_LIST_HEAD(&tmp); - - chunk.nr = 0; - spin_lock(lock); - while(!list_empty(list)) { - jh = JH_ENTRY(list->next); - bh = jh->bh; - get_bh(bh); - if (test_set_buffer_locked(bh)) { - if (!buffer_dirty(bh)) { - list_del_init(&jh->list); - list_add(&jh->list, &tmp); - goto loop_next; - } - spin_unlock(lock); - if (chunk.nr) + struct buffer_head *bh; + struct reiserfs_jh *jh; + int ret = j->j_errno; + struct buffer_chunk chunk; + struct list_head tmp; + INIT_LIST_HEAD(&tmp); + + chunk.nr = 0; + spin_lock(lock); + while 
(!list_empty(list)) { + jh = JH_ENTRY(list->next); + bh = jh->bh; + get_bh(bh); + if (test_set_buffer_locked(bh)) { + if (!buffer_dirty(bh)) { + list_del_init(&jh->list); + list_add(&jh->list, &tmp); + goto loop_next; + } + spin_unlock(lock); + if (chunk.nr) + write_ordered_chunk(&chunk); + wait_on_buffer(bh); + cond_resched(); + spin_lock(lock); + goto loop_next; + } + if (buffer_dirty(bh)) { + list_del_init(&jh->list); + list_add(&jh->list, &tmp); + add_to_chunk(&chunk, bh, lock, write_ordered_chunk); + } else { + reiserfs_free_jh(bh); + unlock_buffer(bh); + } + loop_next: + put_bh(bh); + cond_resched_lock(lock); + } + if (chunk.nr) { + spin_unlock(lock); write_ordered_chunk(&chunk); - wait_on_buffer(bh); - cond_resched(); - spin_lock(lock); - goto loop_next; - } - if (buffer_dirty(bh)) { - list_del_init(&jh->list); - list_add(&jh->list, &tmp); - add_to_chunk(&chunk, bh, lock, write_ordered_chunk); - } else { - reiserfs_free_jh(bh); - unlock_buffer(bh); + spin_lock(lock); } -loop_next: - put_bh(bh); - cond_resched_lock(lock); - } - if (chunk.nr) { - spin_unlock(lock); - write_ordered_chunk(&chunk); - spin_lock(lock); - } - while(!list_empty(&tmp)) { - jh = JH_ENTRY(tmp.prev); - bh = jh->bh; - get_bh(bh); - reiserfs_free_jh(bh); - - if (buffer_locked(bh)) { - spin_unlock(lock); - wait_on_buffer(bh); - spin_lock(lock); + while (!list_empty(&tmp)) { + jh = JH_ENTRY(tmp.prev); + bh = jh->bh; + get_bh(bh); + reiserfs_free_jh(bh); + + if (buffer_locked(bh)) { + spin_unlock(lock); + wait_on_buffer(bh); + spin_lock(lock); + } + if (!buffer_uptodate(bh)) { + ret = -EIO; + } + put_bh(bh); + cond_resched_lock(lock); } - if (!buffer_uptodate(bh)) { - ret = -EIO; - } - put_bh(bh); - cond_resched_lock(lock); - } - spin_unlock(lock); - return ret; -} - -static int flush_older_commits(struct super_block *s, struct reiserfs_journal_list *jl) { - struct reiserfs_journal *journal = SB_JOURNAL (s); - struct reiserfs_journal_list *other_jl; - struct reiserfs_journal_list *first_jl; - 
struct list_head *entry; - unsigned long trans_id = jl->j_trans_id; - unsigned long other_trans_id; - unsigned long first_trans_id; - -find_first: - /* - * first we walk backwards to find the oldest uncommitted transation - */ - first_jl = jl; - entry = jl->j_list.prev; - while(1) { - other_jl = JOURNAL_LIST_ENTRY(entry); - if (entry == &journal->j_journal_list || - atomic_read(&other_jl->j_older_commits_done)) - break; - - first_jl = other_jl; - entry = other_jl->j_list.prev; - } - - /* if we didn't find any older uncommitted transactions, return now */ - if (first_jl == jl) { - return 0; - } - - first_trans_id = first_jl->j_trans_id; + spin_unlock(lock); + return ret; +} - entry = &first_jl->j_list; - while(1) { - other_jl = JOURNAL_LIST_ENTRY(entry); - other_trans_id = other_jl->j_trans_id; +static int flush_older_commits(struct super_block *s, + struct reiserfs_journal_list *jl) +{ + struct reiserfs_journal *journal = SB_JOURNAL(s); + struct reiserfs_journal_list *other_jl; + struct reiserfs_journal_list *first_jl; + struct list_head *entry; + unsigned long trans_id = jl->j_trans_id; + unsigned long other_trans_id; + unsigned long first_trans_id; + + find_first: + /* + * first we walk backwards to find the oldest uncommitted transation + */ + first_jl = jl; + entry = jl->j_list.prev; + while (1) { + other_jl = JOURNAL_LIST_ENTRY(entry); + if (entry == &journal->j_journal_list || + atomic_read(&other_jl->j_older_commits_done)) + break; - if (other_trans_id < trans_id) { - if (atomic_read(&other_jl->j_commit_left) != 0) { - flush_commit_list(s, other_jl, 0); + first_jl = other_jl; + entry = other_jl->j_list.prev; + } - /* list we were called with is gone, return */ - if (!journal_list_still_alive(s, trans_id)) - return 1; + /* if we didn't find any older uncommitted transactions, return now */ + if (first_jl == jl) { + return 0; + } - /* the one we just flushed is gone, this means all - * older lists are also gone, so first_jl is no longer - * valid either. 
Go back to the beginning. - */ - if (!journal_list_still_alive(s, other_trans_id)) { - goto find_first; + first_trans_id = first_jl->j_trans_id; + + entry = &first_jl->j_list; + while (1) { + other_jl = JOURNAL_LIST_ENTRY(entry); + other_trans_id = other_jl->j_trans_id; + + if (other_trans_id < trans_id) { + if (atomic_read(&other_jl->j_commit_left) != 0) { + flush_commit_list(s, other_jl, 0); + + /* list we were called with is gone, return */ + if (!journal_list_still_alive(s, trans_id)) + return 1; + + /* the one we just flushed is gone, this means all + * older lists are also gone, so first_jl is no longer + * valid either. Go back to the beginning. + */ + if (!journal_list_still_alive + (s, other_trans_id)) { + goto find_first; + } + } + entry = entry->next; + if (entry == &journal->j_journal_list) + return 0; + } else { + return 0; } - } - entry = entry->next; - if (entry == &journal->j_journal_list) - return 0; - } else { - return 0; } - } - return 0; + return 0; } -int reiserfs_async_progress_wait(struct super_block *s) { - DEFINE_WAIT(wait); - struct reiserfs_journal *j = SB_JOURNAL(s); - if (atomic_read(&j->j_async_throttle)) - blk_congestion_wait(WRITE, HZ/10); - return 0; +int reiserfs_async_progress_wait(struct super_block *s) +{ + DEFINE_WAIT(wait); + struct reiserfs_journal *j = SB_JOURNAL(s); + if (atomic_read(&j->j_async_throttle)) + blk_congestion_wait(WRITE, HZ / 10); + return 0; } /* @@ -907,212 +967,225 @@ int reiserfs_async_progress_wait(struct super_block *s) { ** Before the commit block can by written, every other log block must be safely on disk ** */ -static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) { - int i; - int bn ; - struct buffer_head *tbh = NULL ; - unsigned long trans_id = jl->j_trans_id; - struct reiserfs_journal *journal = SB_JOURNAL (s); - int barrier = 0; - int retval = 0; - - reiserfs_check_lock_depth(s, "flush_commit_list") ; - - if (atomic_read(&jl->j_older_commits_done)) { 
- return 0 ; - } - - get_fs_excl(); - - /* before we can put our commit blocks on disk, we have to make sure everyone older than - ** us is on disk too - */ - BUG_ON (jl->j_len <= 0); - BUG_ON (trans_id == journal->j_trans_id); - - get_journal_list(jl); - if (flushall) { - if (flush_older_commits(s, jl) == 1) { - /* list disappeared during flush_older_commits. return */ - goto put_jl; - } - } - - /* make sure nobody is trying to flush this one at the same time */ - down(&jl->j_commit_lock); - if (!journal_list_still_alive(s, trans_id)) { - up(&jl->j_commit_lock); - goto put_jl; - } - BUG_ON (jl->j_trans_id == 0); - - /* this commit is done, exit */ - if (atomic_read(&(jl->j_commit_left)) <= 0) { - if (flushall) { - atomic_set(&(jl->j_older_commits_done), 1) ; - } - up(&jl->j_commit_lock); - goto put_jl; - } - - if (!list_empty(&jl->j_bh_list)) { - unlock_kernel(); - write_ordered_buffers(&journal->j_dirty_buffers_lock, - journal, jl, &jl->j_bh_list); - lock_kernel(); - } - BUG_ON (!list_empty(&jl->j_bh_list)); - /* - * for the description block and all the log blocks, submit any buffers - * that haven't already reached the disk - */ - atomic_inc(&journal->j_async_throttle); - for (i = 0 ; i < (jl->j_len + 1) ; i++) { - bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start+i) % - SB_ONDISK_JOURNAL_SIZE(s); - tbh = journal_find_get_block(s, bn) ; - if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */ - ll_rw_block(WRITE, 1, &tbh) ; - put_bh(tbh) ; - } - atomic_dec(&journal->j_async_throttle); - - /* wait on everything written so far before writing the commit - * if we are in barrier mode, send the commit down now - */ - barrier = reiserfs_barrier_flush(s); - if (barrier) { - int ret; - lock_buffer(jl->j_commit_bh); - ret = submit_barrier_buffer(jl->j_commit_bh); - if (ret == -EOPNOTSUPP) { - set_buffer_uptodate(jl->j_commit_bh); - disable_barrier(s); - barrier = 0; - } - } - for (i = 0 ; i < (jl->j_len + 1) ; i++) { - bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + - 
(jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s) ; - tbh = journal_find_get_block(s, bn) ; - wait_on_buffer(tbh) ; - // since we're using ll_rw_blk above, it might have skipped over - // a locked buffer. Double check here - // - if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ - sync_dirty_buffer(tbh); - if (unlikely (!buffer_uptodate(tbh))) { +static int flush_commit_list(struct super_block *s, + struct reiserfs_journal_list *jl, int flushall) +{ + int i; + int bn; + struct buffer_head *tbh = NULL; + unsigned long trans_id = jl->j_trans_id; + struct reiserfs_journal *journal = SB_JOURNAL(s); + int barrier = 0; + int retval = 0; + + reiserfs_check_lock_depth(s, "flush_commit_list"); + + if (atomic_read(&jl->j_older_commits_done)) { + return 0; + } + + get_fs_excl(); + + /* before we can put our commit blocks on disk, we have to make sure everyone older than + ** us is on disk too + */ + BUG_ON(jl->j_len <= 0); + BUG_ON(trans_id == journal->j_trans_id); + + get_journal_list(jl); + if (flushall) { + if (flush_older_commits(s, jl) == 1) { + /* list disappeared during flush_older_commits. 
return */ + goto put_jl; + } + } + + /* make sure nobody is trying to flush this one at the same time */ + down(&jl->j_commit_lock); + if (!journal_list_still_alive(s, trans_id)) { + up(&jl->j_commit_lock); + goto put_jl; + } + BUG_ON(jl->j_trans_id == 0); + + /* this commit is done, exit */ + if (atomic_read(&(jl->j_commit_left)) <= 0) { + if (flushall) { + atomic_set(&(jl->j_older_commits_done), 1); + } + up(&jl->j_commit_lock); + goto put_jl; + } + + if (!list_empty(&jl->j_bh_list)) { + unlock_kernel(); + write_ordered_buffers(&journal->j_dirty_buffers_lock, + journal, jl, &jl->j_bh_list); + lock_kernel(); + } + BUG_ON(!list_empty(&jl->j_bh_list)); + /* + * for the description block and all the log blocks, submit any buffers + * that haven't already reached the disk + */ + atomic_inc(&journal->j_async_throttle); + for (i = 0; i < (jl->j_len + 1); i++) { + bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) % + SB_ONDISK_JOURNAL_SIZE(s); + tbh = journal_find_get_block(s, bn); + if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */ + ll_rw_block(WRITE, 1, &tbh); + put_bh(tbh); + } + atomic_dec(&journal->j_async_throttle); + + /* wait on everything written so far before writing the commit + * if we are in barrier mode, send the commit down now + */ + barrier = reiserfs_barrier_flush(s); + if (barrier) { + int ret; + lock_buffer(jl->j_commit_bh); + ret = submit_barrier_buffer(jl->j_commit_bh); + if (ret == -EOPNOTSUPP) { + set_buffer_uptodate(jl->j_commit_bh); + disable_barrier(s); + barrier = 0; + } + } + for (i = 0; i < (jl->j_len + 1); i++) { + bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + + (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); + tbh = journal_find_get_block(s, bn); + wait_on_buffer(tbh); + // since we're using ll_rw_blk above, it might have skipped over + // a locked buffer. 
Double check here + // + if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ + sync_dirty_buffer(tbh); + if (unlikely(!buffer_uptodate(tbh))) { #ifdef CONFIG_REISERFS_CHECK - reiserfs_warning(s, "journal-601, buffer write failed") ; + reiserfs_warning(s, "journal-601, buffer write failed"); #endif - retval = -EIO; - } - put_bh(tbh) ; /* once for journal_find_get_block */ - put_bh(tbh) ; /* once due to original getblk in do_journal_end */ - atomic_dec(&(jl->j_commit_left)) ; - } - - BUG_ON (atomic_read(&(jl->j_commit_left)) != 1); - - if (!barrier) { - if (buffer_dirty(jl->j_commit_bh)) - BUG(); - mark_buffer_dirty(jl->j_commit_bh) ; - sync_dirty_buffer(jl->j_commit_bh) ; - } else - wait_on_buffer(jl->j_commit_bh); - - check_barrier_completion(s, jl->j_commit_bh); - - /* If there was a write error in the journal - we can't commit this - * transaction - it will be invalid and, if successful, will just end - * up propogating the write error out to the filesystem. */ - if (unlikely (!buffer_uptodate(jl->j_commit_bh))) { + retval = -EIO; + } + put_bh(tbh); /* once for journal_find_get_block */ + put_bh(tbh); /* once due to original getblk in do_journal_end */ + atomic_dec(&(jl->j_commit_left)); + } + + BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); + + if (!barrier) { + if (buffer_dirty(jl->j_commit_bh)) + BUG(); + mark_buffer_dirty(jl->j_commit_bh); + sync_dirty_buffer(jl->j_commit_bh); + } else + wait_on_buffer(jl->j_commit_bh); + + check_barrier_completion(s, jl->j_commit_bh); + + /* If there was a write error in the journal - we can't commit this + * transaction - it will be invalid and, if successful, will just end + * up propogating the write error out to the filesystem. 
*/ + if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { #ifdef CONFIG_REISERFS_CHECK - reiserfs_warning(s, "journal-615: buffer write failed") ; + reiserfs_warning(s, "journal-615: buffer write failed"); #endif - retval = -EIO; - } - bforget(jl->j_commit_bh) ; - if (journal->j_last_commit_id != 0 && - (jl->j_trans_id - journal->j_last_commit_id) != 1) { - reiserfs_warning(s, "clm-2200: last commit %lu, current %lu", - journal->j_last_commit_id, - jl->j_trans_id); - } - journal->j_last_commit_id = jl->j_trans_id; - - /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ - cleanup_freed_for_journal_list(s, jl) ; - - retval = retval ? retval : journal->j_errno; - - /* mark the metadata dirty */ - if (!retval) - dirty_one_transaction(s, jl); - atomic_dec(&(jl->j_commit_left)) ; - - if (flushall) { - atomic_set(&(jl->j_older_commits_done), 1) ; - } - up(&jl->j_commit_lock); -put_jl: - put_journal_list(s, jl); - - if (retval) - reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__); - put_fs_excl(); - return retval; + retval = -EIO; + } + bforget(jl->j_commit_bh); + if (journal->j_last_commit_id != 0 && + (jl->j_trans_id - journal->j_last_commit_id) != 1) { + reiserfs_warning(s, "clm-2200: last commit %lu, current %lu", + journal->j_last_commit_id, jl->j_trans_id); + } + journal->j_last_commit_id = jl->j_trans_id; + + /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ + cleanup_freed_for_journal_list(s, jl); + + retval = retval ? 
retval : journal->j_errno; + + /* mark the metadata dirty */ + if (!retval) + dirty_one_transaction(s, jl); + atomic_dec(&(jl->j_commit_left)); + + if (flushall) { + atomic_set(&(jl->j_older_commits_done), 1); + } + up(&jl->j_commit_lock); + put_jl: + put_journal_list(s, jl); + + if (retval) + reiserfs_abort(s, retval, "Journal write error in %s", + __FUNCTION__); + put_fs_excl(); + return retval; } /* ** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or ** returns NULL if it can't find anything */ -static struct reiserfs_journal_list *find_newer_jl_for_cn(struct reiserfs_journal_cnode *cn) { - struct super_block *sb = cn->sb; - b_blocknr_t blocknr = cn->blocknr ; +static struct reiserfs_journal_list *find_newer_jl_for_cn(struct + reiserfs_journal_cnode + *cn) +{ + struct super_block *sb = cn->sb; + b_blocknr_t blocknr = cn->blocknr; - cn = cn->hprev ; - while(cn) { - if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) { - return cn->jlist ; - } - cn = cn->hprev ; - } - return NULL ; + cn = cn->hprev; + while (cn) { + if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) { + return cn->jlist; + } + cn = cn->hprev; + } + return NULL; } -static void remove_journal_hash(struct super_block *, struct reiserfs_journal_cnode **, -struct reiserfs_journal_list *, unsigned long, int); +static void remove_journal_hash(struct super_block *, + struct reiserfs_journal_cnode **, + struct reiserfs_journal_list *, unsigned long, + int); /* ** once all the real blocks have been flushed, it is safe to remove them from the ** journal list for this transaction. Aside from freeing the cnode, this also allows the ** block to be reallocated for data blocks if it had been deleted. 
*/ -static void remove_all_from_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl, int debug) { - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - struct reiserfs_journal_cnode *cn, *last ; - cn = jl->j_realblock ; - - /* which is better, to lock once around the whole loop, or - ** to lock for each call to remove_journal_hash? - */ - while(cn) { - if (cn->blocknr != 0) { - if (debug) { - reiserfs_warning (p_s_sb, "block %u, bh is %d, state %ld", cn->blocknr, - cn->bh ? 1: 0, cn->state) ; - } - cn->state = 0 ; - remove_journal_hash(p_s_sb, journal->j_list_hash_table, jl, cn->blocknr, 1) ; - } - last = cn ; - cn = cn->next ; - free_cnode(p_s_sb, last) ; - } - jl->j_realblock = NULL ; +static void remove_all_from_journal_list(struct super_block *p_s_sb, + struct reiserfs_journal_list *jl, + int debug) +{ + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + struct reiserfs_journal_cnode *cn, *last; + cn = jl->j_realblock; + + /* which is better, to lock once around the whole loop, or + ** to lock for each call to remove_journal_hash? + */ + while (cn) { + if (cn->blocknr != 0) { + if (debug) { + reiserfs_warning(p_s_sb, + "block %u, bh is %d, state %ld", + cn->blocknr, cn->bh ? 
1 : 0, + cn->state); + } + cn->state = 0; + remove_journal_hash(p_s_sb, journal->j_list_hash_table, + jl, cn->blocknr, 1); + } + last = cn; + cn = cn->next; + free_cnode(p_s_sb, last); + } + jl->j_realblock = NULL; } /* @@ -1122,98 +1195,107 @@ static void remove_all_from_journal_list(struct super_block *p_s_sb, struct reis ** called by flush_journal_list, before it calls remove_all_from_journal_list ** */ -static int _update_journal_header_block(struct super_block *p_s_sb, unsigned long offset, unsigned long trans_id) { - struct reiserfs_journal_header *jh ; - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); +static int _update_journal_header_block(struct super_block *p_s_sb, + unsigned long offset, + unsigned long trans_id) +{ + struct reiserfs_journal_header *jh; + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); - if (reiserfs_is_journal_aborted (journal)) - return -EIO; + if (reiserfs_is_journal_aborted(journal)) + return -EIO; - if (trans_id >= journal->j_last_flush_trans_id) { - if (buffer_locked((journal->j_header_bh))) { - wait_on_buffer((journal->j_header_bh)) ; - if (unlikely (!buffer_uptodate(journal->j_header_bh))) { + if (trans_id >= journal->j_last_flush_trans_id) { + if (buffer_locked((journal->j_header_bh))) { + wait_on_buffer((journal->j_header_bh)); + if (unlikely(!buffer_uptodate(journal->j_header_bh))) { #ifdef CONFIG_REISERFS_CHECK - reiserfs_warning (p_s_sb, "journal-699: buffer write failed") ; + reiserfs_warning(p_s_sb, + "journal-699: buffer write failed"); #endif - return -EIO; - } - } - journal->j_last_flush_trans_id = trans_id ; - journal->j_first_unflushed_offset = offset ; - jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data) ; - jh->j_last_flush_trans_id = cpu_to_le32(trans_id) ; - jh->j_first_unflushed_offset = cpu_to_le32(offset) ; - jh->j_mount_id = cpu_to_le32(journal->j_mount_id) ; - - if (reiserfs_barrier_flush(p_s_sb)) { - int ret; - lock_buffer(journal->j_header_bh); - ret = 
submit_barrier_buffer(journal->j_header_bh); - if (ret == -EOPNOTSUPP) { - set_buffer_uptodate(journal->j_header_bh); - disable_barrier(p_s_sb); - goto sync; - } - wait_on_buffer(journal->j_header_bh); - check_barrier_completion(p_s_sb, journal->j_header_bh); - } else { -sync: - set_buffer_dirty(journal->j_header_bh) ; - sync_dirty_buffer(journal->j_header_bh) ; - } - if (!buffer_uptodate(journal->j_header_bh)) { - reiserfs_warning (p_s_sb, "journal-837: IO error during journal replay"); - return -EIO ; - } - } - return 0 ; -} - -static int update_journal_header_block(struct super_block *p_s_sb, - unsigned long offset, - unsigned long trans_id) { - return _update_journal_header_block(p_s_sb, offset, trans_id); + return -EIO; + } + } + journal->j_last_flush_trans_id = trans_id; + journal->j_first_unflushed_offset = offset; + jh = (struct reiserfs_journal_header *)(journal->j_header_bh-> + b_data); + jh->j_last_flush_trans_id = cpu_to_le32(trans_id); + jh->j_first_unflushed_offset = cpu_to_le32(offset); + jh->j_mount_id = cpu_to_le32(journal->j_mount_id); + + if (reiserfs_barrier_flush(p_s_sb)) { + int ret; + lock_buffer(journal->j_header_bh); + ret = submit_barrier_buffer(journal->j_header_bh); + if (ret == -EOPNOTSUPP) { + set_buffer_uptodate(journal->j_header_bh); + disable_barrier(p_s_sb); + goto sync; + } + wait_on_buffer(journal->j_header_bh); + check_barrier_completion(p_s_sb, journal->j_header_bh); + } else { + sync: + set_buffer_dirty(journal->j_header_bh); + sync_dirty_buffer(journal->j_header_bh); + } + if (!buffer_uptodate(journal->j_header_bh)) { + reiserfs_warning(p_s_sb, + "journal-837: IO error during journal replay"); + return -EIO; + } + } + return 0; +} + +static int update_journal_header_block(struct super_block *p_s_sb, + unsigned long offset, + unsigned long trans_id) +{ + return _update_journal_header_block(p_s_sb, offset, trans_id); } + /* ** flush any and all journal lists older than you are ** can only be called from flush_journal_list */ 
static int flush_older_journal_lists(struct super_block *p_s_sb, - struct reiserfs_journal_list *jl) -{ - struct list_head *entry; - struct reiserfs_journal_list *other_jl ; - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - unsigned long trans_id = jl->j_trans_id; - - /* we know we are the only ones flushing things, no extra race - * protection is required. - */ -restart: - entry = journal->j_journal_list.next; - /* Did we wrap? */ - if (entry == &journal->j_journal_list) - return 0; - other_jl = JOURNAL_LIST_ENTRY(entry); - if (other_jl->j_trans_id < trans_id) { - BUG_ON (other_jl->j_refcount <= 0); - /* do not flush all */ - flush_journal_list(p_s_sb, other_jl, 0) ; - - /* other_jl is now deleted from the list */ - goto restart; - } - return 0 ; + struct reiserfs_journal_list *jl) +{ + struct list_head *entry; + struct reiserfs_journal_list *other_jl; + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + unsigned long trans_id = jl->j_trans_id; + + /* we know we are the only ones flushing things, no extra race + * protection is required. + */ + restart: + entry = journal->j_journal_list.next; + /* Did we wrap? 
*/ + if (entry == &journal->j_journal_list) + return 0; + other_jl = JOURNAL_LIST_ENTRY(entry); + if (other_jl->j_trans_id < trans_id) { + BUG_ON(other_jl->j_refcount <= 0); + /* do not flush all */ + flush_journal_list(p_s_sb, other_jl, 0); + + /* other_jl is now deleted from the list */ + goto restart; + } + return 0; } static void del_from_work_list(struct super_block *s, - struct reiserfs_journal_list *jl) { - struct reiserfs_journal *journal = SB_JOURNAL (s); - if (!list_empty(&jl->j_working_list)) { - list_del_init(&jl->j_working_list); - journal->j_num_work_lists--; - } + struct reiserfs_journal_list *jl) +{ + struct reiserfs_journal *journal = SB_JOURNAL(s); + if (!list_empty(&jl->j_working_list)) { + list_del_init(&jl->j_working_list); + journal->j_num_work_lists--; + } } /* flush a journal list, both commit and real blocks @@ -1225,439 +1307,461 @@ static void del_from_work_list(struct super_block *s, ** and the journal is locked. That means it can only be called from ** do_journal_end, or by journal_release */ -static int flush_journal_list(struct super_block *s, - struct reiserfs_journal_list *jl, int flushall) { - struct reiserfs_journal_list *pjl ; - struct reiserfs_journal_cnode *cn, *last ; - int count ; - int was_jwait = 0 ; - int was_dirty = 0 ; - struct buffer_head *saved_bh ; - unsigned long j_len_saved = jl->j_len ; - struct reiserfs_journal *journal = SB_JOURNAL (s); - int err = 0; - - BUG_ON (j_len_saved <= 0); - - if (atomic_read(&journal->j_wcount) != 0) { - reiserfs_warning(s, "clm-2048: flush_journal_list called with wcount %d", - atomic_read(&journal->j_wcount)) ; - } - BUG_ON (jl->j_trans_id == 0); - - /* if flushall == 0, the lock is already held */ - if (flushall) { - down(&journal->j_flush_sem); - } else if (!down_trylock(&journal->j_flush_sem)) { - BUG(); - } - - count = 0 ; - if (j_len_saved > journal->j_trans_max) { - reiserfs_panic(s, "journal-715: flush_journal_list, length is %lu, trans id %lu\n", j_len_saved, jl->j_trans_id); 
- return 0 ; - } - - get_fs_excl(); - - /* if all the work is already done, get out of here */ - if (atomic_read(&(jl->j_nonzerolen)) <= 0 && - atomic_read(&(jl->j_commit_left)) <= 0) { - goto flush_older_and_return ; - } - - /* start by putting the commit list on disk. This will also flush - ** the commit lists of any olders transactions - */ - flush_commit_list(s, jl, 1) ; - - if (!(jl->j_state & LIST_DIRTY) && !reiserfs_is_journal_aborted (journal)) - BUG(); - - /* are we done now? */ - if (atomic_read(&(jl->j_nonzerolen)) <= 0 && - atomic_read(&(jl->j_commit_left)) <= 0) { - goto flush_older_and_return ; - } - - /* loop through each cnode, see if we need to write it, - ** or wait on a more recent transaction, or just ignore it - */ - if (atomic_read(&(journal->j_wcount)) != 0) { - reiserfs_panic(s, "journal-844: panic journal list is flushing, wcount is not 0\n") ; - } - cn = jl->j_realblock ; - while(cn) { - was_jwait = 0 ; - was_dirty = 0 ; - saved_bh = NULL ; - /* blocknr of 0 is no longer in the hash, ignore it */ - if (cn->blocknr == 0) { - goto free_cnode ; - } - - /* This transaction failed commit. Don't write out to the disk */ - if (!(jl->j_state & LIST_DIRTY)) - goto free_cnode; - - pjl = find_newer_jl_for_cn(cn) ; - /* the order is important here. 
We check pjl to make sure we - ** don't clear BH_JDirty_wait if we aren't the one writing this - ** block to disk - */ - if (!pjl && cn->bh) { - saved_bh = cn->bh ; - - /* we do this to make sure nobody releases the buffer while - ** we are working with it - */ - get_bh(saved_bh) ; - - if (buffer_journal_dirty(saved_bh)) { - BUG_ON (!can_dirty (cn)); - was_jwait = 1 ; - was_dirty = 1 ; - } else if (can_dirty(cn)) { - /* everything with !pjl && jwait should be writable */ - BUG(); - } - } - - /* if someone has this block in a newer transaction, just make - ** sure they are commited, and don't try writing it to disk - */ - if (pjl) { - if (atomic_read(&pjl->j_commit_left)) - flush_commit_list(s, pjl, 1) ; - goto free_cnode ; - } - - /* bh == NULL when the block got to disk on its own, OR, - ** the block got freed in a future transaction - */ - if (saved_bh == NULL) { - goto free_cnode ; - } - - /* this should never happen. kupdate_one_transaction has this list - ** locked while it works, so we should never see a buffer here that - ** is not marked JDirty_wait - */ - if ((!was_jwait) && !buffer_locked(saved_bh)) { - reiserfs_warning (s, "journal-813: BAD! buffer %llu %cdirty %cjwait, " - "not in a newer tranasction", - (unsigned long long)saved_bh->b_blocknr, - was_dirty ? ' ' : '!', was_jwait ? 
' ' : '!') ; - } - if (was_dirty) { - /* we inc again because saved_bh gets decremented at free_cnode */ - get_bh(saved_bh) ; - set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ; - lock_buffer(saved_bh); - BUG_ON (cn->blocknr != saved_bh->b_blocknr); - if (buffer_dirty(saved_bh)) - submit_logged_buffer(saved_bh) ; - else - unlock_buffer(saved_bh); - count++ ; - } else { - reiserfs_warning (s, "clm-2082: Unable to flush buffer %llu in %s", - (unsigned long long)saved_bh->b_blocknr, __FUNCTION__); - } -free_cnode: - last = cn ; - cn = cn->next ; - if (saved_bh) { - /* we incremented this to keep others from taking the buffer head away */ - put_bh(saved_bh) ; - if (atomic_read(&(saved_bh->b_count)) < 0) { - reiserfs_warning (s, "journal-945: saved_bh->b_count < 0"); - } - } - } - if (count > 0) { - cn = jl->j_realblock ; - while(cn) { - if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { - if (!cn->bh) { - reiserfs_panic(s, "journal-1011: cn->bh is NULL\n") ; - } - wait_on_buffer(cn->bh) ; - if (!cn->bh) { - reiserfs_panic(s, "journal-1012: cn->bh is NULL\n") ; - } - if (unlikely (!buffer_uptodate(cn->bh))) { -#ifdef CONFIG_REISERFS_CHECK - reiserfs_warning(s, "journal-949: buffer write failed\n") ; -#endif - err = -EIO; - } - /* note, we must clear the JDirty_wait bit after the up to date - ** check, otherwise we race against our flushpage routine - */ - BUG_ON (!test_clear_buffer_journal_dirty (cn->bh)); - - /* undo the inc from journal_mark_dirty */ - put_bh(cn->bh) ; - brelse(cn->bh) ; - } - cn = cn->next ; - } - } - - if (err) - reiserfs_abort (s, -EIO, "Write error while pushing transaction to disk in %s", __FUNCTION__); -flush_older_and_return: - - - /* before we can update the journal header block, we _must_ flush all - ** real blocks from all older transactions to disk. 
This is because - ** once the header block is updated, this transaction will not be - ** replayed after a crash - */ - if (flushall) { - flush_older_journal_lists(s, jl); - } - - err = journal->j_errno; - /* before we can remove everything from the hash tables for this - ** transaction, we must make sure it can never be replayed - ** - ** since we are only called from do_journal_end, we know for sure there - ** are no allocations going on while we are flushing journal lists. So, - ** we only need to update the journal header block for the last list - ** being flushed - */ - if (!err && flushall) { - err = update_journal_header_block(s, (jl->j_start + jl->j_len + 2) % SB_ONDISK_JOURNAL_SIZE(s), jl->j_trans_id) ; - if (err) - reiserfs_abort (s, -EIO, "Write error while updating journal header in %s", __FUNCTION__); - } - remove_all_from_journal_list(s, jl, 0) ; - list_del_init(&jl->j_list); - journal->j_num_lists--; - del_from_work_list(s, jl); - - if (journal->j_last_flush_id != 0 && - (jl->j_trans_id - journal->j_last_flush_id) != 1) { - reiserfs_warning(s, "clm-2201: last flush %lu, current %lu", - journal->j_last_flush_id, - jl->j_trans_id); - } - journal->j_last_flush_id = jl->j_trans_id; - - /* not strictly required since we are freeing the list, but it should - * help find code using dead lists later on - */ - jl->j_len = 0 ; - atomic_set(&(jl->j_nonzerolen), 0) ; - jl->j_start = 0 ; - jl->j_realblock = NULL ; - jl->j_commit_bh = NULL ; - jl->j_trans_id = 0 ; - jl->j_state = 0; - put_journal_list(s, jl); - if (flushall) - up(&journal->j_flush_sem); - put_fs_excl(); - return err ; -} - -static int write_one_transaction(struct super_block *s, - struct reiserfs_journal_list *jl, - struct buffer_chunk *chunk) +static int flush_journal_list(struct super_block *s, + struct reiserfs_journal_list *jl, int flushall) { - struct reiserfs_journal_cnode *cn; - int ret = 0 ; - - jl->j_state |= LIST_TOUCHED; - del_from_work_list(s, jl); - if (jl->j_len == 0 || 
atomic_read(&jl->j_nonzerolen) == 0) { - return 0; - } - - cn = jl->j_realblock ; - while(cn) { - /* if the blocknr == 0, this has been cleared from the hash, - ** skip it - */ - if (cn->blocknr == 0) { - goto next ; - } - if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { - struct buffer_head *tmp_bh; - /* we can race against journal_mark_freed when we try - * to lock_buffer(cn->bh), so we have to inc the buffer - * count, and recheck things after locking - */ - tmp_bh = cn->bh; - get_bh(tmp_bh); - lock_buffer(tmp_bh); - if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) { - if (!buffer_journal_dirty(tmp_bh) || - buffer_journal_prepared(tmp_bh)) - BUG(); - add_to_chunk(chunk, tmp_bh, NULL, write_chunk); - ret++; - } else { - /* note, cn->bh might be null now */ - unlock_buffer(tmp_bh); - } - put_bh(tmp_bh); - } -next: - cn = cn->next ; - cond_resched(); - } - return ret ; -} + struct reiserfs_journal_list *pjl; + struct reiserfs_journal_cnode *cn, *last; + int count; + int was_jwait = 0; + int was_dirty = 0; + struct buffer_head *saved_bh; + unsigned long j_len_saved = jl->j_len; + struct reiserfs_journal *journal = SB_JOURNAL(s); + int err = 0; + + BUG_ON(j_len_saved <= 0); + + if (atomic_read(&journal->j_wcount) != 0) { + reiserfs_warning(s, + "clm-2048: flush_journal_list called with wcount %d", + atomic_read(&journal->j_wcount)); + } + BUG_ON(jl->j_trans_id == 0); -/* used by flush_commit_list */ -static int dirty_one_transaction(struct super_block *s, - struct reiserfs_journal_list *jl) -{ - struct reiserfs_journal_cnode *cn; - struct reiserfs_journal_list *pjl; - int ret = 0 ; - - jl->j_state |= LIST_DIRTY; - cn = jl->j_realblock ; - while(cn) { - /* look for a more recent transaction that logged this - ** buffer. 
Only the most recent transaction with a buffer in - ** it is allowed to send that buffer to disk - */ - pjl = find_newer_jl_for_cn(cn) ; - if (!pjl && cn->blocknr && cn->bh && buffer_journal_dirty(cn->bh)) - { - BUG_ON (!can_dirty(cn)); - /* if the buffer is prepared, it will either be logged - * or restored. If restored, we need to make sure - * it actually gets marked dirty - */ - clear_buffer_journal_new (cn->bh); - if (buffer_journal_prepared (cn->bh)) { - set_buffer_journal_restore_dirty (cn->bh); - } else { - set_buffer_journal_test (cn->bh); - mark_buffer_dirty(cn->bh); - } - } - cn = cn->next ; - } - return ret ; -} + /* if flushall == 0, the lock is already held */ + if (flushall) { + down(&journal->j_flush_sem); + } else if (!down_trylock(&journal->j_flush_sem)) { + BUG(); + } -static int kupdate_transactions(struct super_block *s, - struct reiserfs_journal_list *jl, - struct reiserfs_journal_list **next_jl, - unsigned long *next_trans_id, - int num_blocks, - int num_trans) { - int ret = 0; - int written = 0 ; - int transactions_flushed = 0; - unsigned long orig_trans_id = jl->j_trans_id; - struct buffer_chunk chunk; - struct list_head *entry; - struct reiserfs_journal *journal = SB_JOURNAL (s); - chunk.nr = 0; - - down(&journal->j_flush_sem); - if (!journal_list_still_alive(s, orig_trans_id)) { - goto done; - } - - /* we've got j_flush_sem held, nobody is going to delete any - * of these lists out from underneath us - */ - while((num_trans && transactions_flushed < num_trans) || - (!num_trans && written < num_blocks)) { - - if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) || - atomic_read(&jl->j_commit_left) || !(jl->j_state & LIST_DIRTY)) - { - del_from_work_list(s, jl); - break; - } - ret = write_one_transaction(s, jl, &chunk); - - if (ret < 0) - goto done; - transactions_flushed++; - written += ret; - entry = jl->j_list.next; - - /* did we wrap? 
*/ - if (entry == &journal->j_journal_list) { - break; - } - jl = JOURNAL_LIST_ENTRY(entry); - - /* don't bother with older transactions */ - if (jl->j_trans_id <= orig_trans_id) - break; - } - if (chunk.nr) { - write_chunk(&chunk); - } - -done: - up(&journal->j_flush_sem); - return ret; -} + count = 0; + if (j_len_saved > journal->j_trans_max) { + reiserfs_panic(s, + "journal-715: flush_journal_list, length is %lu, trans id %lu\n", + j_len_saved, jl->j_trans_id); + return 0; + } -/* for o_sync and fsync heavy applications, they tend to use -** all the journa list slots with tiny transactions. These -** trigger lots and lots of calls to update the header block, which -** adds seeks and slows things down. -** -** This function tries to clear out a large chunk of the journal lists -** at once, which makes everything faster since only the newest journal + get_fs_excl(); + + /* if all the work is already done, get out of here */ + if (atomic_read(&(jl->j_nonzerolen)) <= 0 && + atomic_read(&(jl->j_commit_left)) <= 0) { + goto flush_older_and_return; + } + + /* start by putting the commit list on disk. This will also flush + ** the commit lists of any olders transactions + */ + flush_commit_list(s, jl, 1); + + if (!(jl->j_state & LIST_DIRTY) + && !reiserfs_is_journal_aborted(journal)) + BUG(); + + /* are we done now? */ + if (atomic_read(&(jl->j_nonzerolen)) <= 0 && + atomic_read(&(jl->j_commit_left)) <= 0) { + goto flush_older_and_return; + } + + /* loop through each cnode, see if we need to write it, + ** or wait on a more recent transaction, or just ignore it + */ + if (atomic_read(&(journal->j_wcount)) != 0) { + reiserfs_panic(s, + "journal-844: panic journal list is flushing, wcount is not 0\n"); + } + cn = jl->j_realblock; + while (cn) { + was_jwait = 0; + was_dirty = 0; + saved_bh = NULL; + /* blocknr of 0 is no longer in the hash, ignore it */ + if (cn->blocknr == 0) { + goto free_cnode; + } + + /* This transaction failed commit. 
Don't write out to the disk */ + if (!(jl->j_state & LIST_DIRTY)) + goto free_cnode; + + pjl = find_newer_jl_for_cn(cn); + /* the order is important here. We check pjl to make sure we + ** don't clear BH_JDirty_wait if we aren't the one writing this + ** block to disk + */ + if (!pjl && cn->bh) { + saved_bh = cn->bh; + + /* we do this to make sure nobody releases the buffer while + ** we are working with it + */ + get_bh(saved_bh); + + if (buffer_journal_dirty(saved_bh)) { + BUG_ON(!can_dirty(cn)); + was_jwait = 1; + was_dirty = 1; + } else if (can_dirty(cn)) { + /* everything with !pjl && jwait should be writable */ + BUG(); + } + } + + /* if someone has this block in a newer transaction, just make + ** sure they are commited, and don't try writing it to disk + */ + if (pjl) { + if (atomic_read(&pjl->j_commit_left)) + flush_commit_list(s, pjl, 1); + goto free_cnode; + } + + /* bh == NULL when the block got to disk on its own, OR, + ** the block got freed in a future transaction + */ + if (saved_bh == NULL) { + goto free_cnode; + } + + /* this should never happen. kupdate_one_transaction has this list + ** locked while it works, so we should never see a buffer here that + ** is not marked JDirty_wait + */ + if ((!was_jwait) && !buffer_locked(saved_bh)) { + reiserfs_warning(s, + "journal-813: BAD! buffer %llu %cdirty %cjwait, " + "not in a newer tranasction", + (unsigned long long)saved_bh-> + b_blocknr, was_dirty ? ' ' : '!', + was_jwait ? 
' ' : '!'); + } + if (was_dirty) { + /* we inc again because saved_bh gets decremented at free_cnode */ + get_bh(saved_bh); + set_bit(BLOCK_NEEDS_FLUSH, &cn->state); + lock_buffer(saved_bh); + BUG_ON(cn->blocknr != saved_bh->b_blocknr); + if (buffer_dirty(saved_bh)) + submit_logged_buffer(saved_bh); + else + unlock_buffer(saved_bh); + count++; + } else { + reiserfs_warning(s, + "clm-2082: Unable to flush buffer %llu in %s", + (unsigned long long)saved_bh-> + b_blocknr, __FUNCTION__); + } + free_cnode: + last = cn; + cn = cn->next; + if (saved_bh) { + /* we incremented this to keep others from taking the buffer head away */ + put_bh(saved_bh); + if (atomic_read(&(saved_bh->b_count)) < 0) { + reiserfs_warning(s, + "journal-945: saved_bh->b_count < 0"); + } + } + } + if (count > 0) { + cn = jl->j_realblock; + while (cn) { + if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { + if (!cn->bh) { + reiserfs_panic(s, + "journal-1011: cn->bh is NULL\n"); + } + wait_on_buffer(cn->bh); + if (!cn->bh) { + reiserfs_panic(s, + "journal-1012: cn->bh is NULL\n"); + } + if (unlikely(!buffer_uptodate(cn->bh))) { +#ifdef CONFIG_REISERFS_CHECK + reiserfs_warning(s, + "journal-949: buffer write failed\n"); +#endif + err = -EIO; + } + /* note, we must clear the JDirty_wait bit after the up to date + ** check, otherwise we race against our flushpage routine + */ + BUG_ON(!test_clear_buffer_journal_dirty + (cn->bh)); + + /* undo the inc from journal_mark_dirty */ + put_bh(cn->bh); + brelse(cn->bh); + } + cn = cn->next; + } + } + + if (err) + reiserfs_abort(s, -EIO, + "Write error while pushing transaction to disk in %s", + __FUNCTION__); + flush_older_and_return: + + /* before we can update the journal header block, we _must_ flush all + ** real blocks from all older transactions to disk. 
This is because + ** once the header block is updated, this transaction will not be + ** replayed after a crash + */ + if (flushall) { + flush_older_journal_lists(s, jl); + } + + err = journal->j_errno; + /* before we can remove everything from the hash tables for this + ** transaction, we must make sure it can never be replayed + ** + ** since we are only called from do_journal_end, we know for sure there + ** are no allocations going on while we are flushing journal lists. So, + ** we only need to update the journal header block for the last list + ** being flushed + */ + if (!err && flushall) { + err = + update_journal_header_block(s, + (jl->j_start + jl->j_len + + 2) % SB_ONDISK_JOURNAL_SIZE(s), + jl->j_trans_id); + if (err) + reiserfs_abort(s, -EIO, + "Write error while updating journal header in %s", + __FUNCTION__); + } + remove_all_from_journal_list(s, jl, 0); + list_del_init(&jl->j_list); + journal->j_num_lists--; + del_from_work_list(s, jl); + + if (journal->j_last_flush_id != 0 && + (jl->j_trans_id - journal->j_last_flush_id) != 1) { + reiserfs_warning(s, "clm-2201: last flush %lu, current %lu", + journal->j_last_flush_id, jl->j_trans_id); + } + journal->j_last_flush_id = jl->j_trans_id; + + /* not strictly required since we are freeing the list, but it should + * help find code using dead lists later on + */ + jl->j_len = 0; + atomic_set(&(jl->j_nonzerolen), 0); + jl->j_start = 0; + jl->j_realblock = NULL; + jl->j_commit_bh = NULL; + jl->j_trans_id = 0; + jl->j_state = 0; + put_journal_list(s, jl); + if (flushall) + up(&journal->j_flush_sem); + put_fs_excl(); + return err; +} + +static int write_one_transaction(struct super_block *s, + struct reiserfs_journal_list *jl, + struct buffer_chunk *chunk) +{ + struct reiserfs_journal_cnode *cn; + int ret = 0; + + jl->j_state |= LIST_TOUCHED; + del_from_work_list(s, jl); + if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) { + return 0; + } + + cn = jl->j_realblock; + while (cn) { + /* if the blocknr == 
0, this has been cleared from the hash, + ** skip it + */ + if (cn->blocknr == 0) { + goto next; + } + if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { + struct buffer_head *tmp_bh; + /* we can race against journal_mark_freed when we try + * to lock_buffer(cn->bh), so we have to inc the buffer + * count, and recheck things after locking + */ + tmp_bh = cn->bh; + get_bh(tmp_bh); + lock_buffer(tmp_bh); + if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) { + if (!buffer_journal_dirty(tmp_bh) || + buffer_journal_prepared(tmp_bh)) + BUG(); + add_to_chunk(chunk, tmp_bh, NULL, write_chunk); + ret++; + } else { + /* note, cn->bh might be null now */ + unlock_buffer(tmp_bh); + } + put_bh(tmp_bh); + } + next: + cn = cn->next; + cond_resched(); + } + return ret; +} + +/* used by flush_commit_list */ +static int dirty_one_transaction(struct super_block *s, + struct reiserfs_journal_list *jl) +{ + struct reiserfs_journal_cnode *cn; + struct reiserfs_journal_list *pjl; + int ret = 0; + + jl->j_state |= LIST_DIRTY; + cn = jl->j_realblock; + while (cn) { + /* look for a more recent transaction that logged this + ** buffer. Only the most recent transaction with a buffer in + ** it is allowed to send that buffer to disk + */ + pjl = find_newer_jl_for_cn(cn); + if (!pjl && cn->blocknr && cn->bh + && buffer_journal_dirty(cn->bh)) { + BUG_ON(!can_dirty(cn)); + /* if the buffer is prepared, it will either be logged + * or restored. 
If restored, we need to make sure + * it actually gets marked dirty + */ + clear_buffer_journal_new(cn->bh); + if (buffer_journal_prepared(cn->bh)) { + set_buffer_journal_restore_dirty(cn->bh); + } else { + set_buffer_journal_test(cn->bh); + mark_buffer_dirty(cn->bh); + } + } + cn = cn->next; + } + return ret; +} + +static int kupdate_transactions(struct super_block *s, + struct reiserfs_journal_list *jl, + struct reiserfs_journal_list **next_jl, + unsigned long *next_trans_id, + int num_blocks, int num_trans) +{ + int ret = 0; + int written = 0; + int transactions_flushed = 0; + unsigned long orig_trans_id = jl->j_trans_id; + struct buffer_chunk chunk; + struct list_head *entry; + struct reiserfs_journal *journal = SB_JOURNAL(s); + chunk.nr = 0; + + down(&journal->j_flush_sem); + if (!journal_list_still_alive(s, orig_trans_id)) { + goto done; + } + + /* we've got j_flush_sem held, nobody is going to delete any + * of these lists out from underneath us + */ + while ((num_trans && transactions_flushed < num_trans) || + (!num_trans && written < num_blocks)) { + + if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) || + atomic_read(&jl->j_commit_left) + || !(jl->j_state & LIST_DIRTY)) { + del_from_work_list(s, jl); + break; + } + ret = write_one_transaction(s, jl, &chunk); + + if (ret < 0) + goto done; + transactions_flushed++; + written += ret; + entry = jl->j_list.next; + + /* did we wrap? */ + if (entry == &journal->j_journal_list) { + break; + } + jl = JOURNAL_LIST_ENTRY(entry); + + /* don't bother with older transactions */ + if (jl->j_trans_id <= orig_trans_id) + break; + } + if (chunk.nr) { + write_chunk(&chunk); + } + + done: + up(&journal->j_flush_sem); + return ret; +} + +/* for o_sync and fsync heavy applications, they tend to use +** all the journa list slots with tiny transactions. These +** trigger lots and lots of calls to update the header block, which +** adds seeks and slows things down. 
+** +** This function tries to clear out a large chunk of the journal lists +** at once, which makes everything faster since only the newest journal ** list updates the header block */ static int flush_used_journal_lists(struct super_block *s, - struct reiserfs_journal_list *jl) { - unsigned long len = 0; - unsigned long cur_len; - int ret; - int i; - int limit = 256; - struct reiserfs_journal_list *tjl; - struct reiserfs_journal_list *flush_jl; - unsigned long trans_id; - struct reiserfs_journal *journal = SB_JOURNAL (s); - - flush_jl = tjl = jl; - - /* in data logging mode, try harder to flush a lot of blocks */ - if (reiserfs_data_log(s)) - limit = 1024; - /* flush for 256 transactions or limit blocks, whichever comes first */ - for(i = 0 ; i < 256 && len < limit ; i++) { - if (atomic_read(&tjl->j_commit_left) || - tjl->j_trans_id < jl->j_trans_id) { - break; - } - cur_len = atomic_read(&tjl->j_nonzerolen); - if (cur_len > 0) { - tjl->j_state &= ~LIST_TOUCHED; - } - len += cur_len; - flush_jl = tjl; - if (tjl->j_list.next == &journal->j_journal_list) - break; - tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); - } - /* try to find a group of blocks we can flush across all the - ** transactions, but only bother if we've actually spanned - ** across multiple lists - */ - if (flush_jl != jl) { - ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); - } - flush_journal_list(s, flush_jl, 1); - return 0; + struct reiserfs_journal_list *jl) +{ + unsigned long len = 0; + unsigned long cur_len; + int ret; + int i; + int limit = 256; + struct reiserfs_journal_list *tjl; + struct reiserfs_journal_list *flush_jl; + unsigned long trans_id; + struct reiserfs_journal *journal = SB_JOURNAL(s); + + flush_jl = tjl = jl; + + /* in data logging mode, try harder to flush a lot of blocks */ + if (reiserfs_data_log(s)) + limit = 1024; + /* flush for 256 transactions or limit blocks, whichever comes first */ + for (i = 0; i < 256 && len < limit; i++) { + if 
(atomic_read(&tjl->j_commit_left) || + tjl->j_trans_id < jl->j_trans_id) { + break; + } + cur_len = atomic_read(&tjl->j_nonzerolen); + if (cur_len > 0) { + tjl->j_state &= ~LIST_TOUCHED; + } + len += cur_len; + flush_jl = tjl; + if (tjl->j_list.next == &journal->j_journal_list) + break; + tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); + } + /* try to find a group of blocks we can flush across all the + ** transactions, but only bother if we've actually spanned + ** across multiple lists + */ + if (flush_jl != jl) { + ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); + } + flush_journal_list(s, flush_jl, 1); + return 0; } /* @@ -1665,207 +1769,248 @@ static int flush_used_journal_lists(struct super_block *s, ** only touchs the hnext and hprev pointers. */ void remove_journal_hash(struct super_block *sb, - struct reiserfs_journal_cnode **table, - struct reiserfs_journal_list *jl, - unsigned long block, int remove_freed) -{ - struct reiserfs_journal_cnode *cur ; - struct reiserfs_journal_cnode **head ; - - head= &(journal_hash(table, sb, block)) ; - if (!head) { - return ; - } - cur = *head ; - while(cur) { - if (cur->blocknr == block && cur->sb == sb && (jl == NULL || jl == cur->jlist) && - (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { - if (cur->hnext) { - cur->hnext->hprev = cur->hprev ; - } - if (cur->hprev) { - cur->hprev->hnext = cur->hnext ; - } else { - *head = cur->hnext ; - } - cur->blocknr = 0 ; - cur->sb = NULL ; - cur->state = 0 ; - if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ - atomic_dec(&(cur->jlist->j_nonzerolen)) ; - cur->bh = NULL ; - cur->jlist = NULL ; - } - cur = cur->hnext ; - } -} - -static void free_journal_ram(struct super_block *p_s_sb) { - struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); - reiserfs_kfree(journal->j_current_jl, - sizeof(struct reiserfs_journal_list), p_s_sb); - journal->j_num_lists--; - - vfree(journal->j_cnode_free_orig) ; - free_list_bitmaps(p_s_sb, 
journal->j_list_bitmap) ; - free_bitmap_nodes(p_s_sb) ; /* must be after free_list_bitmaps */ - if (journal->j_header_bh) { - brelse(journal->j_header_bh) ; - } - /* j_header_bh is on the journal dev, make sure not to release the journal - * dev until we brelse j_header_bh - */ - release_journal_dev(p_s_sb, journal); - vfree(journal) ; + struct reiserfs_journal_cnode **table, + struct reiserfs_journal_list *jl, + unsigned long block, int remove_freed) +{ + struct reiserfs_journal_cnode *cur; + struct reiserfs_journal_cnode **head; + + head = &(journal_hash(table, sb, block)); + if (!head) { + return; + } + cur = *head; + while (cur) { + if (cur->blocknr == block && cur->sb == sb + && (jl == NULL || jl == cur->jlist) + && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { + if (cur->hnext) { + cur->hnext->hprev = cur->hprev; + } + if (cur->hprev) { + cur->hprev->hnext = cur->hnext; + } else { + *head = cur->hnext; + } + cur->blocknr = 0; + cur->sb = NULL; + cur->state = 0; + if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ + atomic_dec(&(cur->jlist->j_nonzerolen)); + cur->bh = NULL; + cur->jlist = NULL; + } + cur = cur->hnext; + } +} + +static void free_journal_ram(struct super_block *p_s_sb) +{ + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + reiserfs_kfree(journal->j_current_jl, + sizeof(struct reiserfs_journal_list), p_s_sb); + journal->j_num_lists--; + + vfree(journal->j_cnode_free_orig); + free_list_bitmaps(p_s_sb, journal->j_list_bitmap); + free_bitmap_nodes(p_s_sb); /* must be after free_list_bitmaps */ + if (journal->j_header_bh) { + brelse(journal->j_header_bh); + } + /* j_header_bh is on the journal dev, make sure not to release the journal + * dev until we brelse j_header_bh + */ + release_journal_dev(p_s_sb, journal); + vfree(journal); } /* ** call on unmount. Only set error to 1 if you haven't made your way out ** of read_super() yet. Any other caller must keep error at 0. 
*/ -static int do_journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, int error) { - struct reiserfs_transaction_handle myth ; - int flushed = 0; - struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); - - /* we only want to flush out transactions if we were called with error == 0 - */ - if (!error && !(p_s_sb->s_flags & MS_RDONLY)) { - /* end the current trans */ - BUG_ON (!th->t_trans_id); - do_journal_end(th, p_s_sb,10, FLUSH_ALL) ; - - /* make sure something gets logged to force our way into the flush code */ - if (!journal_join(&myth, p_s_sb, 1)) { - reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; - journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; - do_journal_end(&myth, p_s_sb,1, FLUSH_ALL) ; - flushed = 1; - } - } - - /* this also catches errors during the do_journal_end above */ - if (!error && reiserfs_is_journal_aborted(journal)) { - memset(&myth, 0, sizeof(myth)); - if (!journal_join_abort(&myth, p_s_sb, 1)) { - reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; - journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; - do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL) ; - } - } - - reiserfs_mounted_fs_count-- ; - /* wait for all commits to finish */ - cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work); - flush_workqueue(commit_wq); - if (!reiserfs_mounted_fs_count) { - destroy_workqueue(commit_wq); - commit_wq = NULL; - } - - free_journal_ram(p_s_sb) ; - - return 0 ; +static int do_journal_release(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb, int error) +{ + struct reiserfs_transaction_handle myth; + int flushed = 0; + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + + /* we only want to flush out transactions if we were called with error == 0 + */ + if (!error && !(p_s_sb->s_flags & MS_RDONLY)) { + /* end the current trans */ + BUG_ON(!th->t_trans_id); + do_journal_end(th, p_s_sb, 10, FLUSH_ALL); + + /* make sure something gets logged 
to force our way into the flush code */ + if (!journal_join(&myth, p_s_sb, 1)) { + reiserfs_prepare_for_journal(p_s_sb, + SB_BUFFER_WITH_SB(p_s_sb), + 1); + journal_mark_dirty(&myth, p_s_sb, + SB_BUFFER_WITH_SB(p_s_sb)); + do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL); + flushed = 1; + } + } + + /* this also catches errors during the do_journal_end above */ + if (!error && reiserfs_is_journal_aborted(journal)) { + memset(&myth, 0, sizeof(myth)); + if (!journal_join_abort(&myth, p_s_sb, 1)) { + reiserfs_prepare_for_journal(p_s_sb, + SB_BUFFER_WITH_SB(p_s_sb), + 1); + journal_mark_dirty(&myth, p_s_sb, + SB_BUFFER_WITH_SB(p_s_sb)); + do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL); + } + } + + reiserfs_mounted_fs_count--; + /* wait for all commits to finish */ + cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work); + flush_workqueue(commit_wq); + if (!reiserfs_mounted_fs_count) { + destroy_workqueue(commit_wq); + commit_wq = NULL; + } + + free_journal_ram(p_s_sb); + + return 0; } /* ** call on unmount. flush all journal trans, release all alloc'd ram */ -int journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb) { - return do_journal_release(th, p_s_sb, 0) ; +int journal_release(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb) +{ + return do_journal_release(th, p_s_sb, 0); } + /* ** only call from an error condition inside reiserfs_read_super! */ -int journal_release_error(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb) { - return do_journal_release(th, p_s_sb, 1) ; +int journal_release_error(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb) +{ + return do_journal_release(th, p_s_sb, 1); } /* compares description block with commit block. 
returns 1 if they differ, 0 if they are the same */ -static int journal_compare_desc_commit(struct super_block *p_s_sb, struct reiserfs_journal_desc *desc, - struct reiserfs_journal_commit *commit) { - if (get_commit_trans_id (commit) != get_desc_trans_id (desc) || - get_commit_trans_len (commit) != get_desc_trans_len (desc) || - get_commit_trans_len (commit) > SB_JOURNAL(p_s_sb)->j_trans_max || - get_commit_trans_len (commit) <= 0 - ) { - return 1 ; - } - return 0 ; +static int journal_compare_desc_commit(struct super_block *p_s_sb, + struct reiserfs_journal_desc *desc, + struct reiserfs_journal_commit *commit) +{ + if (get_commit_trans_id(commit) != get_desc_trans_id(desc) || + get_commit_trans_len(commit) != get_desc_trans_len(desc) || + get_commit_trans_len(commit) > SB_JOURNAL(p_s_sb)->j_trans_max || + get_commit_trans_len(commit) <= 0) { + return 1; + } + return 0; } + /* returns 0 if it did not find a description block ** returns -1 if it found a corrupt commit block ** returns 1 if both desc and commit were valid */ -static int journal_transaction_is_valid(struct super_block *p_s_sb, struct buffer_head *d_bh, unsigned long *oldest_invalid_trans_id, unsigned long *newest_mount_id) { - struct reiserfs_journal_desc *desc ; - struct reiserfs_journal_commit *commit ; - struct buffer_head *c_bh ; - unsigned long offset ; - - if (!d_bh) - return 0 ; - - desc = (struct reiserfs_journal_desc *)d_bh->b_data ; - if (get_desc_trans_len(desc) > 0 && !memcmp(get_journal_desc_magic (d_bh), JOURNAL_DESC_MAGIC, 8)) { - if (oldest_invalid_trans_id && *oldest_invalid_trans_id && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-986: transaction " - "is valid returning because trans_id %d is greater than " - "oldest_invalid %lu", get_desc_trans_id(desc), - *oldest_invalid_trans_id); - return 0 ; - } - if (newest_mount_id && *newest_mount_id > get_desc_mount_id (desc)) { - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 
"journal-1087: transaction " - "is valid returning because mount_id %d is less than " - "newest_mount_id %lu", get_desc_mount_id (desc), - *newest_mount_id) ; - return -1 ; - } - if ( get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max ) { - reiserfs_warning(p_s_sb, "journal-2018: Bad transaction length %d encountered, ignoring transaction", get_desc_trans_len(desc)); - return -1 ; - } - offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ; - - /* ok, we have a journal description block, lets see if the transaction was valid */ - c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + - ((offset + get_desc_trans_len(desc) + 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ; - if (!c_bh) - return 0 ; - commit = (struct reiserfs_journal_commit *)c_bh->b_data ; - if (journal_compare_desc_commit(p_s_sb, desc, commit)) { - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, - "journal_transaction_is_valid, commit offset %ld had bad " - "time %d or length %d", - c_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), - get_commit_trans_id (commit), - get_commit_trans_len(commit)); - brelse(c_bh) ; - if (oldest_invalid_trans_id) { - *oldest_invalid_trans_id = get_desc_trans_id(desc) ; - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1004: " - "transaction_is_valid setting oldest invalid trans_id " - "to %d", get_desc_trans_id(desc)) ; - } - return -1; - } - brelse(c_bh) ; - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1006: found valid " - "transaction start offset %llu, len %d id %d", - d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), - get_desc_trans_len(desc), get_desc_trans_id(desc)) ; - return 1 ; - } else { - return 0 ; - } -} - -static void brelse_array(struct buffer_head **heads, int num) { - int i ; - for (i = 0 ; i < num ; i++) { - brelse(heads[i]) ; - } +static int journal_transaction_is_valid(struct super_block *p_s_sb, + struct buffer_head *d_bh, + unsigned long *oldest_invalid_trans_id, + unsigned long *newest_mount_id) +{ + 
struct reiserfs_journal_desc *desc; + struct reiserfs_journal_commit *commit; + struct buffer_head *c_bh; + unsigned long offset; + + if (!d_bh) + return 0; + + desc = (struct reiserfs_journal_desc *)d_bh->b_data; + if (get_desc_trans_len(desc) > 0 + && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) { + if (oldest_invalid_trans_id && *oldest_invalid_trans_id + && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal-986: transaction " + "is valid returning because trans_id %d is greater than " + "oldest_invalid %lu", + get_desc_trans_id(desc), + *oldest_invalid_trans_id); + return 0; + } + if (newest_mount_id + && *newest_mount_id > get_desc_mount_id(desc)) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal-1087: transaction " + "is valid returning because mount_id %d is less than " + "newest_mount_id %lu", + get_desc_mount_id(desc), + *newest_mount_id); + return -1; + } + if (get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max) { + reiserfs_warning(p_s_sb, + "journal-2018: Bad transaction length %d encountered, ignoring transaction", + get_desc_trans_len(desc)); + return -1; + } + offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); + + /* ok, we have a journal description block, lets see if the transaction was valid */ + c_bh = + journal_bread(p_s_sb, + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + + ((offset + get_desc_trans_len(desc) + + 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))); + if (!c_bh) + return 0; + commit = (struct reiserfs_journal_commit *)c_bh->b_data; + if (journal_compare_desc_commit(p_s_sb, desc, commit)) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal_transaction_is_valid, commit offset %ld had bad " + "time %d or length %d", + c_bh->b_blocknr - + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), + get_commit_trans_id(commit), + get_commit_trans_len(commit)); + brelse(c_bh); + if (oldest_invalid_trans_id) { + *oldest_invalid_trans_id = + get_desc_trans_id(desc); + 
reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal-1004: " + "transaction_is_valid setting oldest invalid trans_id " + "to %d", + get_desc_trans_id(desc)); + } + return -1; + } + brelse(c_bh); + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal-1006: found valid " + "transaction start offset %llu, len %d id %d", + d_bh->b_blocknr - + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), + get_desc_trans_len(desc), + get_desc_trans_id(desc)); + return 1; + } else { + return 0; + } +} + +static void brelse_array(struct buffer_head **heads, int num) +{ + int i; + for (i = 0; i < num; i++) { + brelse(heads[i]); + } } /* @@ -1873,149 +2018,202 @@ static void brelse_array(struct buffer_head **heads, int num) { ** this either reads in a replays a transaction, or returns because the transaction ** is invalid, or too old. */ -static int journal_read_transaction(struct super_block *p_s_sb, unsigned long cur_dblock, unsigned long oldest_start, - unsigned long oldest_trans_id, unsigned long newest_mount_id) { - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - struct reiserfs_journal_desc *desc ; - struct reiserfs_journal_commit *commit ; - unsigned long trans_id = 0 ; - struct buffer_head *c_bh ; - struct buffer_head *d_bh ; - struct buffer_head **log_blocks = NULL ; - struct buffer_head **real_blocks = NULL ; - unsigned long trans_offset ; - int i; - int trans_half; - - d_bh = journal_bread(p_s_sb, cur_dblock) ; - if (!d_bh) - return 1 ; - desc = (struct reiserfs_journal_desc *)d_bh->b_data ; - trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ; - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: " - "journal_read_transaction, offset %llu, len %d mount_id %d", - d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), - get_desc_trans_len(desc), get_desc_mount_id(desc)) ; - if (get_desc_trans_id(desc) < oldest_trans_id) { - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: " - "journal_read_trans skipping because %lu is too old", - 
cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)) ; - brelse(d_bh) ; - return 1 ; - } - if (get_desc_mount_id(desc) != newest_mount_id) { - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: " - "journal_read_trans skipping because %d is != " - "newest_mount_id %lu", get_desc_mount_id(desc), - newest_mount_id) ; - brelse(d_bh) ; - return 1 ; - } - c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + - ((trans_offset + get_desc_trans_len(desc) + 1) % - SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ; - if (!c_bh) { - brelse(d_bh) ; - return 1 ; - } - commit = (struct reiserfs_journal_commit *)c_bh->b_data ; - if (journal_compare_desc_commit(p_s_sb, desc, commit)) { - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal_read_transaction, " - "commit offset %llu had bad time %d or length %d", - c_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), - get_commit_trans_id(commit), get_commit_trans_len(commit)); - brelse(c_bh) ; - brelse(d_bh) ; - return 1; - } - trans_id = get_desc_trans_id(desc) ; - /* now we know we've got a good transaction, and it was inside the valid time ranges */ - log_blocks = reiserfs_kmalloc(get_desc_trans_len(desc) * sizeof(struct buffer_head *), GFP_NOFS, p_s_sb) ; - real_blocks = reiserfs_kmalloc(get_desc_trans_len(desc) * sizeof(struct buffer_head *), GFP_NOFS, p_s_sb) ; - if (!log_blocks || !real_blocks) { - brelse(c_bh) ; - brelse(d_bh) ; - reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; - reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; - reiserfs_warning(p_s_sb, "journal-1169: kmalloc failed, unable to mount FS") ; - return -1 ; - } - /* get all the buffer heads */ - trans_half = journal_trans_half (p_s_sb->s_blocksize) ; - for(i = 0 ; i < get_desc_trans_len(desc) ; i++) { - log_blocks[i] = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + (trans_offset + 1 + i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); - if (i < trans_half) { - 
real_blocks[i] = sb_getblk(p_s_sb, le32_to_cpu(desc->j_realblock[i])) ; - } else { - real_blocks[i] = sb_getblk(p_s_sb, le32_to_cpu(commit->j_realblock[i - trans_half])) ; - } - if ( real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb) ) { - reiserfs_warning(p_s_sb, "journal-1207: REPLAY FAILURE fsck required! Block to replay is outside of filesystem"); - goto abort_replay; - } - /* make sure we don't try to replay onto log or reserved area */ - if (is_block_in_log_or_reserved_area(p_s_sb, real_blocks[i]->b_blocknr)) { - reiserfs_warning(p_s_sb, "journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block") ; -abort_replay: - brelse_array(log_blocks, i) ; - brelse_array(real_blocks, i) ; - brelse(c_bh) ; - brelse(d_bh) ; - reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; - reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; - return -1 ; - } - } - /* read in the log blocks, memcpy to the corresponding real block */ - ll_rw_block(READ, get_desc_trans_len(desc), log_blocks) ; - for (i = 0 ; i < get_desc_trans_len(desc) ; i++) { - wait_on_buffer(log_blocks[i]) ; - if (!buffer_uptodate(log_blocks[i])) { - reiserfs_warning(p_s_sb, "journal-1212: REPLAY FAILURE fsck required! 
buffer write failed") ; - brelse_array(log_blocks + i, get_desc_trans_len(desc) - i) ; - brelse_array(real_blocks, get_desc_trans_len(desc)) ; - brelse(c_bh) ; - brelse(d_bh) ; - reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; - reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; - return -1 ; - } - memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, real_blocks[i]->b_size) ; - set_buffer_uptodate(real_blocks[i]) ; - brelse(log_blocks[i]) ; - } - /* flush out the real blocks */ - for (i = 0 ; i < get_desc_trans_len(desc) ; i++) { - set_buffer_dirty(real_blocks[i]) ; - ll_rw_block(WRITE, 1, real_blocks + i) ; - } - for (i = 0 ; i < get_desc_trans_len(desc) ; i++) { - wait_on_buffer(real_blocks[i]) ; - if (!buffer_uptodate(real_blocks[i])) { - reiserfs_warning(p_s_sb, "journal-1226: REPLAY FAILURE, fsck required! buffer write failed") ; - brelse_array(real_blocks + i, get_desc_trans_len(desc) - i) ; - brelse(c_bh) ; - brelse(d_bh) ; - reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; - reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; - return -1 ; - } - brelse(real_blocks[i]) ; - } - cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + ((trans_offset + get_desc_trans_len(desc) + 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)) ; - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1095: setting journal " - "start to offset %ld", - cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)) ; - - /* init starting values for the first transaction, in case this is the last transaction to be replayed. 
*/ - journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ; - journal->j_last_flush_trans_id = trans_id ; - journal->j_trans_id = trans_id + 1; - brelse(c_bh) ; - brelse(d_bh) ; - reiserfs_kfree(log_blocks, le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), p_s_sb) ; - reiserfs_kfree(real_blocks, le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), p_s_sb) ; - return 0 ; +static int journal_read_transaction(struct super_block *p_s_sb, + unsigned long cur_dblock, + unsigned long oldest_start, + unsigned long oldest_trans_id, + unsigned long newest_mount_id) +{ + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + struct reiserfs_journal_desc *desc; + struct reiserfs_journal_commit *commit; + unsigned long trans_id = 0; + struct buffer_head *c_bh; + struct buffer_head *d_bh; + struct buffer_head **log_blocks = NULL; + struct buffer_head **real_blocks = NULL; + unsigned long trans_offset; + int i; + int trans_half; + + d_bh = journal_bread(p_s_sb, cur_dblock); + if (!d_bh) + return 1; + desc = (struct reiserfs_journal_desc *)d_bh->b_data; + trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: " + "journal_read_transaction, offset %llu, len %d mount_id %d", + d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), + get_desc_trans_len(desc), get_desc_mount_id(desc)); + if (get_desc_trans_id(desc) < oldest_trans_id) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: " + "journal_read_trans skipping because %lu is too old", + cur_dblock - + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)); + brelse(d_bh); + return 1; + } + if (get_desc_mount_id(desc) != newest_mount_id) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: " + "journal_read_trans skipping because %d is != " + "newest_mount_id %lu", get_desc_mount_id(desc), + newest_mount_id); + brelse(d_bh); + return 1; + } + c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + + 
((trans_offset + get_desc_trans_len(desc) + 1) % + SB_ONDISK_JOURNAL_SIZE(p_s_sb))); + if (!c_bh) { + brelse(d_bh); + return 1; + } + commit = (struct reiserfs_journal_commit *)c_bh->b_data; + if (journal_compare_desc_commit(p_s_sb, desc, commit)) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal_read_transaction, " + "commit offset %llu had bad time %d or length %d", + c_bh->b_blocknr - + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), + get_commit_trans_id(commit), + get_commit_trans_len(commit)); + brelse(c_bh); + brelse(d_bh); + return 1; + } + trans_id = get_desc_trans_id(desc); + /* now we know we've got a good transaction, and it was inside the valid time ranges */ + log_blocks = + reiserfs_kmalloc(get_desc_trans_len(desc) * + sizeof(struct buffer_head *), GFP_NOFS, p_s_sb); + real_blocks = + reiserfs_kmalloc(get_desc_trans_len(desc) * + sizeof(struct buffer_head *), GFP_NOFS, p_s_sb); + if (!log_blocks || !real_blocks) { + brelse(c_bh); + brelse(d_bh); + reiserfs_kfree(log_blocks, + get_desc_trans_len(desc) * + sizeof(struct buffer_head *), p_s_sb); + reiserfs_kfree(real_blocks, + get_desc_trans_len(desc) * + sizeof(struct buffer_head *), p_s_sb); + reiserfs_warning(p_s_sb, + "journal-1169: kmalloc failed, unable to mount FS"); + return -1; + } + /* get all the buffer heads */ + trans_half = journal_trans_half(p_s_sb->s_blocksize); + for (i = 0; i < get_desc_trans_len(desc); i++) { + log_blocks[i] = + journal_getblk(p_s_sb, + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + + (trans_offset + 1 + + i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); + if (i < trans_half) { + real_blocks[i] = + sb_getblk(p_s_sb, + le32_to_cpu(desc->j_realblock[i])); + } else { + real_blocks[i] = + sb_getblk(p_s_sb, + le32_to_cpu(commit-> + j_realblock[i - trans_half])); + } + if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) { + reiserfs_warning(p_s_sb, + "journal-1207: REPLAY FAILURE fsck required! 
Block to replay is outside of filesystem"); + goto abort_replay; + } + /* make sure we don't try to replay onto log or reserved area */ + if (is_block_in_log_or_reserved_area + (p_s_sb, real_blocks[i]->b_blocknr)) { + reiserfs_warning(p_s_sb, + "journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block"); + abort_replay: + brelse_array(log_blocks, i); + brelse_array(real_blocks, i); + brelse(c_bh); + brelse(d_bh); + reiserfs_kfree(log_blocks, + get_desc_trans_len(desc) * + sizeof(struct buffer_head *), p_s_sb); + reiserfs_kfree(real_blocks, + get_desc_trans_len(desc) * + sizeof(struct buffer_head *), p_s_sb); + return -1; + } + } + /* read in the log blocks, memcpy to the corresponding real block */ + ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); + for (i = 0; i < get_desc_trans_len(desc); i++) { + wait_on_buffer(log_blocks[i]); + if (!buffer_uptodate(log_blocks[i])) { + reiserfs_warning(p_s_sb, + "journal-1212: REPLAY FAILURE fsck required! buffer write failed"); + brelse_array(log_blocks + i, + get_desc_trans_len(desc) - i); + brelse_array(real_blocks, get_desc_trans_len(desc)); + brelse(c_bh); + brelse(d_bh); + reiserfs_kfree(log_blocks, + get_desc_trans_len(desc) * + sizeof(struct buffer_head *), p_s_sb); + reiserfs_kfree(real_blocks, + get_desc_trans_len(desc) * + sizeof(struct buffer_head *), p_s_sb); + return -1; + } + memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, + real_blocks[i]->b_size); + set_buffer_uptodate(real_blocks[i]); + brelse(log_blocks[i]); + } + /* flush out the real blocks */ + for (i = 0; i < get_desc_trans_len(desc); i++) { + set_buffer_dirty(real_blocks[i]); + ll_rw_block(WRITE, 1, real_blocks + i); + } + for (i = 0; i < get_desc_trans_len(desc); i++) { + wait_on_buffer(real_blocks[i]); + if (!buffer_uptodate(real_blocks[i])) { + reiserfs_warning(p_s_sb, + "journal-1226: REPLAY FAILURE, fsck required! 
buffer write failed"); + brelse_array(real_blocks + i, + get_desc_trans_len(desc) - i); + brelse(c_bh); + brelse(d_bh); + reiserfs_kfree(log_blocks, + get_desc_trans_len(desc) * + sizeof(struct buffer_head *), p_s_sb); + reiserfs_kfree(real_blocks, + get_desc_trans_len(desc) * + sizeof(struct buffer_head *), p_s_sb); + return -1; + } + brelse(real_blocks[i]); + } + cur_dblock = + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + + ((trans_offset + get_desc_trans_len(desc) + + 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal-1095: setting journal " "start to offset %ld", + cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)); + + /* init starting values for the first transaction, in case this is the last transaction to be replayed. */ + journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); + journal->j_last_flush_trans_id = trans_id; + journal->j_trans_id = trans_id + 1; + brelse(c_bh); + brelse(d_bh); + reiserfs_kfree(log_blocks, + le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), + p_s_sb); + reiserfs_kfree(real_blocks, + le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), + p_s_sb); + return 0; } /* This function reads blocks starting from block and to max_block of bufsize @@ -2024,39 +2222,39 @@ abort_replay: Right now it is only used from journal code. But later we might use it from other places. Note: Do not use journal_getblk/sb_getblk functions here! 
*/ -static struct buffer_head * reiserfs_breada (struct block_device *dev, int block, int bufsize, - unsigned int max_block) +static struct buffer_head *reiserfs_breada(struct block_device *dev, int block, + int bufsize, unsigned int max_block) { - struct buffer_head * bhlist[BUFNR]; + struct buffer_head *bhlist[BUFNR]; unsigned int blocks = BUFNR; - struct buffer_head * bh; + struct buffer_head *bh; int i, j; - - bh = __getblk (dev, block, bufsize ); - if (buffer_uptodate (bh)) - return (bh); - + + bh = __getblk(dev, block, bufsize); + if (buffer_uptodate(bh)) + return (bh); + if (block + BUFNR > max_block) { blocks = max_block - block; } bhlist[0] = bh; j = 1; for (i = 1; i < blocks; i++) { - bh = __getblk (dev, block + i, bufsize); - if (buffer_uptodate (bh)) { - brelse (bh); + bh = __getblk(dev, block + i, bufsize); + if (buffer_uptodate(bh)) { + brelse(bh); break; - } - else bhlist[j++] = bh; + } else + bhlist[j++] = bh; } - ll_rw_block (READ, j, bhlist); - for(i = 1; i < j; i++) - brelse (bhlist[i]); + ll_rw_block(READ, j, bhlist); + for (i = 1; i < j; i++) + brelse(bhlist[i]); bh = bhlist[0]; - wait_on_buffer (bh); - if (buffer_uptodate (bh)) + wait_on_buffer(bh); + if (buffer_uptodate(bh)) return bh; - brelse (bh); + brelse(bh); return NULL; } @@ -2069,218 +2267,250 @@ static struct buffer_head * reiserfs_breada (struct block_device *dev, int block ** ** On exit, it sets things up so the first transaction will work correctly. 
*/ -static int journal_read(struct super_block *p_s_sb) { - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - struct reiserfs_journal_desc *desc ; - unsigned long oldest_trans_id = 0; - unsigned long oldest_invalid_trans_id = 0 ; - time_t start ; - unsigned long oldest_start = 0; - unsigned long cur_dblock = 0 ; - unsigned long newest_mount_id = 9 ; - struct buffer_head *d_bh ; - struct reiserfs_journal_header *jh ; - int valid_journal_header = 0 ; - int replay_count = 0 ; - int continue_replay = 1 ; - int ret ; - char b[BDEVNAME_SIZE]; - - cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ; - reiserfs_info (p_s_sb, "checking transaction log (%s)\n", - bdevname(journal->j_dev_bd, b)); - start = get_seconds(); - - /* step 1, read in the journal header block. Check the transaction it says - ** is the first unflushed, and if that transaction is not valid, - ** replay is done - */ - journal->j_header_bh = journal_bread(p_s_sb, - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + - SB_ONDISK_JOURNAL_SIZE(p_s_sb)); - if (!journal->j_header_bh) { - return 1 ; - } - jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data) ; - if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 && - le32_to_cpu(jh->j_first_unflushed_offset) < SB_ONDISK_JOURNAL_SIZE(p_s_sb) && - le32_to_cpu(jh->j_last_flush_trans_id) > 0) { - oldest_start = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + - le32_to_cpu(jh->j_first_unflushed_offset) ; - oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; - newest_mount_id = le32_to_cpu(jh->j_mount_id); - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1153: found in " - "header: first_unflushed_offset %d, last_flushed_trans_id " - "%lu", le32_to_cpu(jh->j_first_unflushed_offset), - le32_to_cpu(jh->j_last_flush_trans_id)) ; - valid_journal_header = 1 ; - - /* now, we try to read the first unflushed offset. If it is not valid, - ** there is nothing more we can do, and it makes no sense to read - ** through the whole log. 
- */ - d_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + le32_to_cpu(jh->j_first_unflushed_offset)) ; - ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL) ; - if (!ret) { - continue_replay = 0 ; - } - brelse(d_bh) ; - goto start_log_replay; - } - - if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) { - reiserfs_warning (p_s_sb, - "clm-2076: device is readonly, unable to replay log") ; - return -1 ; - } - - /* ok, there are transactions that need to be replayed. start with the first log block, find - ** all the valid transactions, and pick out the oldest. - */ - while(continue_replay && cur_dblock < (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb))) { - /* Note that it is required for blocksize of primary fs device and journal - device to be the same */ - d_bh = reiserfs_breada(journal->j_dev_bd, cur_dblock, p_s_sb->s_blocksize, - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb)) ; - ret = journal_transaction_is_valid(p_s_sb, d_bh, &oldest_invalid_trans_id, &newest_mount_id) ; - if (ret == 1) { - desc = (struct reiserfs_journal_desc *)d_bh->b_data ; - if (oldest_start == 0) { /* init all oldest_ values */ - oldest_trans_id = get_desc_trans_id(desc) ; - oldest_start = d_bh->b_blocknr ; - newest_mount_id = get_desc_mount_id(desc) ; - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1179: Setting " - "oldest_start to offset %llu, trans_id %lu", - oldest_start - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), - oldest_trans_id) ; - } else if (oldest_trans_id > get_desc_trans_id(desc)) { - /* one we just read was older */ - oldest_trans_id = get_desc_trans_id(desc) ; - oldest_start = d_bh->b_blocknr ; - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1180: Resetting " - "oldest_start to offset %lu, trans_id %lu", - oldest_start - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), - oldest_trans_id) ; - } - if (newest_mount_id < get_desc_mount_id(desc)) { - newest_mount_id = get_desc_mount_id(desc) ; +static int 
journal_read(struct super_block *p_s_sb) +{ + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + struct reiserfs_journal_desc *desc; + unsigned long oldest_trans_id = 0; + unsigned long oldest_invalid_trans_id = 0; + time_t start; + unsigned long oldest_start = 0; + unsigned long cur_dblock = 0; + unsigned long newest_mount_id = 9; + struct buffer_head *d_bh; + struct reiserfs_journal_header *jh; + int valid_journal_header = 0; + int replay_count = 0; + int continue_replay = 1; + int ret; + char b[BDEVNAME_SIZE]; + + cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); + reiserfs_info(p_s_sb, "checking transaction log (%s)\n", + bdevname(journal->j_dev_bd, b)); + start = get_seconds(); + + /* step 1, read in the journal header block. Check the transaction it says + ** is the first unflushed, and if that transaction is not valid, + ** replay is done + */ + journal->j_header_bh = journal_bread(p_s_sb, + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + + SB_ONDISK_JOURNAL_SIZE(p_s_sb)); + if (!journal->j_header_bh) { + return 1; + } + jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data); + if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 && + le32_to_cpu(jh->j_first_unflushed_offset) < + SB_ONDISK_JOURNAL_SIZE(p_s_sb) + && le32_to_cpu(jh->j_last_flush_trans_id) > 0) { + oldest_start = + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + + le32_to_cpu(jh->j_first_unflushed_offset); + oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; + newest_mount_id = le32_to_cpu(jh->j_mount_id); + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal-1153: found in " + "header: first_unflushed_offset %d, last_flushed_trans_id " + "%lu", le32_to_cpu(jh->j_first_unflushed_offset), + le32_to_cpu(jh->j_last_flush_trans_id)); + valid_journal_header = 1; + + /* now, we try to read the first unflushed offset. If it is not valid, + ** there is nothing more we can do, and it makes no sense to read + ** through the whole log. 
+ */ + d_bh = + journal_bread(p_s_sb, + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + + le32_to_cpu(jh->j_first_unflushed_offset)); + ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL); + if (!ret) { + continue_replay = 0; + } + brelse(d_bh); + goto start_log_replay; + } + + if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) { + reiserfs_warning(p_s_sb, + "clm-2076: device is readonly, unable to replay log"); + return -1; + } + + /* ok, there are transactions that need to be replayed. start with the first log block, find + ** all the valid transactions, and pick out the oldest. + */ + while (continue_replay + && cur_dblock < + (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + + SB_ONDISK_JOURNAL_SIZE(p_s_sb))) { + /* Note that it is required for blocksize of primary fs device and journal + device to be the same */ + d_bh = + reiserfs_breada(journal->j_dev_bd, cur_dblock, + p_s_sb->s_blocksize, + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + + SB_ONDISK_JOURNAL_SIZE(p_s_sb)); + ret = + journal_transaction_is_valid(p_s_sb, d_bh, + &oldest_invalid_trans_id, + &newest_mount_id); + if (ret == 1) { + desc = (struct reiserfs_journal_desc *)d_bh->b_data; + if (oldest_start == 0) { /* init all oldest_ values */ + oldest_trans_id = get_desc_trans_id(desc); + oldest_start = d_bh->b_blocknr; + newest_mount_id = get_desc_mount_id(desc); + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal-1179: Setting " + "oldest_start to offset %llu, trans_id %lu", + oldest_start - + SB_ONDISK_JOURNAL_1st_BLOCK + (p_s_sb), oldest_trans_id); + } else if (oldest_trans_id > get_desc_trans_id(desc)) { + /* one we just read was older */ + oldest_trans_id = get_desc_trans_id(desc); + oldest_start = d_bh->b_blocknr; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal-1180: Resetting " + "oldest_start to offset %lu, trans_id %lu", + oldest_start - + SB_ONDISK_JOURNAL_1st_BLOCK + (p_s_sb), oldest_trans_id); + } + if (newest_mount_id < get_desc_mount_id(desc)) { + newest_mount_id = get_desc_mount_id(desc); 
+ reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal-1299: Setting " + "newest_mount_id to %d", + get_desc_mount_id(desc)); + } + cur_dblock += get_desc_trans_len(desc) + 2; + } else { + cur_dblock++; + } + brelse(d_bh); + } + + start_log_replay: + cur_dblock = oldest_start; + if (oldest_trans_id) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal-1206: Starting replay " + "from offset %llu, trans_id %lu", + cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), + oldest_trans_id); + + } + replay_count = 0; + while (continue_replay && oldest_trans_id > 0) { + ret = + journal_read_transaction(p_s_sb, cur_dblock, oldest_start, + oldest_trans_id, newest_mount_id); + if (ret < 0) { + return ret; + } else if (ret != 0) { + break; + } + cur_dblock = + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start; + replay_count++; + if (cur_dblock == oldest_start) + break; + } + + if (oldest_trans_id == 0) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal-1225: No valid " "transactions found"); + } + /* j_start does not get set correctly if we don't replay any transactions. 
+ ** if we had a valid journal_header, set j_start to the first unflushed transaction value, + ** copy the trans_id from the header + */ + if (valid_journal_header && replay_count == 0) { + journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); + journal->j_trans_id = + le32_to_cpu(jh->j_last_flush_trans_id) + 1; + journal->j_last_flush_trans_id = + le32_to_cpu(jh->j_last_flush_trans_id); + journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1; + } else { + journal->j_mount_id = newest_mount_id + 1; + } reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " - "newest_mount_id to %d", get_desc_mount_id(desc)); - } - cur_dblock += get_desc_trans_len(desc) + 2 ; - } else { - cur_dblock++ ; - } - brelse(d_bh) ; - } - -start_log_replay: - cur_dblock = oldest_start ; - if (oldest_trans_id) { - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1206: Starting replay " - "from offset %llu, trans_id %lu", - cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), - oldest_trans_id) ; - - } - replay_count = 0 ; - while(continue_replay && oldest_trans_id > 0) { - ret = journal_read_transaction(p_s_sb, cur_dblock, oldest_start, oldest_trans_id, newest_mount_id) ; - if (ret < 0) { - return ret ; - } else if (ret != 0) { - break ; - } - cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start ; - replay_count++ ; - if (cur_dblock == oldest_start) - break; - } - - if (oldest_trans_id == 0) { - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1225: No valid " - "transactions found") ; - } - /* j_start does not get set correctly if we don't replay any transactions. 
- ** if we had a valid journal_header, set j_start to the first unflushed transaction value, - ** copy the trans_id from the header - */ - if (valid_journal_header && replay_count == 0) { - journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset) ; - journal->j_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; - journal->j_last_flush_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) ; - journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1; - } else { - journal->j_mount_id = newest_mount_id + 1 ; - } - reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " - "newest_mount_id to %lu", journal->j_mount_id) ; - journal->j_first_unflushed_offset = journal->j_start ; - if (replay_count > 0) { - reiserfs_info (p_s_sb, "replayed %d transactions in %lu seconds\n", - replay_count, get_seconds() - start) ; - } - if (!bdev_read_only(p_s_sb->s_bdev) && - _update_journal_header_block(p_s_sb, journal->j_start, - journal->j_last_flush_trans_id)) - { - /* replay failed, caller must call free_journal_ram and abort - ** the mount - */ - return -1 ; - } - return 0 ; + "newest_mount_id to %lu", journal->j_mount_id); + journal->j_first_unflushed_offset = journal->j_start; + if (replay_count > 0) { + reiserfs_info(p_s_sb, + "replayed %d transactions in %lu seconds\n", + replay_count, get_seconds() - start); + } + if (!bdev_read_only(p_s_sb->s_bdev) && + _update_journal_header_block(p_s_sb, journal->j_start, + journal->j_last_flush_trans_id)) { + /* replay failed, caller must call free_journal_ram and abort + ** the mount + */ + return -1; + } + return 0; } static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) { - struct reiserfs_journal_list *jl; -retry: - jl = reiserfs_kmalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS, s); - if (!jl) { - yield(); - goto retry; - } - memset(jl, 0, sizeof(*jl)); - INIT_LIST_HEAD(&jl->j_list); - INIT_LIST_HEAD(&jl->j_working_list); - INIT_LIST_HEAD(&jl->j_tail_bh_list); - INIT_LIST_HEAD(&jl->j_bh_list); 
- sema_init(&jl->j_commit_lock, 1); - SB_JOURNAL(s)->j_num_lists++; - get_journal_list(jl); - return jl; -} - -static void journal_list_init(struct super_block *p_s_sb) { - SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb); -} - -static int release_journal_dev( struct super_block *super, - struct reiserfs_journal *journal ) -{ - int result; - - result = 0; - - if( journal -> j_dev_file != NULL ) { - result = filp_close( journal -> j_dev_file, NULL ); - journal -> j_dev_file = NULL; - journal -> j_dev_bd = NULL; - } else if( journal -> j_dev_bd != NULL ) { - result = blkdev_put( journal -> j_dev_bd ); - journal -> j_dev_bd = NULL; - } - - if( result != 0 ) { - reiserfs_warning(super, "sh-457: release_journal_dev: Cannot release journal device: %i", result ); - } - return result; -} - -static int journal_init_dev( struct super_block *super, - struct reiserfs_journal *journal, - const char *jdev_name ) + struct reiserfs_journal_list *jl; + retry: + jl = reiserfs_kmalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS, + s); + if (!jl) { + yield(); + goto retry; + } + memset(jl, 0, sizeof(*jl)); + INIT_LIST_HEAD(&jl->j_list); + INIT_LIST_HEAD(&jl->j_working_list); + INIT_LIST_HEAD(&jl->j_tail_bh_list); + INIT_LIST_HEAD(&jl->j_bh_list); + sema_init(&jl->j_commit_lock, 1); + SB_JOURNAL(s)->j_num_lists++; + get_journal_list(jl); + return jl; +} + +static void journal_list_init(struct super_block *p_s_sb) +{ + SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb); +} + +static int release_journal_dev(struct super_block *super, + struct reiserfs_journal *journal) +{ + int result; + + result = 0; + + if (journal->j_dev_file != NULL) { + result = filp_close(journal->j_dev_file, NULL); + journal->j_dev_file = NULL; + journal->j_dev_bd = NULL; + } else if (journal->j_dev_bd != NULL) { + result = blkdev_put(journal->j_dev_bd); + journal->j_dev_bd = NULL; + } + + if (result != 0) { + reiserfs_warning(super, + "sh-457: release_journal_dev: Cannot release journal 
device: %i", + result); + } + return result; +} + +static int journal_init_dev(struct super_block *super, + struct reiserfs_journal *journal, + const char *jdev_name) { int result; dev_t jdev; @@ -2289,50 +2519,51 @@ static int journal_init_dev( struct super_block *super, result = 0; - journal -> j_dev_bd = NULL; - journal -> j_dev_file = NULL; - jdev = SB_ONDISK_JOURNAL_DEVICE( super ) ? - new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; + journal->j_dev_bd = NULL; + journal->j_dev_file = NULL; + jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? + new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; if (bdev_read_only(super->s_bdev)) - blkdev_mode = FMODE_READ; + blkdev_mode = FMODE_READ; /* there is no "jdev" option and journal is on separate device */ - if( ( !jdev_name || !jdev_name[ 0 ] ) ) { + if ((!jdev_name || !jdev_name[0])) { journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; - reiserfs_warning (super, "sh-458: journal_init_dev: " - "cannot init journal device '%s': %i", - __bdevname(jdev, b), result ); + reiserfs_warning(super, "sh-458: journal_init_dev: " + "cannot init journal device '%s': %i", + __bdevname(jdev, b), result); return result; } else if (jdev != super->s_dev) set_blocksize(journal->j_dev_bd, super->s_blocksize); return 0; } - journal -> j_dev_file = filp_open( jdev_name, 0, 0 ); - if( !IS_ERR( journal -> j_dev_file ) ) { + journal->j_dev_file = filp_open(jdev_name, 0, 0); + if (!IS_ERR(journal->j_dev_file)) { struct inode *jdev_inode = journal->j_dev_file->f_mapping->host; - if( !S_ISBLK( jdev_inode -> i_mode ) ) { + if (!S_ISBLK(jdev_inode->i_mode)) { reiserfs_warning(super, "journal_init_dev: '%s' is " - "not a block device", jdev_name ); + "not a block device", jdev_name); result = -ENOTBLK; - release_journal_dev( super, journal ); - } else { + release_journal_dev(super, journal); + } else { /* ok */ journal->j_dev_bd = 
I_BDEV(jdev_inode); set_blocksize(journal->j_dev_bd, super->s_blocksize); - reiserfs_info(super, "journal_init_dev: journal device: %s\n", + reiserfs_info(super, + "journal_init_dev: journal device: %s\n", bdevname(journal->j_dev_bd, b)); } } else { - result = PTR_ERR( journal -> j_dev_file ); - journal -> j_dev_file = NULL; - reiserfs_warning (super, - "journal_init_dev: Cannot open '%s': %i", - jdev_name, result ); + result = PTR_ERR(journal->j_dev_file); + journal->j_dev_file = NULL; + reiserfs_warning(super, + "journal_init_dev: Cannot open '%s': %i", + jdev_name, result); } return result; } @@ -2340,193 +2571,214 @@ static int journal_init_dev( struct super_block *super, /* ** must be called once on fs mount. calls journal_read for you */ -int journal_init(struct super_block *p_s_sb, const char * j_dev_name, int old_format, unsigned int commit_max_age) { - int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2 ; - struct buffer_head *bhjh; - struct reiserfs_super_block * rs; - struct reiserfs_journal_header *jh; - struct reiserfs_journal *journal; - struct reiserfs_journal_list *jl; - char b[BDEVNAME_SIZE]; - - journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof (struct reiserfs_journal)) ; - if (!journal) { - reiserfs_warning (p_s_sb, "journal-1256: unable to get memory for journal structure") ; - return 1 ; - } - memset(journal, 0, sizeof(struct reiserfs_journal)) ; - INIT_LIST_HEAD(&journal->j_bitmap_nodes) ; - INIT_LIST_HEAD (&journal->j_prealloc_list); - INIT_LIST_HEAD(&journal->j_working_list); - INIT_LIST_HEAD(&journal->j_journal_list); - journal->j_persistent_trans = 0; - if (reiserfs_allocate_list_bitmaps(p_s_sb, - journal->j_list_bitmap, - SB_BMAP_NR(p_s_sb))) - goto free_and_return ; - allocate_bitmap_nodes(p_s_sb) ; - - /* reserved for journal area support */ - SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ? 
- REISERFS_OLD_DISK_OFFSET_IN_BYTES / p_s_sb->s_blocksize + - SB_BMAP_NR(p_s_sb) + 1 : - REISERFS_DISK_OFFSET_IN_BYTES / p_s_sb->s_blocksize + 2); - - /* Sanity check to see is the standard journal fitting withing first bitmap - (actual for small blocksizes) */ - if ( !SB_ONDISK_JOURNAL_DEVICE( p_s_sb ) && - (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8) ) { - reiserfs_warning (p_s_sb, "journal-1393: journal does not fit for area " - "addressed by first of bitmap blocks. It starts at " - "%u and its size is %u. Block size %ld", - SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb), - SB_ONDISK_JOURNAL_SIZE(p_s_sb), p_s_sb->s_blocksize); - goto free_and_return; - } - - if( journal_init_dev( p_s_sb, journal, j_dev_name ) != 0 ) { - reiserfs_warning (p_s_sb, "sh-462: unable to initialize jornal device"); - goto free_and_return; - } - - rs = SB_DISK_SUPER_BLOCK(p_s_sb); - - /* read journal header */ - bhjh = journal_bread(p_s_sb, - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb)); - if (!bhjh) { - reiserfs_warning (p_s_sb, "sh-459: unable to read journal header"); - goto free_and_return; - } - jh = (struct reiserfs_journal_header *)(bhjh->b_data); - - /* make sure that journal matches to the super block */ - if (is_reiserfs_jr(rs) && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != sb_jp_journal_magic(rs))) { - reiserfs_warning (p_s_sb, "sh-460: journal header magic %x " - "(device %s) does not match to magic found in super " - "block %x", - jh->jh_journal.jp_journal_magic, - bdevname( journal->j_dev_bd, b), - sb_jp_journal_magic(rs)); - brelse (bhjh); - goto free_and_return; - } - - journal->j_trans_max = le32_to_cpu (jh->jh_journal.jp_journal_trans_max); - journal->j_max_batch = le32_to_cpu (jh->jh_journal.jp_journal_max_batch); - journal->j_max_commit_age = le32_to_cpu (jh->jh_journal.jp_journal_max_commit_age); - journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; - - if (journal->j_trans_max) { - /* make 
sure these parameters are available, assign it if they are not */ - __u32 initial = journal->j_trans_max; - __u32 ratio = 1; - - if (p_s_sb->s_blocksize < 4096) - ratio = 4096 / p_s_sb->s_blocksize; - - if (SB_ONDISK_JOURNAL_SIZE(p_s_sb)/journal->j_trans_max < JOURNAL_MIN_RATIO) - journal->j_trans_max = SB_ONDISK_JOURNAL_SIZE(p_s_sb) / JOURNAL_MIN_RATIO; - if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio) - journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT / ratio; - if (journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio) - journal->j_trans_max = JOURNAL_TRANS_MIN_DEFAULT / ratio; - - if (journal->j_trans_max != initial) - reiserfs_warning (p_s_sb, "sh-461: journal_init: wrong transaction max size (%u). Changed to %u", - initial, journal->j_trans_max); - - journal->j_max_batch = journal->j_trans_max* - JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT; - } - - if (!journal->j_trans_max) { - /*we have the file system was created by old version of mkreiserfs - so this field contains zero value */ - journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT ; - journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT ; - journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE ; - - /* for blocksize >= 4096 - max transaction size is 1024. 
For block size < 4096 - trans max size is decreased proportionally */ - if (p_s_sb->s_blocksize < 4096) { - journal->j_trans_max /= (4096 / p_s_sb->s_blocksize) ; - journal->j_max_batch = (journal->j_trans_max) * 9 / 10 ; - } - } - - journal->j_default_max_commit_age = journal->j_max_commit_age; - - if (commit_max_age != 0) { - journal->j_max_commit_age = commit_max_age; - journal->j_max_trans_age = commit_max_age; - } - - reiserfs_info (p_s_sb, "journal params: device %s, size %u, " - "journal first block %u, max trans len %u, max batch %u, " - "max commit age %u, max trans age %u\n", - bdevname( journal->j_dev_bd, b), - SB_ONDISK_JOURNAL_SIZE(p_s_sb), - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), - journal->j_trans_max, - journal->j_max_batch, - journal->j_max_commit_age, - journal->j_max_trans_age); - - brelse (bhjh); - - journal->j_list_bitmap_index = 0 ; - journal_list_init(p_s_sb) ; - - memset(journal->j_list_hash_table, 0, JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ; - - INIT_LIST_HEAD(&journal->j_dirty_buffers) ; - spin_lock_init(&journal->j_dirty_buffers_lock) ; - - journal->j_start = 0 ; - journal->j_len = 0 ; - journal->j_len_alloc = 0 ; - atomic_set(&(journal->j_wcount), 0) ; - atomic_set(&(journal->j_async_throttle), 0) ; - journal->j_bcount = 0 ; - journal->j_trans_start_time = 0 ; - journal->j_last = NULL ; - journal->j_first = NULL ; - init_waitqueue_head(&(journal->j_join_wait)) ; - sema_init(&journal->j_lock, 1); - sema_init(&journal->j_flush_sem, 1); - - journal->j_trans_id = 10 ; - journal->j_mount_id = 10 ; - journal->j_state = 0 ; - atomic_set(&(journal->j_jlock), 0) ; - journal->j_cnode_free_list = allocate_cnodes(num_cnodes) ; - journal->j_cnode_free_orig = journal->j_cnode_free_list ; - journal->j_cnode_free = journal->j_cnode_free_list ? 
num_cnodes : 0 ; - journal->j_cnode_used = 0 ; - journal->j_must_wait = 0 ; - - init_journal_hash(p_s_sb) ; - jl = journal->j_current_jl; - jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl); - if (!jl->j_list_bitmap) { - reiserfs_warning(p_s_sb, "journal-2005, get_list_bitmap failed for journal list 0") ; - goto free_and_return; - } - if (journal_read(p_s_sb) < 0) { - reiserfs_warning(p_s_sb, "Replay Failure, unable to mount") ; - goto free_and_return; - } - - reiserfs_mounted_fs_count++ ; - if (reiserfs_mounted_fs_count <= 1) - commit_wq = create_workqueue("reiserfs"); - - INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb); - return 0 ; -free_and_return: - free_journal_ram(p_s_sb); - return 1; +int journal_init(struct super_block *p_s_sb, const char *j_dev_name, + int old_format, unsigned int commit_max_age) +{ + int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2; + struct buffer_head *bhjh; + struct reiserfs_super_block *rs; + struct reiserfs_journal_header *jh; + struct reiserfs_journal *journal; + struct reiserfs_journal_list *jl; + char b[BDEVNAME_SIZE]; + + journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof(struct reiserfs_journal)); + if (!journal) { + reiserfs_warning(p_s_sb, + "journal-1256: unable to get memory for journal structure"); + return 1; + } + memset(journal, 0, sizeof(struct reiserfs_journal)); + INIT_LIST_HEAD(&journal->j_bitmap_nodes); + INIT_LIST_HEAD(&journal->j_prealloc_list); + INIT_LIST_HEAD(&journal->j_working_list); + INIT_LIST_HEAD(&journal->j_journal_list); + journal->j_persistent_trans = 0; + if (reiserfs_allocate_list_bitmaps(p_s_sb, + journal->j_list_bitmap, + SB_BMAP_NR(p_s_sb))) + goto free_and_return; + allocate_bitmap_nodes(p_s_sb); + + /* reserved for journal area support */ + SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ? 
+ REISERFS_OLD_DISK_OFFSET_IN_BYTES + / p_s_sb->s_blocksize + + SB_BMAP_NR(p_s_sb) + + 1 : + REISERFS_DISK_OFFSET_IN_BYTES / + p_s_sb->s_blocksize + 2); + + /* Sanity check to see is the standard journal fitting withing first bitmap + (actual for small blocksizes) */ + if (!SB_ONDISK_JOURNAL_DEVICE(p_s_sb) && + (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) + + SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8)) { + reiserfs_warning(p_s_sb, + "journal-1393: journal does not fit for area " + "addressed by first of bitmap blocks. It starts at " + "%u and its size is %u. Block size %ld", + SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb), + SB_ONDISK_JOURNAL_SIZE(p_s_sb), + p_s_sb->s_blocksize); + goto free_and_return; + } + + if (journal_init_dev(p_s_sb, journal, j_dev_name) != 0) { + reiserfs_warning(p_s_sb, + "sh-462: unable to initialize jornal device"); + goto free_and_return; + } + + rs = SB_DISK_SUPER_BLOCK(p_s_sb); + + /* read journal header */ + bhjh = journal_bread(p_s_sb, + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + + SB_ONDISK_JOURNAL_SIZE(p_s_sb)); + if (!bhjh) { + reiserfs_warning(p_s_sb, + "sh-459: unable to read journal header"); + goto free_and_return; + } + jh = (struct reiserfs_journal_header *)(bhjh->b_data); + + /* make sure that journal matches to the super block */ + if (is_reiserfs_jr(rs) + && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != + sb_jp_journal_magic(rs))) { + reiserfs_warning(p_s_sb, + "sh-460: journal header magic %x " + "(device %s) does not match to magic found in super " + "block %x", jh->jh_journal.jp_journal_magic, + bdevname(journal->j_dev_bd, b), + sb_jp_journal_magic(rs)); + brelse(bhjh); + goto free_and_return; + } + + journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max); + journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch); + journal->j_max_commit_age = + le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age); + journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; + + if (journal->j_trans_max) { + /* 
make sure these parameters are available, assign it if they are not */ + __u32 initial = journal->j_trans_max; + __u32 ratio = 1; + + if (p_s_sb->s_blocksize < 4096) + ratio = 4096 / p_s_sb->s_blocksize; + + if (SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max < + JOURNAL_MIN_RATIO) + journal->j_trans_max = + SB_ONDISK_JOURNAL_SIZE(p_s_sb) / JOURNAL_MIN_RATIO; + if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio) + journal->j_trans_max = + JOURNAL_TRANS_MAX_DEFAULT / ratio; + if (journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio) + journal->j_trans_max = + JOURNAL_TRANS_MIN_DEFAULT / ratio; + + if (journal->j_trans_max != initial) + reiserfs_warning(p_s_sb, + "sh-461: journal_init: wrong transaction max size (%u). Changed to %u", + initial, journal->j_trans_max); + + journal->j_max_batch = journal->j_trans_max * + JOURNAL_MAX_BATCH_DEFAULT / JOURNAL_TRANS_MAX_DEFAULT; + } + + if (!journal->j_trans_max) { + /*we have the file system was created by old version of mkreiserfs + so this field contains zero value */ + journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT; + journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT; + journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE; + + /* for blocksize >= 4096 - max transaction size is 1024. 
For block size < 4096 + trans max size is decreased proportionally */ + if (p_s_sb->s_blocksize < 4096) { + journal->j_trans_max /= (4096 / p_s_sb->s_blocksize); + journal->j_max_batch = (journal->j_trans_max) * 9 / 10; + } + } + + journal->j_default_max_commit_age = journal->j_max_commit_age; + + if (commit_max_age != 0) { + journal->j_max_commit_age = commit_max_age; + journal->j_max_trans_age = commit_max_age; + } + + reiserfs_info(p_s_sb, "journal params: device %s, size %u, " + "journal first block %u, max trans len %u, max batch %u, " + "max commit age %u, max trans age %u\n", + bdevname(journal->j_dev_bd, b), + SB_ONDISK_JOURNAL_SIZE(p_s_sb), + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), + journal->j_trans_max, + journal->j_max_batch, + journal->j_max_commit_age, journal->j_max_trans_age); + + brelse(bhjh); + + journal->j_list_bitmap_index = 0; + journal_list_init(p_s_sb); + + memset(journal->j_list_hash_table, 0, + JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); + + INIT_LIST_HEAD(&journal->j_dirty_buffers); + spin_lock_init(&journal->j_dirty_buffers_lock); + + journal->j_start = 0; + journal->j_len = 0; + journal->j_len_alloc = 0; + atomic_set(&(journal->j_wcount), 0); + atomic_set(&(journal->j_async_throttle), 0); + journal->j_bcount = 0; + journal->j_trans_start_time = 0; + journal->j_last = NULL; + journal->j_first = NULL; + init_waitqueue_head(&(journal->j_join_wait)); + sema_init(&journal->j_lock, 1); + sema_init(&journal->j_flush_sem, 1); + + journal->j_trans_id = 10; + journal->j_mount_id = 10; + journal->j_state = 0; + atomic_set(&(journal->j_jlock), 0); + journal->j_cnode_free_list = allocate_cnodes(num_cnodes); + journal->j_cnode_free_orig = journal->j_cnode_free_list; + journal->j_cnode_free = journal->j_cnode_free_list ? 
num_cnodes : 0; + journal->j_cnode_used = 0; + journal->j_must_wait = 0; + + init_journal_hash(p_s_sb); + jl = journal->j_current_jl; + jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl); + if (!jl->j_list_bitmap) { + reiserfs_warning(p_s_sb, + "journal-2005, get_list_bitmap failed for journal list 0"); + goto free_and_return; + } + if (journal_read(p_s_sb) < 0) { + reiserfs_warning(p_s_sb, "Replay Failure, unable to mount"); + goto free_and_return; + } + + reiserfs_mounted_fs_count++; + if (reiserfs_mounted_fs_count <= 1) + commit_wq = create_workqueue("reiserfs"); + + INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb); + return 0; + free_and_return: + free_journal_ram(p_s_sb); + return 1; } /* @@ -2534,96 +2786,102 @@ free_and_return: ** be used by delete to make sure they don't write more than can fit inside a single ** transaction */ -int journal_transaction_should_end(struct reiserfs_transaction_handle *th, int new_alloc) { - struct reiserfs_journal *journal = SB_JOURNAL (th->t_super); - time_t now = get_seconds() ; - /* cannot restart while nested */ - BUG_ON (!th->t_trans_id); - if (th->t_refcount > 1) - return 0 ; - if ( journal->j_must_wait > 0 || - (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || - atomic_read(&(journal->j_jlock)) || - (now - journal->j_trans_start_time) > journal->j_max_trans_age || - journal->j_cnode_free < (journal->j_trans_max * 3)) { - return 1 ; - } - return 0 ; +int journal_transaction_should_end(struct reiserfs_transaction_handle *th, + int new_alloc) +{ + struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); + time_t now = get_seconds(); + /* cannot restart while nested */ + BUG_ON(!th->t_trans_id); + if (th->t_refcount > 1) + return 0; + if (journal->j_must_wait > 0 || + (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || + atomic_read(&(journal->j_jlock)) || + (now - journal->j_trans_start_time) > journal->j_max_trans_age || + journal->j_cnode_free < (journal->j_trans_max * 3)) { + return 1; + } 
+ return 0; } /* this must be called inside a transaction, and requires the ** kernel_lock to be held */ -void reiserfs_block_writes(struct reiserfs_transaction_handle *th) { - struct reiserfs_journal *journal = SB_JOURNAL (th->t_super); - BUG_ON (!th->t_trans_id); - journal->j_must_wait = 1 ; - set_bit(J_WRITERS_BLOCKED, &journal->j_state) ; - return ; +void reiserfs_block_writes(struct reiserfs_transaction_handle *th) +{ + struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); + BUG_ON(!th->t_trans_id); + journal->j_must_wait = 1; + set_bit(J_WRITERS_BLOCKED, &journal->j_state); + return; } /* this must be called without a transaction started, and does not ** require BKL */ -void reiserfs_allow_writes(struct super_block *s) { - struct reiserfs_journal *journal = SB_JOURNAL (s); - clear_bit(J_WRITERS_BLOCKED, &journal->j_state) ; - wake_up(&journal->j_join_wait) ; +void reiserfs_allow_writes(struct super_block *s) +{ + struct reiserfs_journal *journal = SB_JOURNAL(s); + clear_bit(J_WRITERS_BLOCKED, &journal->j_state); + wake_up(&journal->j_join_wait); } /* this must be called without a transaction started, and does not ** require BKL */ -void reiserfs_wait_on_write_block(struct super_block *s) { - struct reiserfs_journal *journal = SB_JOURNAL (s); - wait_event(journal->j_join_wait, - !test_bit(J_WRITERS_BLOCKED, &journal->j_state)) ; -} - -static void queue_log_writer(struct super_block *s) { - wait_queue_t wait; - struct reiserfs_journal *journal = SB_JOURNAL (s); - set_bit(J_WRITERS_QUEUED, &journal->j_state); - - /* - * we don't want to use wait_event here because - * we only want to wait once. 
- */ - init_waitqueue_entry(&wait, current); - add_wait_queue(&journal->j_join_wait, &wait); - set_current_state(TASK_UNINTERRUPTIBLE); - if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) - schedule(); - current->state = TASK_RUNNING; - remove_wait_queue(&journal->j_join_wait, &wait); -} - -static void wake_queued_writers(struct super_block *s) { - struct reiserfs_journal *journal = SB_JOURNAL (s); - if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state)) - wake_up(&journal->j_join_wait); -} - -static void let_transaction_grow(struct super_block *sb, - unsigned long trans_id) -{ - struct reiserfs_journal *journal = SB_JOURNAL (sb); - unsigned long bcount = journal->j_bcount; - while(1) { +void reiserfs_wait_on_write_block(struct super_block *s) +{ + struct reiserfs_journal *journal = SB_JOURNAL(s); + wait_event(journal->j_join_wait, + !test_bit(J_WRITERS_BLOCKED, &journal->j_state)); +} + +static void queue_log_writer(struct super_block *s) +{ + wait_queue_t wait; + struct reiserfs_journal *journal = SB_JOURNAL(s); + set_bit(J_WRITERS_QUEUED, &journal->j_state); + + /* + * we don't want to use wait_event here because + * we only want to wait once. 
+ */ + init_waitqueue_entry(&wait, current); + add_wait_queue(&journal->j_join_wait, &wait); set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); - journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; - while ((atomic_read(&journal->j_wcount) > 0 || - atomic_read(&journal->j_jlock)) && - journal->j_trans_id == trans_id) { - queue_log_writer(sb); + if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) + schedule(); + current->state = TASK_RUNNING; + remove_wait_queue(&journal->j_join_wait, &wait); +} + +static void wake_queued_writers(struct super_block *s) +{ + struct reiserfs_journal *journal = SB_JOURNAL(s); + if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state)) + wake_up(&journal->j_join_wait); +} + +static void let_transaction_grow(struct super_block *sb, unsigned long trans_id) +{ + struct reiserfs_journal *journal = SB_JOURNAL(sb); + unsigned long bcount = journal->j_bcount; + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; + while ((atomic_read(&journal->j_wcount) > 0 || + atomic_read(&journal->j_jlock)) && + journal->j_trans_id == trans_id) { + queue_log_writer(sb); + } + if (journal->j_trans_id != trans_id) + break; + if (bcount == journal->j_bcount) + break; + bcount = journal->j_bcount; } - if (journal->j_trans_id != trans_id) - break; - if (bcount == journal->j_bcount) - break; - bcount = journal->j_bcount; - } } /* join == true if you must join an existing transaction. @@ -2632,224 +2890,244 @@ static void let_transaction_grow(struct super_block *sb, ** this will block until the transaction is joinable. send the number of blocks you ** expect to use in nblocks. 
*/ -static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb,unsigned long nblocks,int join) { - time_t now = get_seconds() ; - int old_trans_id ; - struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); - struct reiserfs_transaction_handle myth; - int sched_count = 0; - int retval; - - reiserfs_check_lock_depth(p_s_sb, "journal_begin") ; - if (nblocks > journal->j_trans_max) - BUG(); - - PROC_INFO_INC( p_s_sb, journal.journal_being ); - /* set here for journal_join */ - th->t_refcount = 1; - th->t_super = p_s_sb ; - -relock: - lock_journal(p_s_sb) ; - if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted (journal)) { - unlock_journal (p_s_sb); - retval = journal->j_errno; - goto out_fail; - } - journal->j_bcount++; - - if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { - unlock_journal(p_s_sb) ; - reiserfs_wait_on_write_block(p_s_sb) ; - PROC_INFO_INC( p_s_sb, journal.journal_relock_writers ); - goto relock ; - } - now = get_seconds(); - - /* if there is no room in the journal OR - ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning - ** we don't sleep if there aren't other writers - */ - - if ( (!join && journal->j_must_wait > 0) || - ( !join && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch) || - (!join && atomic_read(&journal->j_wcount) > 0 && journal->j_trans_start_time > 0 && - (now - journal->j_trans_start_time) > journal->j_max_trans_age) || - (!join && atomic_read(&journal->j_jlock)) || - (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { - - old_trans_id = journal->j_trans_id; - unlock_journal(p_s_sb) ; /* allow others to finish this transaction */ - - if (!join && (journal->j_len_alloc + nblocks + 2) >= - journal->j_max_batch && - ((journal->j_len + nblocks + 2) * 100) < (journal->j_len_alloc * 75)) - { - if (atomic_read(&journal->j_wcount) > 10) { - sched_count++; - queue_log_writer(p_s_sb); - goto relock; - } - } - /* don't 
mess with joining the transaction if all we have to do is - * wait for someone else to do a commit - */ - if (atomic_read(&journal->j_jlock)) { - while (journal->j_trans_id == old_trans_id && - atomic_read(&journal->j_jlock)) { - queue_log_writer(p_s_sb); - } - goto relock; - } - retval = journal_join(&myth, p_s_sb, 1) ; - if (retval) - goto out_fail; - - /* someone might have ended the transaction while we joined */ - if (old_trans_id != journal->j_trans_id) { - retval = do_journal_end(&myth, p_s_sb, 1, 0) ; - } else { - retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW) ; - } - - if (retval) - goto out_fail; - - PROC_INFO_INC( p_s_sb, journal.journal_relock_wcount ); - goto relock ; - } - /* we are the first writer, set trans_id */ - if (journal->j_trans_start_time == 0) { - journal->j_trans_start_time = get_seconds(); - } - atomic_inc(&(journal->j_wcount)) ; - journal->j_len_alloc += nblocks ; - th->t_blocks_logged = 0 ; - th->t_blocks_allocated = nblocks ; - th->t_trans_id = journal->j_trans_id ; - unlock_journal(p_s_sb) ; - INIT_LIST_HEAD (&th->t_list); - get_fs_excl(); - return 0 ; - -out_fail: - memset (th, 0, sizeof (*th)); - /* Re-set th->t_super, so we can properly keep track of how many - * persistent transactions there are. We need to do this so if this - * call is part of a failed restart_transaction, we can free it later */ - th->t_super = p_s_sb; - return retval; -} - -struct reiserfs_transaction_handle * -reiserfs_persistent_transaction(struct super_block *s, int nblocks) { - int ret ; - struct reiserfs_transaction_handle *th ; - - /* if we're nesting into an existing transaction. 
It will be - ** persistent on its own - */ - if (reiserfs_transaction_running(s)) { - th = current->journal_info ; - th->t_refcount++ ; - if (th->t_refcount < 2) { - BUG() ; - } - return th ; - } - th = reiserfs_kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS, s) ; - if (!th) - return NULL; - ret = journal_begin(th, s, nblocks) ; - if (ret) { - reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), s) ; - return NULL; - } - - SB_JOURNAL(s)->j_persistent_trans++; - return th ; -} - -int -reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) { - struct super_block *s = th->t_super; - int ret = 0; - if (th->t_trans_id) - ret = journal_end(th, th->t_super, th->t_blocks_allocated); - else - ret = -EIO; - if (th->t_refcount == 0) { - SB_JOURNAL(s)->j_persistent_trans--; - reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), s) ; - } - return ret; -} - -static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { - struct reiserfs_transaction_handle *cur_th = current->journal_info; - - /* this keeps do_journal_end from NULLing out the current->journal_info - ** pointer - */ - th->t_handle_save = cur_th ; - if (cur_th && cur_th->t_refcount > 1) { - BUG() ; - } - return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN) ; -} - -int journal_join_abort(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { - struct reiserfs_transaction_handle *cur_th = current->journal_info; - - /* this keeps do_journal_end from NULLing out the current->journal_info - ** pointer - */ - th->t_handle_save = cur_th ; - if (cur_th && cur_th->t_refcount > 1) { - BUG() ; - } - return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT) ; -} - -int journal_begin(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks) { - struct reiserfs_transaction_handle *cur_th = current->journal_info ; - int ret ; - - 
th->t_handle_save = NULL ; - if (cur_th) { - /* we are nesting into the current transaction */ - if (cur_th->t_super == p_s_sb) { - BUG_ON (!cur_th->t_refcount); - cur_th->t_refcount++ ; - memcpy(th, cur_th, sizeof(*th)); - if (th->t_refcount <= 1) - reiserfs_warning (p_s_sb, "BAD: refcount <= 1, but journal_info != 0"); - return 0; +static int do_journal_begin_r(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb, unsigned long nblocks, + int join) +{ + time_t now = get_seconds(); + int old_trans_id; + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + struct reiserfs_transaction_handle myth; + int sched_count = 0; + int retval; + + reiserfs_check_lock_depth(p_s_sb, "journal_begin"); + if (nblocks > journal->j_trans_max) + BUG(); + + PROC_INFO_INC(p_s_sb, journal.journal_being); + /* set here for journal_join */ + th->t_refcount = 1; + th->t_super = p_s_sb; + + relock: + lock_journal(p_s_sb); + if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) { + unlock_journal(p_s_sb); + retval = journal->j_errno; + goto out_fail; + } + journal->j_bcount++; + + if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { + unlock_journal(p_s_sb); + reiserfs_wait_on_write_block(p_s_sb); + PROC_INFO_INC(p_s_sb, journal.journal_relock_writers); + goto relock; + } + now = get_seconds(); + + /* if there is no room in the journal OR + ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning + ** we don't sleep if there aren't other writers + */ + + if ((!join && journal->j_must_wait > 0) || + (!join + && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch) + || (!join && atomic_read(&journal->j_wcount) > 0 + && journal->j_trans_start_time > 0 + && (now - journal->j_trans_start_time) > + journal->j_max_trans_age) || (!join + && atomic_read(&journal->j_jlock)) + || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { + + old_trans_id = journal->j_trans_id; + unlock_journal(p_s_sb); /* 
allow others to finish this transaction */ + + if (!join && (journal->j_len_alloc + nblocks + 2) >= + journal->j_max_batch && + ((journal->j_len + nblocks + 2) * 100) < + (journal->j_len_alloc * 75)) { + if (atomic_read(&journal->j_wcount) > 10) { + sched_count++; + queue_log_writer(p_s_sb); + goto relock; + } + } + /* don't mess with joining the transaction if all we have to do is + * wait for someone else to do a commit + */ + if (atomic_read(&journal->j_jlock)) { + while (journal->j_trans_id == old_trans_id && + atomic_read(&journal->j_jlock)) { + queue_log_writer(p_s_sb); + } + goto relock; + } + retval = journal_join(&myth, p_s_sb, 1); + if (retval) + goto out_fail; + + /* someone might have ended the transaction while we joined */ + if (old_trans_id != journal->j_trans_id) { + retval = do_journal_end(&myth, p_s_sb, 1, 0); + } else { + retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW); + } + + if (retval) + goto out_fail; + + PROC_INFO_INC(p_s_sb, journal.journal_relock_wcount); + goto relock; + } + /* we are the first writer, set trans_id */ + if (journal->j_trans_start_time == 0) { + journal->j_trans_start_time = get_seconds(); + } + atomic_inc(&(journal->j_wcount)); + journal->j_len_alloc += nblocks; + th->t_blocks_logged = 0; + th->t_blocks_allocated = nblocks; + th->t_trans_id = journal->j_trans_id; + unlock_journal(p_s_sb); + INIT_LIST_HEAD(&th->t_list); + get_fs_excl(); + return 0; + + out_fail: + memset(th, 0, sizeof(*th)); + /* Re-set th->t_super, so we can properly keep track of how many + * persistent transactions there are. We need to do this so if this + * call is part of a failed restart_transaction, we can free it later */ + th->t_super = p_s_sb; + return retval; +} + +struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct + super_block + *s, + int nblocks) +{ + int ret; + struct reiserfs_transaction_handle *th; + + /* if we're nesting into an existing transaction. 
It will be + ** persistent on its own + */ + if (reiserfs_transaction_running(s)) { + th = current->journal_info; + th->t_refcount++; + if (th->t_refcount < 2) { + BUG(); + } + return th; + } + th = reiserfs_kmalloc(sizeof(struct reiserfs_transaction_handle), + GFP_NOFS, s); + if (!th) + return NULL; + ret = journal_begin(th, s, nblocks); + if (ret) { + reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), + s); + return NULL; + } + + SB_JOURNAL(s)->j_persistent_trans++; + return th; +} + +int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) +{ + struct super_block *s = th->t_super; + int ret = 0; + if (th->t_trans_id) + ret = journal_end(th, th->t_super, th->t_blocks_allocated); + else + ret = -EIO; + if (th->t_refcount == 0) { + SB_JOURNAL(s)->j_persistent_trans--; + reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), + s); + } + return ret; +} + +static int journal_join(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb, unsigned long nblocks) +{ + struct reiserfs_transaction_handle *cur_th = current->journal_info; + + /* this keeps do_journal_end from NULLing out the current->journal_info + ** pointer + */ + th->t_handle_save = cur_th; + if (cur_th && cur_th->t_refcount > 1) { + BUG(); + } + return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN); +} + +int journal_join_abort(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb, unsigned long nblocks) +{ + struct reiserfs_transaction_handle *cur_th = current->journal_info; + + /* this keeps do_journal_end from NULLing out the current->journal_info + ** pointer + */ + th->t_handle_save = cur_th; + if (cur_th && cur_th->t_refcount > 1) { + BUG(); + } + return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT); +} + +int journal_begin(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb, unsigned long nblocks) +{ + struct reiserfs_transaction_handle *cur_th = current->journal_info; + int ret; + + 
th->t_handle_save = NULL; + if (cur_th) { + /* we are nesting into the current transaction */ + if (cur_th->t_super == p_s_sb) { + BUG_ON(!cur_th->t_refcount); + cur_th->t_refcount++; + memcpy(th, cur_th, sizeof(*th)); + if (th->t_refcount <= 1) + reiserfs_warning(p_s_sb, + "BAD: refcount <= 1, but journal_info != 0"); + return 0; + } else { + /* we've ended up with a handle from a different filesystem. + ** save it and restore on journal_end. This should never + ** really happen... + */ + reiserfs_warning(p_s_sb, + "clm-2100: nesting info a different FS"); + th->t_handle_save = current->journal_info; + current->journal_info = th; + } } else { - /* we've ended up with a handle from a different filesystem. - ** save it and restore on journal_end. This should never - ** really happen... - */ - reiserfs_warning(p_s_sb, "clm-2100: nesting info a different FS") ; - th->t_handle_save = current->journal_info ; - current->journal_info = th; - } - } else { - current->journal_info = th; - } - ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG) ; - if (current->journal_info != th) - BUG() ; + current->journal_info = th; + } + ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG); + if (current->journal_info != th) + BUG(); - /* I guess this boils down to being the reciprocal of clm-2100 above. - * If do_journal_begin_r fails, we need to put it back, since journal_end - * won't be called to do it. */ - if (ret) - current->journal_info = th->t_handle_save; - else - BUG_ON (!th->t_refcount); + /* I guess this boils down to being the reciprocal of clm-2100 above. + * If do_journal_begin_r fails, we need to put it back, since journal_end + * won't be called to do it. 
*/ + if (ret) + current->journal_info = th->t_handle_save; + else + BUG_ON(!th->t_refcount); - return ret ; + return ret; } /* @@ -2861,129 +3139,140 @@ int journal_begin(struct reiserfs_transaction_handle *th, struct super_block * ** ** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. */ -int journal_mark_dirty(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, struct buffer_head *bh) { - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - struct reiserfs_journal_cnode *cn = NULL; - int count_already_incd = 0 ; - int prepared = 0 ; - BUG_ON (!th->t_trans_id); - - PROC_INFO_INC( p_s_sb, journal.mark_dirty ); - if (th->t_trans_id != journal->j_trans_id) { - reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n", - th->t_trans_id, journal->j_trans_id); - } - - p_s_sb->s_dirt = 1; - - prepared = test_clear_buffer_journal_prepared (bh); - clear_buffer_journal_restore_dirty (bh); - /* already in this transaction, we are done */ - if (buffer_journaled(bh)) { - PROC_INFO_INC( p_s_sb, journal.mark_dirty_already ); - return 0 ; - } - - /* this must be turned into a panic instead of a warning. We can't allow - ** a dirty or journal_dirty or locked buffer to be logged, as some changes - ** could get to disk too early. NOT GOOD. - */ - if (!prepared || buffer_dirty(bh)) { - reiserfs_warning (p_s_sb, "journal-1777: buffer %llu bad state " - "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", - (unsigned long long)bh->b_blocknr, prepared ? ' ' : '!', - buffer_locked(bh) ? ' ' : '!', - buffer_dirty(bh) ? ' ' : '!', - buffer_journal_dirty(bh) ? ' ' : '!') ; - } - - if (atomic_read(&(journal->j_wcount)) <= 0) { - reiserfs_warning (p_s_sb, "journal-1409: journal_mark_dirty returning because j_wcount was %d", atomic_read(&(journal->j_wcount))) ; - return 1 ; - } - /* this error means I've screwed up, and we've overflowed the transaction. - ** Nothing can be done here, except make the FS readonly or panic. 
- */ - if (journal->j_len >= journal->j_trans_max) { - reiserfs_panic(th->t_super, "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n", journal->j_len) ; - } - - if (buffer_journal_dirty(bh)) { - count_already_incd = 1 ; - PROC_INFO_INC( p_s_sb, journal.mark_dirty_notjournal ); - clear_buffer_journal_dirty (bh); - } - - if (journal->j_len > journal->j_len_alloc) { - journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT ; - } - - set_buffer_journaled (bh); - - /* now put this guy on the end */ - if (!cn) { - cn = get_cnode(p_s_sb) ; - if (!cn) { - reiserfs_panic(p_s_sb, "get_cnode failed!\n"); - } - - if (th->t_blocks_logged == th->t_blocks_allocated) { - th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT ; - journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT ; - } - th->t_blocks_logged++ ; - journal->j_len++ ; - - cn->bh = bh ; - cn->blocknr = bh->b_blocknr ; - cn->sb = p_s_sb; - cn->jlist = NULL ; - insert_journal_hash(journal->j_hash_table, cn) ; - if (!count_already_incd) { - get_bh(bh) ; - } - } - cn->next = NULL ; - cn->prev = journal->j_last ; - cn->bh = bh ; - if (journal->j_last) { - journal->j_last->next = cn ; - journal->j_last = cn ; - } else { - journal->j_first = cn ; - journal->j_last = cn ; - } - return 0 ; -} - -int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { - if (!current->journal_info && th->t_refcount > 1) - reiserfs_warning (p_s_sb, "REISER-NESTING: th NULL, refcount %d", - th->t_refcount); - - if (!th->t_trans_id) { - WARN_ON (1); - return -EIO; - } - - th->t_refcount--; - if (th->t_refcount > 0) { - struct reiserfs_transaction_handle *cur_th = current->journal_info ; - - /* we aren't allowed to close a nested transaction on a different - ** filesystem from the one in the task struct - */ - if (cur_th->t_super != th->t_super) - BUG() ; - - if (th != cur_th) { - memcpy(current->journal_info, th, sizeof(*th)); - th->t_trans_id = 0; - } - return 0; - } else { - return 
do_journal_end(th, p_s_sb, nblocks, 0) ; - } +int journal_mark_dirty(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb, struct buffer_head *bh) +{ + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + struct reiserfs_journal_cnode *cn = NULL; + int count_already_incd = 0; + int prepared = 0; + BUG_ON(!th->t_trans_id); + + PROC_INFO_INC(p_s_sb, journal.mark_dirty); + if (th->t_trans_id != journal->j_trans_id) { + reiserfs_panic(th->t_super, + "journal-1577: handle trans id %ld != current trans id %ld\n", + th->t_trans_id, journal->j_trans_id); + } + + p_s_sb->s_dirt = 1; + + prepared = test_clear_buffer_journal_prepared(bh); + clear_buffer_journal_restore_dirty(bh); + /* already in this transaction, we are done */ + if (buffer_journaled(bh)) { + PROC_INFO_INC(p_s_sb, journal.mark_dirty_already); + return 0; + } + + /* this must be turned into a panic instead of a warning. We can't allow + ** a dirty or journal_dirty or locked buffer to be logged, as some changes + ** could get to disk too early. NOT GOOD. + */ + if (!prepared || buffer_dirty(bh)) { + reiserfs_warning(p_s_sb, "journal-1777: buffer %llu bad state " + "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", + (unsigned long long)bh->b_blocknr, + prepared ? ' ' : '!', + buffer_locked(bh) ? ' ' : '!', + buffer_dirty(bh) ? ' ' : '!', + buffer_journal_dirty(bh) ? ' ' : '!'); + } + + if (atomic_read(&(journal->j_wcount)) <= 0) { + reiserfs_warning(p_s_sb, + "journal-1409: journal_mark_dirty returning because j_wcount was %d", + atomic_read(&(journal->j_wcount))); + return 1; + } + /* this error means I've screwed up, and we've overflowed the transaction. + ** Nothing can be done here, except make the FS readonly or panic. 
+ */ + if (journal->j_len >= journal->j_trans_max) { + reiserfs_panic(th->t_super, + "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n", + journal->j_len); + } + + if (buffer_journal_dirty(bh)) { + count_already_incd = 1; + PROC_INFO_INC(p_s_sb, journal.mark_dirty_notjournal); + clear_buffer_journal_dirty(bh); + } + + if (journal->j_len > journal->j_len_alloc) { + journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT; + } + + set_buffer_journaled(bh); + + /* now put this guy on the end */ + if (!cn) { + cn = get_cnode(p_s_sb); + if (!cn) { + reiserfs_panic(p_s_sb, "get_cnode failed!\n"); + } + + if (th->t_blocks_logged == th->t_blocks_allocated) { + th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT; + journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT; + } + th->t_blocks_logged++; + journal->j_len++; + + cn->bh = bh; + cn->blocknr = bh->b_blocknr; + cn->sb = p_s_sb; + cn->jlist = NULL; + insert_journal_hash(journal->j_hash_table, cn); + if (!count_already_incd) { + get_bh(bh); + } + } + cn->next = NULL; + cn->prev = journal->j_last; + cn->bh = bh; + if (journal->j_last) { + journal->j_last->next = cn; + journal->j_last = cn; + } else { + journal->j_first = cn; + journal->j_last = cn; + } + return 0; +} + +int journal_end(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb, unsigned long nblocks) +{ + if (!current->journal_info && th->t_refcount > 1) + reiserfs_warning(p_s_sb, "REISER-NESTING: th NULL, refcount %d", + th->t_refcount); + + if (!th->t_trans_id) { + WARN_ON(1); + return -EIO; + } + + th->t_refcount--; + if (th->t_refcount > 0) { + struct reiserfs_transaction_handle *cur_th = + current->journal_info; + + /* we aren't allowed to close a nested transaction on a different + ** filesystem from the one in the task struct + */ + if (cur_th->t_super != th->t_super) + BUG(); + + if (th != cur_th) { + memcpy(current->journal_info, th, sizeof(*th)); + th->t_trans_id = 0; + } + return 0; + } else { + return do_journal_end(th, 
p_s_sb, nblocks, 0); + } } /* removes from the current transaction, relsing and descrementing any counters. @@ -2993,47 +3282,51 @@ int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_ ** ** returns 1 if it cleaned and relsed the buffer. 0 otherwise */ -static int remove_from_transaction(struct super_block *p_s_sb, b_blocknr_t blocknr, int already_cleaned) { - struct buffer_head *bh ; - struct reiserfs_journal_cnode *cn ; - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - int ret = 0; - - cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr) ; - if (!cn || !cn->bh) { - return ret ; - } - bh = cn->bh ; - if (cn->prev) { - cn->prev->next = cn->next ; - } - if (cn->next) { - cn->next->prev = cn->prev ; - } - if (cn == journal->j_first) { - journal->j_first = cn->next ; - } - if (cn == journal->j_last) { - journal->j_last = cn->prev ; - } - if (bh) - remove_journal_hash(p_s_sb, journal->j_hash_table, NULL, bh->b_blocknr, 0) ; - clear_buffer_journaled (bh); /* don't log this one */ - - if (!already_cleaned) { - clear_buffer_journal_dirty (bh); - clear_buffer_dirty(bh); - clear_buffer_journal_test (bh); - put_bh(bh) ; - if (atomic_read(&(bh->b_count)) < 0) { - reiserfs_warning (p_s_sb, "journal-1752: remove from trans, b_count < 0"); - } - ret = 1 ; - } - journal->j_len-- ; - journal->j_len_alloc-- ; - free_cnode(p_s_sb, cn) ; - return ret ; +static int remove_from_transaction(struct super_block *p_s_sb, + b_blocknr_t blocknr, int already_cleaned) +{ + struct buffer_head *bh; + struct reiserfs_journal_cnode *cn; + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + int ret = 0; + + cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); + if (!cn || !cn->bh) { + return ret; + } + bh = cn->bh; + if (cn->prev) { + cn->prev->next = cn->next; + } + if (cn->next) { + cn->next->prev = cn->prev; + } + if (cn == journal->j_first) { + journal->j_first = cn->next; + } + if (cn == journal->j_last) { + journal->j_last 
= cn->prev; + } + if (bh) + remove_journal_hash(p_s_sb, journal->j_hash_table, NULL, + bh->b_blocknr, 0); + clear_buffer_journaled(bh); /* don't log this one */ + + if (!already_cleaned) { + clear_buffer_journal_dirty(bh); + clear_buffer_dirty(bh); + clear_buffer_journal_test(bh); + put_bh(bh); + if (atomic_read(&(bh->b_count)) < 0) { + reiserfs_warning(p_s_sb, + "journal-1752: remove from trans, b_count < 0"); + } + ret = 1; + } + journal->j_len--; + journal->j_len_alloc--; + free_cnode(p_s_sb, cn); + return ret; } /* @@ -3046,120 +3339,129 @@ static int remove_from_transaction(struct super_block *p_s_sb, b_blocknr_t block ** blocks for a given transaction on disk ** */ -static int can_dirty(struct reiserfs_journal_cnode *cn) { - struct super_block *sb = cn->sb; - b_blocknr_t blocknr = cn->blocknr ; - struct reiserfs_journal_cnode *cur = cn->hprev ; - int can_dirty = 1 ; - - /* first test hprev. These are all newer than cn, so any node here - ** with the same block number and dev means this node can't be sent - ** to disk right now. - */ - while(cur && can_dirty) { - if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && - cur->blocknr == blocknr) { - can_dirty = 0 ; - } - cur = cur->hprev ; - } - /* then test hnext. These are all older than cn. As long as they - ** are committed to the log, it is safe to write cn to disk - */ - cur = cn->hnext ; - while(cur && can_dirty) { - if (cur->jlist && cur->jlist->j_len > 0 && - atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && - cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { - can_dirty = 0 ; - } - cur = cur->hnext ; - } - return can_dirty ; +static int can_dirty(struct reiserfs_journal_cnode *cn) +{ + struct super_block *sb = cn->sb; + b_blocknr_t blocknr = cn->blocknr; + struct reiserfs_journal_cnode *cur = cn->hprev; + int can_dirty = 1; + + /* first test hprev. 
These are all newer than cn, so any node here + ** with the same block number and dev means this node can't be sent + ** to disk right now. + */ + while (cur && can_dirty) { + if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && + cur->blocknr == blocknr) { + can_dirty = 0; + } + cur = cur->hprev; + } + /* then test hnext. These are all older than cn. As long as they + ** are committed to the log, it is safe to write cn to disk + */ + cur = cn->hnext; + while (cur && can_dirty) { + if (cur->jlist && cur->jlist->j_len > 0 && + atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && + cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { + can_dirty = 0; + } + cur = cur->hnext; + } + return can_dirty; } /* syncs the commit blocks, but does not force the real buffers to disk ** will wait until the current transaction is done/commited before returning */ -int journal_end_sync(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); +int journal_end_sync(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb, unsigned long nblocks) +{ + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); - BUG_ON (!th->t_trans_id); - /* you can sync while nested, very, very bad */ - if (th->t_refcount > 1) { - BUG() ; - } - if (journal->j_len == 0) { - reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; - journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; - } - return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT) ; + BUG_ON(!th->t_trans_id); + /* you can sync while nested, very, very bad */ + if (th->t_refcount > 1) { + BUG(); + } + if (journal->j_len == 0) { + reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), + 1); + journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)); + } + return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT); } /* ** writeback the pending async commits to disk */ -static void 
flush_async_commits(void *p) { - struct super_block *p_s_sb = p; - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - struct reiserfs_journal_list *jl; - struct list_head *entry; - - lock_kernel(); - if (!list_empty(&journal->j_journal_list)) { - /* last entry is the youngest, commit it and you get everything */ - entry = journal->j_journal_list.prev; - jl = JOURNAL_LIST_ENTRY(entry); - flush_commit_list(p_s_sb, jl, 1); - } - unlock_kernel(); - /* - * this is a little racey, but there's no harm in missing - * the filemap_fdata_write - */ - if (!atomic_read(&journal->j_async_throttle) && !reiserfs_is_journal_aborted (journal)) { - atomic_inc(&journal->j_async_throttle); - filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping); - atomic_dec(&journal->j_async_throttle); - } +static void flush_async_commits(void *p) +{ + struct super_block *p_s_sb = p; + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + struct reiserfs_journal_list *jl; + struct list_head *entry; + + lock_kernel(); + if (!list_empty(&journal->j_journal_list)) { + /* last entry is the youngest, commit it and you get everything */ + entry = journal->j_journal_list.prev; + jl = JOURNAL_LIST_ENTRY(entry); + flush_commit_list(p_s_sb, jl, 1); + } + unlock_kernel(); + /* + * this is a little racey, but there's no harm in missing + * the filemap_fdata_write + */ + if (!atomic_read(&journal->j_async_throttle) + && !reiserfs_is_journal_aborted(journal)) { + atomic_inc(&journal->j_async_throttle); + filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping); + atomic_dec(&journal->j_async_throttle); + } } /* ** flushes any old transactions to disk ** ends the current transaction if it is too old */ -int reiserfs_flush_old_commits(struct super_block *p_s_sb) { - time_t now ; - struct reiserfs_transaction_handle th ; - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - - now = get_seconds(); - /* safety check so we don't flush while we are replaying the log during - * mount - */ - if 
(list_empty(&journal->j_journal_list)) { - return 0 ; - } - - /* check the current transaction. If there are no writers, and it is - * too old, finish it, and force the commit blocks to disk - */ - if (atomic_read(&journal->j_wcount) <= 0 && - journal->j_trans_start_time > 0 && - journal->j_len > 0 && - (now - journal->j_trans_start_time) > journal->j_max_trans_age) - { - if (!journal_join(&th, p_s_sb, 1)) { - reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; - journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; - - /* we're only being called from kreiserfsd, it makes no sense to do - ** an async commit so that kreiserfsd can do it later - */ - do_journal_end(&th, p_s_sb,1, COMMIT_NOW | WAIT) ; - } - } - return p_s_sb->s_dirt; +int reiserfs_flush_old_commits(struct super_block *p_s_sb) +{ + time_t now; + struct reiserfs_transaction_handle th; + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + + now = get_seconds(); + /* safety check so we don't flush while we are replaying the log during + * mount + */ + if (list_empty(&journal->j_journal_list)) { + return 0; + } + + /* check the current transaction. 
If there are no writers, and it is + * too old, finish it, and force the commit blocks to disk + */ + if (atomic_read(&journal->j_wcount) <= 0 && + journal->j_trans_start_time > 0 && + journal->j_len > 0 && + (now - journal->j_trans_start_time) > journal->j_max_trans_age) { + if (!journal_join(&th, p_s_sb, 1)) { + reiserfs_prepare_for_journal(p_s_sb, + SB_BUFFER_WITH_SB(p_s_sb), + 1); + journal_mark_dirty(&th, p_s_sb, + SB_BUFFER_WITH_SB(p_s_sb)); + + /* we're only being called from kreiserfsd, it makes no sense to do + ** an async commit so that kreiserfsd can do it later + */ + do_journal_end(&th, p_s_sb, 1, COMMIT_NOW | WAIT); + } + } + return p_s_sb->s_dirt; } /* @@ -3173,101 +3475,108 @@ int reiserfs_flush_old_commits(struct super_block *p_s_sb) { ** ** Note, we can't allow the journal_end to proceed while there are still writers in the log. */ -static int check_journal_end(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, - unsigned long nblocks, int flags) { - - time_t now ; - int flush = flags & FLUSH_ALL ; - int commit_now = flags & COMMIT_NOW ; - int wait_on_commit = flags & WAIT ; - struct reiserfs_journal_list *jl; - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - - BUG_ON (!th->t_trans_id); - - if (th->t_trans_id != journal->j_trans_id) { - reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n", - th->t_trans_id, journal->j_trans_id); - } - - journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged) ; - if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. 
unmounting might not call begin */ - atomic_dec(&(journal->j_wcount)) ; - } - - /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released - ** will be dealt with by next transaction that actually writes something, but should be taken - ** care of in this trans - */ - if (journal->j_len == 0) { - BUG(); - } - /* if wcount > 0, and we are called to with flush or commit_now, - ** we wait on j_join_wait. We will wake up when the last writer has - ** finished the transaction, and started it on its way to the disk. - ** Then, we flush the commit or journal list, and just return 0 - ** because the rest of journal end was already done for this transaction. - */ - if (atomic_read(&(journal->j_wcount)) > 0) { - if (flush || commit_now) { - unsigned trans_id ; - - jl = journal->j_current_jl; - trans_id = jl->j_trans_id; - if (wait_on_commit) - jl->j_state |= LIST_COMMIT_PENDING; - atomic_set(&(journal->j_jlock), 1) ; - if (flush) { - journal->j_next_full_flush = 1 ; - } - unlock_journal(p_s_sb) ; - - /* sleep while the current transaction is still j_jlocked */ - while(journal->j_trans_id == trans_id) { - if (atomic_read(&journal->j_jlock)) { - queue_log_writer(p_s_sb); - } else { - lock_journal(p_s_sb); - if (journal->j_trans_id == trans_id) { - atomic_set(&(journal->j_jlock), 1) ; - } - unlock_journal(p_s_sb); - } - } - if (journal->j_trans_id == trans_id) { - BUG(); - } - if (commit_now && journal_list_still_alive(p_s_sb, trans_id) && - wait_on_commit) - { - flush_commit_list(p_s_sb, jl, 1) ; - } - return 0 ; - } - unlock_journal(p_s_sb) ; - return 0 ; - } - - /* deal with old transactions where we are the last writers */ - now = get_seconds(); - if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) { - commit_now = 1 ; - journal->j_next_async_flush = 1 ; - } - /* don't batch when someone is waiting on j_join_wait */ - /* don't batch when syncing the commit or flushing the whole trans */ - if (!(journal->j_must_wait > 0) 
&& !(atomic_read(&(journal->j_jlock))) && !flush && !commit_now && - (journal->j_len < journal->j_max_batch) && - journal->j_len_alloc < journal->j_max_batch && journal->j_cnode_free > (journal->j_trans_max * 3)) { - journal->j_bcount++ ; - unlock_journal(p_s_sb) ; - return 0 ; - } - - if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { - reiserfs_panic(p_s_sb, "journal-003: journal_end: j_start (%ld) is too high\n", journal->j_start) ; - } - return 1 ; +static int check_journal_end(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb, unsigned long nblocks, + int flags) +{ + + time_t now; + int flush = flags & FLUSH_ALL; + int commit_now = flags & COMMIT_NOW; + int wait_on_commit = flags & WAIT; + struct reiserfs_journal_list *jl; + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + + BUG_ON(!th->t_trans_id); + + if (th->t_trans_id != journal->j_trans_id) { + reiserfs_panic(th->t_super, + "journal-1577: handle trans id %ld != current trans id %ld\n", + th->t_trans_id, journal->j_trans_id); + } + + journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged); + if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */ + atomic_dec(&(journal->j_wcount)); + } + + /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released + ** will be dealt with by next transaction that actually writes something, but should be taken + ** care of in this trans + */ + if (journal->j_len == 0) { + BUG(); + } + /* if wcount > 0, and we are called to with flush or commit_now, + ** we wait on j_join_wait. We will wake up when the last writer has + ** finished the transaction, and started it on its way to the disk. + ** Then, we flush the commit or journal list, and just return 0 + ** because the rest of journal end was already done for this transaction. 
+ */ + if (atomic_read(&(journal->j_wcount)) > 0) { + if (flush || commit_now) { + unsigned trans_id; + + jl = journal->j_current_jl; + trans_id = jl->j_trans_id; + if (wait_on_commit) + jl->j_state |= LIST_COMMIT_PENDING; + atomic_set(&(journal->j_jlock), 1); + if (flush) { + journal->j_next_full_flush = 1; + } + unlock_journal(p_s_sb); + + /* sleep while the current transaction is still j_jlocked */ + while (journal->j_trans_id == trans_id) { + if (atomic_read(&journal->j_jlock)) { + queue_log_writer(p_s_sb); + } else { + lock_journal(p_s_sb); + if (journal->j_trans_id == trans_id) { + atomic_set(&(journal->j_jlock), + 1); + } + unlock_journal(p_s_sb); + } + } + if (journal->j_trans_id == trans_id) { + BUG(); + } + if (commit_now + && journal_list_still_alive(p_s_sb, trans_id) + && wait_on_commit) { + flush_commit_list(p_s_sb, jl, 1); + } + return 0; + } + unlock_journal(p_s_sb); + return 0; + } + + /* deal with old transactions where we are the last writers */ + now = get_seconds(); + if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) { + commit_now = 1; + journal->j_next_async_flush = 1; + } + /* don't batch when someone is waiting on j_join_wait */ + /* don't batch when syncing the commit or flushing the whole trans */ + if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock))) + && !flush && !commit_now && (journal->j_len < journal->j_max_batch) + && journal->j_len_alloc < journal->j_max_batch + && journal->j_cnode_free > (journal->j_trans_max * 3)) { + journal->j_bcount++; + unlock_journal(p_s_sb); + return 0; + } + + if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { + reiserfs_panic(p_s_sb, + "journal-003: journal_end: j_start (%ld) is too high\n", + journal->j_start); + } + return 1; } /* @@ -3284,83 +3593,95 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, struct supe ** ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. 
*/ -int journal_mark_freed(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, b_blocknr_t blocknr) { - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - struct reiserfs_journal_cnode *cn = NULL ; - struct buffer_head *bh = NULL ; - struct reiserfs_list_bitmap *jb = NULL ; - int cleaned = 0 ; - BUG_ON (!th->t_trans_id); - - cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); - if (cn && cn->bh) { - bh = cn->bh ; - get_bh(bh) ; - } - /* if it is journal new, we just remove it from this transaction */ - if (bh && buffer_journal_new(bh)) { - clear_buffer_journal_new (bh); - clear_prepared_bits(bh) ; - reiserfs_clean_and_file_buffer(bh) ; - cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ; - } else { - /* set the bit for this block in the journal bitmap for this transaction */ - jb = journal->j_current_jl->j_list_bitmap; - if (!jb) { - reiserfs_panic(p_s_sb, "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n") ; - } - set_bit_in_list_bitmap(p_s_sb, blocknr, jb) ; - - /* Note, the entire while loop is not allowed to schedule. 
*/ - - if (bh) { - clear_prepared_bits(bh) ; - reiserfs_clean_and_file_buffer(bh) ; - } - cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ; - - /* find all older transactions with this block, make sure they don't try to write it out */ - cn = get_journal_hash_dev(p_s_sb,journal->j_list_hash_table, blocknr) ; - while (cn) { - if (p_s_sb == cn->sb && blocknr == cn->blocknr) { - set_bit(BLOCK_FREED, &cn->state) ; - if (cn->bh) { - if (!cleaned) { - /* remove_from_transaction will brelse the buffer if it was - ** in the current trans - */ - clear_buffer_journal_dirty (cn->bh); - clear_buffer_dirty(cn->bh); - clear_buffer_journal_test(cn->bh); - cleaned = 1 ; - put_bh(cn->bh) ; - if (atomic_read(&(cn->bh->b_count)) < 0) { - reiserfs_warning (p_s_sb, "journal-2138: cn->bh->b_count < 0"); - } - } - if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ - atomic_dec(&(cn->jlist->j_nonzerolen)) ; - } - cn->bh = NULL ; - } - } - cn = cn->hnext ; - } - } - - if (bh) { - put_bh(bh) ; /* get_hash grabs the buffer */ - if (atomic_read(&(bh->b_count)) < 0) { - reiserfs_warning (p_s_sb, "journal-2165: bh->b_count < 0"); - } - } - return 0 ; -} - -void reiserfs_update_inode_transaction(struct inode *inode) { - struct reiserfs_journal *journal = SB_JOURNAL (inode->i_sb); - REISERFS_I(inode)->i_jl = journal->j_current_jl; - REISERFS_I(inode)->i_trans_id = journal->j_trans_id ; +int journal_mark_freed(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb, b_blocknr_t blocknr) +{ + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + struct reiserfs_journal_cnode *cn = NULL; + struct buffer_head *bh = NULL; + struct reiserfs_list_bitmap *jb = NULL; + int cleaned = 0; + BUG_ON(!th->t_trans_id); + + cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); + if (cn && cn->bh) { + bh = cn->bh; + get_bh(bh); + } + /* if it is journal new, we just remove it from this transaction */ + if (bh && buffer_journal_new(bh)) { + 
clear_buffer_journal_new(bh); + clear_prepared_bits(bh); + reiserfs_clean_and_file_buffer(bh); + cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned); + } else { + /* set the bit for this block in the journal bitmap for this transaction */ + jb = journal->j_current_jl->j_list_bitmap; + if (!jb) { + reiserfs_panic(p_s_sb, + "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n"); + } + set_bit_in_list_bitmap(p_s_sb, blocknr, jb); + + /* Note, the entire while loop is not allowed to schedule. */ + + if (bh) { + clear_prepared_bits(bh); + reiserfs_clean_and_file_buffer(bh); + } + cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned); + + /* find all older transactions with this block, make sure they don't try to write it out */ + cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, + blocknr); + while (cn) { + if (p_s_sb == cn->sb && blocknr == cn->blocknr) { + set_bit(BLOCK_FREED, &cn->state); + if (cn->bh) { + if (!cleaned) { + /* remove_from_transaction will brelse the buffer if it was + ** in the current trans + */ + clear_buffer_journal_dirty(cn-> + bh); + clear_buffer_dirty(cn->bh); + clear_buffer_journal_test(cn-> + bh); + cleaned = 1; + put_bh(cn->bh); + if (atomic_read + (&(cn->bh->b_count)) < 0) { + reiserfs_warning(p_s_sb, + "journal-2138: cn->bh->b_count < 0"); + } + } + if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ + atomic_dec(& + (cn->jlist-> + j_nonzerolen)); + } + cn->bh = NULL; + } + } + cn = cn->hnext; + } + } + + if (bh) { + put_bh(bh); /* get_hash grabs the buffer */ + if (atomic_read(&(bh->b_count)) < 0) { + reiserfs_warning(p_s_sb, + "journal-2165: bh->b_count < 0"); + } + } + return 0; +} + +void reiserfs_update_inode_transaction(struct inode *inode) +{ + struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb); + REISERFS_I(inode)->i_jl = journal->j_current_jl; + REISERFS_I(inode)->i_trans_id = journal->j_trans_id; } /* @@ -3368,99 +3689,102 @@ void 
reiserfs_update_inode_transaction(struct inode *inode) { * if a transaction was actually committed and the barrier was done */ static int __commit_trans_jl(struct inode *inode, unsigned long id, - struct reiserfs_journal_list *jl) + struct reiserfs_journal_list *jl) { - struct reiserfs_transaction_handle th ; - struct super_block *sb = inode->i_sb ; - struct reiserfs_journal *journal = SB_JOURNAL (sb); - int ret = 0; + struct reiserfs_transaction_handle th; + struct super_block *sb = inode->i_sb; + struct reiserfs_journal *journal = SB_JOURNAL(sb); + int ret = 0; + + /* is it from the current transaction, or from an unknown transaction? */ + if (id == journal->j_trans_id) { + jl = journal->j_current_jl; + /* try to let other writers come in and grow this transaction */ + let_transaction_grow(sb, id); + if (journal->j_trans_id != id) { + goto flush_commit_only; + } - /* is it from the current transaction, or from an unknown transaction? */ - if (id == journal->j_trans_id) { - jl = journal->j_current_jl; - /* try to let other writers come in and grow this transaction */ - let_transaction_grow(sb, id); - if (journal->j_trans_id != id) { - goto flush_commit_only; - } + ret = journal_begin(&th, sb, 1); + if (ret) + return ret; + + /* someone might have ended this transaction while we joined */ + if (journal->j_trans_id != id) { + reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), + 1); + journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)); + ret = journal_end(&th, sb, 1); + goto flush_commit_only; + } - ret = journal_begin(&th, sb, 1) ; - if (ret) - return ret; + ret = journal_end_sync(&th, sb, 1); + if (!ret) + ret = 1; - /* someone might have ended this transaction while we joined */ - if (journal->j_trans_id != id) { - reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 1) ; - journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)) ; - ret = journal_end(&th, sb, 1) ; - goto flush_commit_only; + } else { + /* this gets tricky, we have to make sure the journal list 
in + * the inode still exists. We know the list is still around + * if we've got a larger transaction id than the oldest list + */ + flush_commit_only: + if (journal_list_still_alive(inode->i_sb, id)) { + /* + * we only set ret to 1 when we know for sure + * the barrier hasn't been started yet on the commit + * block. + */ + if (atomic_read(&jl->j_commit_left) > 1) + ret = 1; + flush_commit_list(sb, jl, 1); + if (journal->j_errno) + ret = journal->j_errno; + } } + /* otherwise the list is gone, and long since committed */ + return ret; +} - ret = journal_end_sync(&th, sb, 1) ; - if (!ret) - ret = 1; +int reiserfs_commit_for_inode(struct inode *inode) +{ + unsigned long id = REISERFS_I(inode)->i_trans_id; + struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; - } else { - /* this gets tricky, we have to make sure the journal list in - * the inode still exists. We know the list is still around - * if we've got a larger transaction id than the oldest list + /* for the whole inode, assume unset id means it was + * changed in the current transaction. More conservative */ -flush_commit_only: - if (journal_list_still_alive(inode->i_sb, id)) { - /* - * we only set ret to 1 when we know for sure - * the barrier hasn't been started yet on the commit - * block. - */ - if (atomic_read(&jl->j_commit_left) > 1) - ret = 1; - flush_commit_list(sb, jl, 1) ; - if (journal->j_errno) - ret = journal->j_errno; - } - } - /* otherwise the list is gone, and long since committed */ - return ret; -} - -int reiserfs_commit_for_inode(struct inode *inode) { - unsigned long id = REISERFS_I(inode)->i_trans_id; - struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; - - /* for the whole inode, assume unset id means it was - * changed in the current transaction. 
More conservative - */ - if (!id || !jl) { - reiserfs_update_inode_transaction(inode) ; - id = REISERFS_I(inode)->i_trans_id; - /* jl will be updated in __commit_trans_jl */ - } - - return __commit_trans_jl(inode, id, jl); -} - -void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb, - struct buffer_head *bh) { - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - PROC_INFO_INC( p_s_sb, journal.restore_prepared ); - if (!bh) { - return ; - } - if (test_clear_buffer_journal_restore_dirty (bh) && - buffer_journal_dirty(bh)) { - struct reiserfs_journal_cnode *cn; - cn = get_journal_hash_dev(p_s_sb, - journal->j_list_hash_table, - bh->b_blocknr); - if (cn && can_dirty(cn)) { - set_buffer_journal_test (bh); - mark_buffer_dirty(bh); - } - } - clear_buffer_journal_prepared (bh); -} - -extern struct tree_balance *cur_tb ; + if (!id || !jl) { + reiserfs_update_inode_transaction(inode); + id = REISERFS_I(inode)->i_trans_id; + /* jl will be updated in __commit_trans_jl */ + } + + return __commit_trans_jl(inode, id, jl); +} + +void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb, + struct buffer_head *bh) +{ + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + PROC_INFO_INC(p_s_sb, journal.restore_prepared); + if (!bh) { + return; + } + if (test_clear_buffer_journal_restore_dirty(bh) && + buffer_journal_dirty(bh)) { + struct reiserfs_journal_cnode *cn; + cn = get_journal_hash_dev(p_s_sb, + journal->j_list_hash_table, + bh->b_blocknr); + if (cn && can_dirty(cn)) { + set_buffer_journal_test(bh); + mark_buffer_dirty(bh); + } + } + clear_buffer_journal_prepared(bh); +} + +extern struct tree_balance *cur_tb; /* ** before we can change a metadata block, we have to make sure it won't ** be written to disk while we are altering it. 
So, we must: @@ -3469,39 +3793,41 @@ extern struct tree_balance *cur_tb ; ** */ int reiserfs_prepare_for_journal(struct super_block *p_s_sb, - struct buffer_head *bh, int wait) { - PROC_INFO_INC( p_s_sb, journal.prepare ); - - if (test_set_buffer_locked(bh)) { - if (!wait) - return 0; - lock_buffer(bh); - } - set_buffer_journal_prepared (bh); - if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) { - clear_buffer_journal_test (bh); - set_buffer_journal_restore_dirty (bh); - } - unlock_buffer(bh); - return 1; -} - -static void flush_old_journal_lists(struct super_block *s) { - struct reiserfs_journal *journal = SB_JOURNAL (s); - struct reiserfs_journal_list *jl; - struct list_head *entry; - time_t now = get_seconds(); - - while(!list_empty(&journal->j_journal_list)) { - entry = journal->j_journal_list.next; - jl = JOURNAL_LIST_ENTRY(entry); - /* this check should always be run, to send old lists to disk */ - if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) { - flush_used_journal_lists(s, jl); - } else { - break; + struct buffer_head *bh, int wait) +{ + PROC_INFO_INC(p_s_sb, journal.prepare); + + if (test_set_buffer_locked(bh)) { + if (!wait) + return 0; + lock_buffer(bh); + } + set_buffer_journal_prepared(bh); + if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) { + clear_buffer_journal_test(bh); + set_buffer_journal_restore_dirty(bh); + } + unlock_buffer(bh); + return 1; +} + +static void flush_old_journal_lists(struct super_block *s) +{ + struct reiserfs_journal *journal = SB_JOURNAL(s); + struct reiserfs_journal_list *jl; + struct list_head *entry; + time_t now = get_seconds(); + + while (!list_empty(&journal->j_journal_list)) { + entry = journal->j_journal_list.next; + jl = JOURNAL_LIST_ENTRY(entry); + /* this check should always be run, to send old lists to disk */ + if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) { + flush_used_journal_lists(s, jl); + } else { + break; + } } - } } /* @@ -3514,375 +3840,390 @@ static void 
flush_old_journal_lists(struct super_block *s) { ** If the journal is aborted, we just clean up. Things like flushing ** journal lists, etc just won't happen. */ -static int do_journal_end(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks, - int flags) { - struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); - struct reiserfs_journal_cnode *cn, *next, *jl_cn; - struct reiserfs_journal_cnode *last_cn = NULL; - struct reiserfs_journal_desc *desc ; - struct reiserfs_journal_commit *commit ; - struct buffer_head *c_bh ; /* commit bh */ - struct buffer_head *d_bh ; /* desc bh */ - int cur_write_start = 0 ; /* start index of current log write */ - int old_start ; - int i ; - int flush = flags & FLUSH_ALL ; - int wait_on_commit = flags & WAIT ; - struct reiserfs_journal_list *jl, *temp_jl; - struct list_head *entry, *safe; - unsigned long jindex; - unsigned long commit_trans_id; - int trans_half; - - BUG_ON (th->t_refcount > 1); - BUG_ON (!th->t_trans_id); - - put_fs_excl(); - current->journal_info = th->t_handle_save; - reiserfs_check_lock_depth(p_s_sb, "journal end"); - if (journal->j_len == 0) { - reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; - journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; - } - - lock_journal(p_s_sb) ; - if (journal->j_next_full_flush) { - flags |= FLUSH_ALL ; - flush = 1 ; - } - if (journal->j_next_async_flush) { - flags |= COMMIT_NOW | WAIT; - wait_on_commit = 1; - } - - /* check_journal_end locks the journal, and unlocks if it does not return 1 - ** it tells us if we should continue with the journal_end, or just return - */ - if (!check_journal_end(th, p_s_sb, nblocks, flags)) { - p_s_sb->s_dirt = 1; - wake_queued_writers(p_s_sb); - reiserfs_async_progress_wait(p_s_sb); - goto out ; - } - - /* check_journal_end might set these, check again */ - if (journal->j_next_full_flush) { - flush = 1 ; - } - - /* - ** j must wait means we have to flush the log blocks, and the 
real blocks for - ** this transaction - */ - if (journal->j_must_wait > 0) { - flush = 1 ; - } +static int do_journal_end(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb, unsigned long nblocks, + int flags) +{ + struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); + struct reiserfs_journal_cnode *cn, *next, *jl_cn; + struct reiserfs_journal_cnode *last_cn = NULL; + struct reiserfs_journal_desc *desc; + struct reiserfs_journal_commit *commit; + struct buffer_head *c_bh; /* commit bh */ + struct buffer_head *d_bh; /* desc bh */ + int cur_write_start = 0; /* start index of current log write */ + int old_start; + int i; + int flush = flags & FLUSH_ALL; + int wait_on_commit = flags & WAIT; + struct reiserfs_journal_list *jl, *temp_jl; + struct list_head *entry, *safe; + unsigned long jindex; + unsigned long commit_trans_id; + int trans_half; + + BUG_ON(th->t_refcount > 1); + BUG_ON(!th->t_trans_id); + + put_fs_excl(); + current->journal_info = th->t_handle_save; + reiserfs_check_lock_depth(p_s_sb, "journal end"); + if (journal->j_len == 0) { + reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), + 1); + journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)); + } + lock_journal(p_s_sb); + if (journal->j_next_full_flush) { + flags |= FLUSH_ALL; + flush = 1; + } + if (journal->j_next_async_flush) { + flags |= COMMIT_NOW | WAIT; + wait_on_commit = 1; + } + + /* check_journal_end locks the journal, and unlocks if it does not return 1 + ** it tells us if we should continue with the journal_end, or just return + */ + if (!check_journal_end(th, p_s_sb, nblocks, flags)) { + p_s_sb->s_dirt = 1; + wake_queued_writers(p_s_sb); + reiserfs_async_progress_wait(p_s_sb); + goto out; + } + + /* check_journal_end might set these, check again */ + if (journal->j_next_full_flush) { + flush = 1; + } + + /* + ** j must wait means we have to flush the log blocks, and the real blocks for + ** this transaction + */ + if (journal->j_must_wait > 0) { + flush = 
1; + } #ifdef REISERFS_PREALLOCATE - /* quota ops might need to nest, setup the journal_info pointer for them */ - current->journal_info = th ; - reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into - * the transaction */ - current->journal_info = th->t_handle_save ; + /* quota ops might need to nest, setup the journal_info pointer for them */ + current->journal_info = th; + reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into + * the transaction */ + current->journal_info = th->t_handle_save; #endif - - /* setup description block */ - d_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start) ; - set_buffer_uptodate(d_bh); - desc = (struct reiserfs_journal_desc *)(d_bh)->b_data ; - memset(d_bh->b_data, 0, d_bh->b_size) ; - memcpy(get_journal_desc_magic (d_bh), JOURNAL_DESC_MAGIC, 8) ; - set_desc_trans_id(desc, journal->j_trans_id) ; - - /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */ - c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + - ((journal->j_start + journal->j_len + 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ; - commit = (struct reiserfs_journal_commit *)c_bh->b_data ; - memset(c_bh->b_data, 0, c_bh->b_size) ; - set_commit_trans_id(commit, journal->j_trans_id) ; - set_buffer_uptodate(c_bh) ; - - /* init this journal list */ - jl = journal->j_current_jl; - - /* we lock the commit before doing anything because - * we want to make sure nobody tries to run flush_commit_list until - * the new transaction is fully setup, and we've already flushed the - * ordered bh list - */ - down(&jl->j_commit_lock); - - /* save the transaction id in case we need to commit it later */ - commit_trans_id = jl->j_trans_id; - - atomic_set(&jl->j_older_commits_done, 0) ; - jl->j_trans_id = journal->j_trans_id ; - jl->j_timestamp = journal->j_trans_start_time ; - jl->j_commit_bh = c_bh ; - jl->j_start = journal->j_start ; - jl->j_len = 
journal->j_len ; - atomic_set(&jl->j_nonzerolen, journal->j_len) ; - atomic_set(&jl->j_commit_left, journal->j_len + 2); - jl->j_realblock = NULL ; - - /* The ENTIRE FOR LOOP MUST not cause schedule to occur. - ** for each real block, add it to the journal list hash, - ** copy into real block index array in the commit or desc block - */ - trans_half = journal_trans_half(p_s_sb->s_blocksize); - for (i = 0, cn = journal->j_first ; cn ; cn = cn->next, i++) { - if (buffer_journaled (cn->bh)) { - jl_cn = get_cnode(p_s_sb) ; - if (!jl_cn) { - reiserfs_panic(p_s_sb, "journal-1676, get_cnode returned NULL\n") ; - } - if (i == 0) { - jl->j_realblock = jl_cn ; - } - jl_cn->prev = last_cn ; - jl_cn->next = NULL ; - if (last_cn) { - last_cn->next = jl_cn ; - } - last_cn = jl_cn ; - /* make sure the block we are trying to log is not a block - of journal or reserved area */ - - if (is_block_in_log_or_reserved_area(p_s_sb, cn->bh->b_blocknr)) { - reiserfs_panic(p_s_sb, "journal-2332: Trying to log block %lu, which is a log block\n", cn->bh->b_blocknr) ; - } - jl_cn->blocknr = cn->bh->b_blocknr ; - jl_cn->state = 0 ; - jl_cn->sb = p_s_sb; - jl_cn->bh = cn->bh ; - jl_cn->jlist = jl; - insert_journal_hash(journal->j_list_hash_table, jl_cn) ; - if (i < trans_half) { - desc->j_realblock[i] = cpu_to_le32(cn->bh->b_blocknr) ; - } else { - commit->j_realblock[i - trans_half] = cpu_to_le32(cn->bh->b_blocknr) ; - } - } else { - i-- ; - } - } - set_desc_trans_len(desc, journal->j_len) ; - set_desc_mount_id(desc, journal->j_mount_id) ; - set_desc_trans_id(desc, journal->j_trans_id) ; - set_commit_trans_len(commit, journal->j_len); - - /* special check in case all buffers in the journal were marked for not logging */ - if (journal->j_len == 0) { - BUG(); - } - - /* we're about to dirty all the log blocks, mark the description block - * dirty now too. 
Don't mark the commit block dirty until all the - * others are on disk - */ - mark_buffer_dirty(d_bh); - - /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ - cur_write_start = journal->j_start ; - cn = journal->j_first ; - jindex = 1 ; /* start at one so we don't get the desc again */ - while(cn) { - clear_buffer_journal_new (cn->bh); - /* copy all the real blocks into log area. dirty log blocks */ - if (buffer_journaled (cn->bh)) { - struct buffer_head *tmp_bh ; - char *addr; - struct page *page; - tmp_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + - ((cur_write_start + jindex) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ; - set_buffer_uptodate(tmp_bh); - page = cn->bh->b_page; - addr = kmap(page); - memcpy(tmp_bh->b_data, addr + offset_in_page(cn->bh->b_data), - cn->bh->b_size); - kunmap(page); - mark_buffer_dirty(tmp_bh); - jindex++ ; - set_buffer_journal_dirty (cn->bh); - clear_buffer_journaled (cn->bh); - } else { - /* JDirty cleared sometime during transaction. don't log this one */ - reiserfs_warning(p_s_sb, "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!") ; - brelse(cn->bh) ; - } - next = cn->next ; - free_cnode(p_s_sb, cn) ; - cn = next ; - cond_resched(); - } - - /* we are done with both the c_bh and d_bh, but - ** c_bh must be written after all other commit blocks, - ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. 
- */ - - journal->j_current_jl = alloc_journal_list(p_s_sb); - - /* now it is safe to insert this transaction on the main list */ - list_add_tail(&jl->j_list, &journal->j_journal_list); - list_add_tail(&jl->j_working_list, &journal->j_working_list); - journal->j_num_work_lists++; - - /* reset journal values for the next transaction */ - old_start = journal->j_start ; - journal->j_start = (journal->j_start + journal->j_len + 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb); - atomic_set(&(journal->j_wcount), 0) ; - journal->j_bcount = 0 ; - journal->j_last = NULL ; - journal->j_first = NULL ; - journal->j_len = 0 ; - journal->j_trans_start_time = 0 ; - journal->j_trans_id++ ; - journal->j_current_jl->j_trans_id = journal->j_trans_id; - journal->j_must_wait = 0 ; - journal->j_len_alloc = 0 ; - journal->j_next_full_flush = 0 ; - journal->j_next_async_flush = 0 ; - init_journal_hash(p_s_sb) ; - - // make sure reiserfs_add_jh sees the new current_jl before we - // write out the tails - smp_mb(); - - /* tail conversion targets have to hit the disk before we end the - * transaction. Otherwise a later transaction might repack the tail - * before this transaction commits, leaving the data block unflushed and - * clean, if we crash before the later transaction commits, the data block - * is lost. - */ - if (!list_empty(&jl->j_tail_bh_list)) { - unlock_kernel(); - write_ordered_buffers(&journal->j_dirty_buffers_lock, - journal, jl, &jl->j_tail_bh_list); - lock_kernel(); - } - if (!list_empty(&jl->j_tail_bh_list)) - BUG(); - up(&jl->j_commit_lock); - - /* honor the flush wishes from the caller, simple commits can - ** be done outside the journal lock, they are done below - ** - ** if we don't flush the commit list right now, we put it into - ** the work queue so the people waiting on the async progress work - ** queue don't wait for this proc to flush journal lists and such. 
- */ - if (flush) { - flush_commit_list(p_s_sb, jl, 1) ; - flush_journal_list(p_s_sb, jl, 1) ; - } else if (!(jl->j_state & LIST_COMMIT_PENDING)) - queue_delayed_work(commit_wq, &journal->j_work, HZ/10); - - - /* if the next transaction has any chance of wrapping, flush - ** transactions that might get overwritten. If any journal lists are very - ** old flush them as well. - */ -first_jl: - list_for_each_safe(entry, safe, &journal->j_journal_list) { - temp_jl = JOURNAL_LIST_ENTRY(entry); - if (journal->j_start <= temp_jl->j_start) { - if ((journal->j_start + journal->j_trans_max + 1) >= - temp_jl->j_start) - { - flush_used_journal_lists(p_s_sb, temp_jl); - goto first_jl; - } else if ((journal->j_start + - journal->j_trans_max + 1) < - SB_ONDISK_JOURNAL_SIZE(p_s_sb)) - { - /* if we don't cross into the next transaction and we don't - * wrap, there is no way we can overlap any later transactions - * break now - */ - break; - } - } else if ((journal->j_start + - journal->j_trans_max + 1) > - SB_ONDISK_JOURNAL_SIZE(p_s_sb)) - { - if (((journal->j_start + journal->j_trans_max + 1) % - SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= temp_jl->j_start) - { - flush_used_journal_lists(p_s_sb, temp_jl); - goto first_jl; - } else { - /* we don't overlap anything from out start to the end of the - * log, and our wrapped portion doesn't overlap anything at - * the start of the log. We can break - */ - break; - } - } - } - flush_old_journal_lists(p_s_sb); - - journal->j_current_jl->j_list_bitmap = get_list_bitmap(p_s_sb, journal->j_current_jl) ; - - if (!(journal->j_current_jl->j_list_bitmap)) { - reiserfs_panic(p_s_sb, "journal-1996: do_journal_end, could not get a list bitmap\n") ; - } - - atomic_set(&(journal->j_jlock), 0) ; - unlock_journal(p_s_sb) ; - /* wake up any body waiting to join. 
*/ - clear_bit(J_WRITERS_QUEUED, &journal->j_state); - wake_up(&(journal->j_join_wait)) ; - - if (!flush && wait_on_commit && - journal_list_still_alive(p_s_sb, commit_trans_id)) { - flush_commit_list(p_s_sb, jl, 1) ; - } -out: - reiserfs_check_lock_depth(p_s_sb, "journal end2"); - - memset (th, 0, sizeof (*th)); - /* Re-set th->t_super, so we can properly keep track of how many - * persistent transactions there are. We need to do this so if this - * call is part of a failed restart_transaction, we can free it later */ - th->t_super = p_s_sb; - - return journal->j_errno; -} - -static void -__reiserfs_journal_abort_hard (struct super_block *sb) -{ - struct reiserfs_journal *journal = SB_JOURNAL (sb); - if (test_bit (J_ABORTED, &journal->j_state)) - return; - - printk (KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n", - reiserfs_bdevname (sb)); - - sb->s_flags |= MS_RDONLY; - set_bit (J_ABORTED, &journal->j_state); + + /* setup description block */ + d_bh = + journal_getblk(p_s_sb, + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + + journal->j_start); + set_buffer_uptodate(d_bh); + desc = (struct reiserfs_journal_desc *)(d_bh)->b_data; + memset(d_bh->b_data, 0, d_bh->b_size); + memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); + set_desc_trans_id(desc, journal->j_trans_id); + + /* setup commit block. 
Don't write (keep it clean too) this one until after everyone else is written */ + c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + + ((journal->j_start + journal->j_len + + 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))); + commit = (struct reiserfs_journal_commit *)c_bh->b_data; + memset(c_bh->b_data, 0, c_bh->b_size); + set_commit_trans_id(commit, journal->j_trans_id); + set_buffer_uptodate(c_bh); + + /* init this journal list */ + jl = journal->j_current_jl; + + /* we lock the commit before doing anything because + * we want to make sure nobody tries to run flush_commit_list until + * the new transaction is fully setup, and we've already flushed the + * ordered bh list + */ + down(&jl->j_commit_lock); + + /* save the transaction id in case we need to commit it later */ + commit_trans_id = jl->j_trans_id; + + atomic_set(&jl->j_older_commits_done, 0); + jl->j_trans_id = journal->j_trans_id; + jl->j_timestamp = journal->j_trans_start_time; + jl->j_commit_bh = c_bh; + jl->j_start = journal->j_start; + jl->j_len = journal->j_len; + atomic_set(&jl->j_nonzerolen, journal->j_len); + atomic_set(&jl->j_commit_left, journal->j_len + 2); + jl->j_realblock = NULL; + + /* The ENTIRE FOR LOOP MUST not cause schedule to occur. 
+ ** for each real block, add it to the journal list hash, + ** copy into real block index array in the commit or desc block + */ + trans_half = journal_trans_half(p_s_sb->s_blocksize); + for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { + if (buffer_journaled(cn->bh)) { + jl_cn = get_cnode(p_s_sb); + if (!jl_cn) { + reiserfs_panic(p_s_sb, + "journal-1676, get_cnode returned NULL\n"); + } + if (i == 0) { + jl->j_realblock = jl_cn; + } + jl_cn->prev = last_cn; + jl_cn->next = NULL; + if (last_cn) { + last_cn->next = jl_cn; + } + last_cn = jl_cn; + /* make sure the block we are trying to log is not a block + of journal or reserved area */ + + if (is_block_in_log_or_reserved_area + (p_s_sb, cn->bh->b_blocknr)) { + reiserfs_panic(p_s_sb, + "journal-2332: Trying to log block %lu, which is a log block\n", + cn->bh->b_blocknr); + } + jl_cn->blocknr = cn->bh->b_blocknr; + jl_cn->state = 0; + jl_cn->sb = p_s_sb; + jl_cn->bh = cn->bh; + jl_cn->jlist = jl; + insert_journal_hash(journal->j_list_hash_table, jl_cn); + if (i < trans_half) { + desc->j_realblock[i] = + cpu_to_le32(cn->bh->b_blocknr); + } else { + commit->j_realblock[i - trans_half] = + cpu_to_le32(cn->bh->b_blocknr); + } + } else { + i--; + } + } + set_desc_trans_len(desc, journal->j_len); + set_desc_mount_id(desc, journal->j_mount_id); + set_desc_trans_id(desc, journal->j_trans_id); + set_commit_trans_len(commit, journal->j_len); + + /* special check in case all buffers in the journal were marked for not logging */ + if (journal->j_len == 0) { + BUG(); + } + + /* we're about to dirty all the log blocks, mark the description block + * dirty now too. 
Don't mark the commit block dirty until all the + * others are on disk + */ + mark_buffer_dirty(d_bh); + + /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ + cur_write_start = journal->j_start; + cn = journal->j_first; + jindex = 1; /* start at one so we don't get the desc again */ + while (cn) { + clear_buffer_journal_new(cn->bh); + /* copy all the real blocks into log area. dirty log blocks */ + if (buffer_journaled(cn->bh)) { + struct buffer_head *tmp_bh; + char *addr; + struct page *page; + tmp_bh = + journal_getblk(p_s_sb, + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + + ((cur_write_start + + jindex) % + SB_ONDISK_JOURNAL_SIZE(p_s_sb))); + set_buffer_uptodate(tmp_bh); + page = cn->bh->b_page; + addr = kmap(page); + memcpy(tmp_bh->b_data, + addr + offset_in_page(cn->bh->b_data), + cn->bh->b_size); + kunmap(page); + mark_buffer_dirty(tmp_bh); + jindex++; + set_buffer_journal_dirty(cn->bh); + clear_buffer_journaled(cn->bh); + } else { + /* JDirty cleared sometime during transaction. don't log this one */ + reiserfs_warning(p_s_sb, + "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!"); + brelse(cn->bh); + } + next = cn->next; + free_cnode(p_s_sb, cn); + cn = next; + cond_resched(); + } + + /* we are done with both the c_bh and d_bh, but + ** c_bh must be written after all other commit blocks, + ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. 
+ */ + + journal->j_current_jl = alloc_journal_list(p_s_sb); + + /* now it is safe to insert this transaction on the main list */ + list_add_tail(&jl->j_list, &journal->j_journal_list); + list_add_tail(&jl->j_working_list, &journal->j_working_list); + journal->j_num_work_lists++; + + /* reset journal values for the next transaction */ + old_start = journal->j_start; + journal->j_start = + (journal->j_start + journal->j_len + + 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb); + atomic_set(&(journal->j_wcount), 0); + journal->j_bcount = 0; + journal->j_last = NULL; + journal->j_first = NULL; + journal->j_len = 0; + journal->j_trans_start_time = 0; + journal->j_trans_id++; + journal->j_current_jl->j_trans_id = journal->j_trans_id; + journal->j_must_wait = 0; + journal->j_len_alloc = 0; + journal->j_next_full_flush = 0; + journal->j_next_async_flush = 0; + init_journal_hash(p_s_sb); + + // make sure reiserfs_add_jh sees the new current_jl before we + // write out the tails + smp_mb(); + + /* tail conversion targets have to hit the disk before we end the + * transaction. Otherwise a later transaction might repack the tail + * before this transaction commits, leaving the data block unflushed and + * clean, if we crash before the later transaction commits, the data block + * is lost. + */ + if (!list_empty(&jl->j_tail_bh_list)) { + unlock_kernel(); + write_ordered_buffers(&journal->j_dirty_buffers_lock, + journal, jl, &jl->j_tail_bh_list); + lock_kernel(); + } + if (!list_empty(&jl->j_tail_bh_list)) + BUG(); + up(&jl->j_commit_lock); + + /* honor the flush wishes from the caller, simple commits can + ** be done outside the journal lock, they are done below + ** + ** if we don't flush the commit list right now, we put it into + ** the work queue so the people waiting on the async progress work + ** queue don't wait for this proc to flush journal lists and such. 
+ */ + if (flush) { + flush_commit_list(p_s_sb, jl, 1); + flush_journal_list(p_s_sb, jl, 1); + } else if (!(jl->j_state & LIST_COMMIT_PENDING)) + queue_delayed_work(commit_wq, &journal->j_work, HZ / 10); + + /* if the next transaction has any chance of wrapping, flush + ** transactions that might get overwritten. If any journal lists are very + ** old flush them as well. + */ + first_jl: + list_for_each_safe(entry, safe, &journal->j_journal_list) { + temp_jl = JOURNAL_LIST_ENTRY(entry); + if (journal->j_start <= temp_jl->j_start) { + if ((journal->j_start + journal->j_trans_max + 1) >= + temp_jl->j_start) { + flush_used_journal_lists(p_s_sb, temp_jl); + goto first_jl; + } else if ((journal->j_start + + journal->j_trans_max + 1) < + SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { + /* if we don't cross into the next transaction and we don't + * wrap, there is no way we can overlap any later transactions + * break now + */ + break; + } + } else if ((journal->j_start + + journal->j_trans_max + 1) > + SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { + if (((journal->j_start + journal->j_trans_max + 1) % + SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= + temp_jl->j_start) { + flush_used_journal_lists(p_s_sb, temp_jl); + goto first_jl; + } else { + /* we don't overlap anything from out start to the end of the + * log, and our wrapped portion doesn't overlap anything at + * the start of the log. We can break + */ + break; + } + } + } + flush_old_journal_lists(p_s_sb); + + journal->j_current_jl->j_list_bitmap = + get_list_bitmap(p_s_sb, journal->j_current_jl); + + if (!(journal->j_current_jl->j_list_bitmap)) { + reiserfs_panic(p_s_sb, + "journal-1996: do_journal_end, could not get a list bitmap\n"); + } + + atomic_set(&(journal->j_jlock), 0); + unlock_journal(p_s_sb); + /* wake up any body waiting to join. 
*/ + clear_bit(J_WRITERS_QUEUED, &journal->j_state); + wake_up(&(journal->j_join_wait)); + + if (!flush && wait_on_commit && + journal_list_still_alive(p_s_sb, commit_trans_id)) { + flush_commit_list(p_s_sb, jl, 1); + } + out: + reiserfs_check_lock_depth(p_s_sb, "journal end2"); + + memset(th, 0, sizeof(*th)); + /* Re-set th->t_super, so we can properly keep track of how many + * persistent transactions there are. We need to do this so if this + * call is part of a failed restart_transaction, we can free it later */ + th->t_super = p_s_sb; + + return journal->j_errno; +} + +static void __reiserfs_journal_abort_hard(struct super_block *sb) +{ + struct reiserfs_journal *journal = SB_JOURNAL(sb); + if (test_bit(J_ABORTED, &journal->j_state)) + return; + + printk(KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n", + reiserfs_bdevname(sb)); + + sb->s_flags |= MS_RDONLY; + set_bit(J_ABORTED, &journal->j_state); #ifdef CONFIG_REISERFS_CHECK - dump_stack(); + dump_stack(); #endif } -static void -__reiserfs_journal_abort_soft (struct super_block *sb, int errno) +static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno) { - struct reiserfs_journal *journal = SB_JOURNAL (sb); - if (test_bit (J_ABORTED, &journal->j_state)) - return; + struct reiserfs_journal *journal = SB_JOURNAL(sb); + if (test_bit(J_ABORTED, &journal->j_state)) + return; - if (!journal->j_errno) - journal->j_errno = errno; + if (!journal->j_errno) + journal->j_errno = errno; - __reiserfs_journal_abort_hard (sb); + __reiserfs_journal_abort_hard(sb); } -void -reiserfs_journal_abort (struct super_block *sb, int errno) +void reiserfs_journal_abort(struct super_block *sb, int errno) { - return __reiserfs_journal_abort_soft (sb, errno); + return __reiserfs_journal_abort_soft(sb, errno); } diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c index 2406608fc5c..2533c1f64ab 100644 --- a/fs/reiserfs/lbalance.c +++ b/fs/reiserfs/lbalance.c @@ -21,648 +21,709 @@ leaf_paste_entries 
*/ - /* copy copy_count entries from source directory item to dest buffer (creating new item if needed) */ -static void leaf_copy_dir_entries (struct buffer_info * dest_bi, struct buffer_head * source, - int last_first, int item_num, int from, int copy_count) +static void leaf_copy_dir_entries(struct buffer_info *dest_bi, + struct buffer_head *source, int last_first, + int item_num, int from, int copy_count) { - struct buffer_head * dest = dest_bi->bi_bh; - int item_num_in_dest; /* either the number of target item, - or if we must create a new item, - the number of the item we will - create it next to */ - struct item_head * ih; - struct reiserfs_de_head * deh; - int copy_records_len; /* length of all records in item to be copied */ - char * records; - - ih = B_N_PITEM_HEAD (source, item_num); - - RFALSE( !is_direntry_le_ih (ih), "vs-10000: item must be directory item"); - - /* length of all record to be copied and first byte of the last of them */ - deh = B_I_DEH (source, ih); - if (copy_count) { - copy_records_len = (from ? deh_location( &(deh[from - 1]) ) : - ih_item_len(ih)) - deh_location( &(deh[from + copy_count - 1])); - records = source->b_data + ih_location(ih) + - deh_location( &(deh[from + copy_count - 1])); - } else { - copy_records_len = 0; - records = NULL; - } - - /* when copy last to first, dest buffer can contain 0 items */ - item_num_in_dest = (last_first == LAST_TO_FIRST) ? (( B_NR_ITEMS(dest) ) ? 
0 : -1) : (B_NR_ITEMS(dest) - 1); - - /* if there are no items in dest or the first/last item in dest is not item of the same directory */ - if ( (item_num_in_dest == - 1) || - (last_first == FIRST_TO_LAST && le_ih_k_offset (ih) == DOT_OFFSET) || - (last_first == LAST_TO_FIRST && comp_short_le_keys/*COMP_SHORT_KEYS*/ (&ih->ih_key, B_N_PKEY (dest, item_num_in_dest)))) { - /* create new item in dest */ - struct item_head new_ih; - - /* form item header */ - memcpy (&new_ih.ih_key, &ih->ih_key, KEY_SIZE); - put_ih_version( &new_ih, KEY_FORMAT_3_5 ); - /* calculate item len */ - put_ih_item_len( &new_ih, DEH_SIZE * copy_count + copy_records_len ); - put_ih_entry_count( &new_ih, 0 ); - - if (last_first == LAST_TO_FIRST) { - /* form key by the following way */ - if (from < I_ENTRY_COUNT(ih)) { - set_le_ih_k_offset( &new_ih, deh_offset( &(deh[from]) ) ); - /*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, SHORT_KEY_SIZE);*/ - } else { - /* no entries will be copied to this item in this function */ - set_le_ih_k_offset (&new_ih, U32_MAX); - /* this item is not yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */ - } - set_le_key_k_type (KEY_FORMAT_3_5, &(new_ih.ih_key), TYPE_DIRENTRY); + struct buffer_head *dest = dest_bi->bi_bh; + int item_num_in_dest; /* either the number of target item, + or if we must create a new item, + the number of the item we will + create it next to */ + struct item_head *ih; + struct reiserfs_de_head *deh; + int copy_records_len; /* length of all records in item to be copied */ + char *records; + + ih = B_N_PITEM_HEAD(source, item_num); + + RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item"); + + /* length of all record to be copied and first byte of the last of them */ + deh = B_I_DEH(source, ih); + if (copy_count) { + copy_records_len = (from ? 
deh_location(&(deh[from - 1])) : + ih_item_len(ih)) - + deh_location(&(deh[from + copy_count - 1])); + records = + source->b_data + ih_location(ih) + + deh_location(&(deh[from + copy_count - 1])); + } else { + copy_records_len = 0; + records = NULL; + } + + /* when copy last to first, dest buffer can contain 0 items */ + item_num_in_dest = + (last_first == + LAST_TO_FIRST) ? ((B_NR_ITEMS(dest)) ? 0 : -1) : (B_NR_ITEMS(dest) + - 1); + + /* if there are no items in dest or the first/last item in dest is not item of the same directory */ + if ((item_num_in_dest == -1) || + (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) || + (last_first == LAST_TO_FIRST + && comp_short_le_keys /*COMP_SHORT_KEYS */ (&ih->ih_key, + B_N_PKEY(dest, + item_num_in_dest)))) + { + /* create new item in dest */ + struct item_head new_ih; + + /* form item header */ + memcpy(&new_ih.ih_key, &ih->ih_key, KEY_SIZE); + put_ih_version(&new_ih, KEY_FORMAT_3_5); + /* calculate item len */ + put_ih_item_len(&new_ih, + DEH_SIZE * copy_count + copy_records_len); + put_ih_entry_count(&new_ih, 0); + + if (last_first == LAST_TO_FIRST) { + /* form key by the following way */ + if (from < I_ENTRY_COUNT(ih)) { + set_le_ih_k_offset(&new_ih, + deh_offset(&(deh[from]))); + /*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, SHORT_KEY_SIZE); */ + } else { + /* no entries will be copied to this item in this function */ + set_le_ih_k_offset(&new_ih, U32_MAX); + /* this item is not yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */ + } + set_le_key_k_type(KEY_FORMAT_3_5, &(new_ih.ih_key), + TYPE_DIRENTRY); + } + + /* insert item into dest buffer */ + leaf_insert_into_buf(dest_bi, + (last_first == + LAST_TO_FIRST) ? 0 : B_NR_ITEMS(dest), + &new_ih, NULL, 0); + } else { + /* prepare space for entries */ + leaf_paste_in_buffer(dest_bi, + (last_first == + FIRST_TO_LAST) ? 
(B_NR_ITEMS(dest) - + 1) : 0, MAX_US_INT, + DEH_SIZE * copy_count + copy_records_len, + records, 0); } - - /* insert item into dest buffer */ - leaf_insert_into_buf (dest_bi, (last_first == LAST_TO_FIRST) ? 0 : B_NR_ITEMS(dest), &new_ih, NULL, 0); - } else { - /* prepare space for entries */ - leaf_paste_in_buffer (dest_bi, (last_first==FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - 1) : 0, MAX_US_INT, - DEH_SIZE * copy_count + copy_records_len, records, 0 - ); - } - - item_num_in_dest = (last_first == FIRST_TO_LAST) ? (B_NR_ITEMS(dest)-1) : 0; - - leaf_paste_entries (dest_bi->bi_bh, item_num_in_dest, - (last_first == FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD (dest, item_num_in_dest)) : 0, - copy_count, deh + from, records, - DEH_SIZE * copy_count + copy_records_len - ); -} + item_num_in_dest = + (last_first == FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - 1) : 0; + + leaf_paste_entries(dest_bi->bi_bh, item_num_in_dest, + (last_first == + FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD(dest, + item_num_in_dest)) + : 0, copy_count, deh + from, records, + DEH_SIZE * copy_count + copy_records_len); +} /* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or part of it or nothing (see the return 0 below) from SOURCE to the end (if last_first) or beginning (!last_first) of the DEST */ /* returns 1 if anything was copied, else 0 */ -static int leaf_copy_boundary_item (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, - int bytes_or_entries) +static int leaf_copy_boundary_item(struct buffer_info *dest_bi, + struct buffer_head *src, int last_first, + int bytes_or_entries) { - struct buffer_head * dest = dest_bi->bi_bh; - int dest_nr_item, src_nr_item; /* number of items in the source and destination buffers */ - struct item_head * ih; - struct item_head * dih; - - dest_nr_item = B_NR_ITEMS(dest); - - if ( last_first == FIRST_TO_LAST ) { - /* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of 
different objects - or of different types ) then there is no need to treat this item differently from the other items - that we copy, so we return */ - ih = B_N_PITEM_HEAD (src, 0); - dih = B_N_PITEM_HEAD (dest, dest_nr_item - 1); - if (!dest_nr_item || (!op_is_left_mergeable (&(ih->ih_key), src->b_size))) - /* there is nothing to merge */ - return 0; - - RFALSE( ! ih_item_len(ih), "vs-10010: item can not have empty length"); - - if ( is_direntry_le_ih (ih) ) { - if ( bytes_or_entries == -1 ) - /* copy all entries to dest */ - bytes_or_entries = ih_entry_count(ih); - leaf_copy_dir_entries (dest_bi, src, FIRST_TO_LAST, 0, 0, bytes_or_entries); - return 1; - } - - /* copy part of the body of the first item of SOURCE to the end of the body of the last item of the DEST - part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header - */ - if ( bytes_or_entries == -1 ) - bytes_or_entries = ih_item_len(ih); + struct buffer_head *dest = dest_bi->bi_bh; + int dest_nr_item, src_nr_item; /* number of items in the source and destination buffers */ + struct item_head *ih; + struct item_head *dih; + + dest_nr_item = B_NR_ITEMS(dest); + + if (last_first == FIRST_TO_LAST) { + /* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of different objects + or of different types ) then there is no need to treat this item differently from the other items + that we copy, so we return */ + ih = B_N_PITEM_HEAD(src, 0); + dih = B_N_PITEM_HEAD(dest, dest_nr_item - 1); + if (!dest_nr_item + || (!op_is_left_mergeable(&(ih->ih_key), src->b_size))) + /* there is nothing to merge */ + return 0; + + RFALSE(!ih_item_len(ih), + "vs-10010: item can not have empty length"); + + if (is_direntry_le_ih(ih)) { + if (bytes_or_entries == -1) + /* copy all entries to dest */ + bytes_or_entries = ih_entry_count(ih); + leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, 0, 0, + bytes_or_entries); + return 1; + } + + /* copy part of the 
body of the first item of SOURCE to the end of the body of the last item of the DEST + part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header + */ + if (bytes_or_entries == -1) + bytes_or_entries = ih_item_len(ih); #ifdef CONFIG_REISERFS_CHECK - else { - if (bytes_or_entries == ih_item_len(ih) && is_indirect_le_ih(ih)) - if (get_ih_free_space (ih)) - reiserfs_panic (NULL, "vs-10020: leaf_copy_boundary_item: " - "last unformatted node must be filled entirely (%h)", - ih); - } + else { + if (bytes_or_entries == ih_item_len(ih) + && is_indirect_le_ih(ih)) + if (get_ih_free_space(ih)) + reiserfs_panic(NULL, + "vs-10020: leaf_copy_boundary_item: " + "last unformatted node must be filled entirely (%h)", + ih); + } #endif - - /* merge first item (or its part) of src buffer with the last - item of dest buffer. Both are of the same file */ - leaf_paste_in_buffer (dest_bi, - dest_nr_item - 1, ih_item_len(dih), bytes_or_entries, B_I_PITEM(src,ih), 0 - ); - - if (is_indirect_le_ih (dih)) { - RFALSE( get_ih_free_space (dih), - "vs-10030: merge to left: last unformatted node of non-last indirect item %h must have zerto free space", - ih); - if (bytes_or_entries == ih_item_len(ih)) - set_ih_free_space (dih, get_ih_free_space (ih)); - } - - return 1; - } - - - /* copy boundary item to right (last_first == LAST_TO_FIRST) */ - - /* ( DEST is empty or last item of SOURCE and first item of DEST - are the items of different object or of different types ) - */ - src_nr_item = B_NR_ITEMS (src); - ih = B_N_PITEM_HEAD (src, src_nr_item - 1); - dih = B_N_PITEM_HEAD (dest, 0); - - if (!dest_nr_item || !op_is_left_mergeable (&(dih->ih_key), src->b_size)) - return 0; - - if ( is_direntry_le_ih (ih)) { - if ( bytes_or_entries == -1 ) - /* bytes_or_entries = entries number in last item body of SOURCE */ - bytes_or_entries = ih_entry_count(ih); - - leaf_copy_dir_entries (dest_bi, src, LAST_TO_FIRST, src_nr_item - 1, ih_entry_count(ih) - 
bytes_or_entries, bytes_or_entries); - return 1; - } - - /* copy part of the body of the last item of SOURCE to the begin of the body of the first item of the DEST; - part defined by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; change first item key of the DEST; - don't create new item header - */ - - RFALSE( is_indirect_le_ih(ih) && get_ih_free_space (ih), - "vs-10040: merge to right: last unformatted node of non-last indirect item must be filled entirely (%h)", - ih); - - if ( bytes_or_entries == -1 ) { - /* bytes_or_entries = length of last item body of SOURCE */ - bytes_or_entries = ih_item_len(ih); - - RFALSE( le_ih_k_offset (dih) != - le_ih_k_offset (ih) + op_bytes_number (ih, src->b_size), - "vs-10050: items %h and %h do not match", ih, dih); - - /* change first item key of the DEST */ - set_le_ih_k_offset (dih, le_ih_k_offset (ih)); - - /* item becomes non-mergeable */ - /* or mergeable if left item was */ - set_le_ih_k_type (dih, le_ih_k_type (ih)); - } else { - /* merge to right only part of item */ - RFALSE( ih_item_len(ih) <= bytes_or_entries, - "vs-10060: no so much bytes %lu (needed %lu)", - ( unsigned long )ih_item_len(ih), ( unsigned long )bytes_or_entries); - - /* change first item key of the DEST */ - if ( is_direct_le_ih (dih) ) { - RFALSE( le_ih_k_offset (dih) <= (unsigned long)bytes_or_entries, - "vs-10070: dih %h, bytes_or_entries(%d)", dih, bytes_or_entries); - set_le_ih_k_offset (dih, le_ih_k_offset (dih) - bytes_or_entries); - } else { - RFALSE( le_ih_k_offset (dih) <= - (bytes_or_entries / UNFM_P_SIZE) * dest->b_size, - "vs-10080: dih %h, bytes_or_entries(%d)", - dih, (bytes_or_entries/UNFM_P_SIZE)*dest->b_size); - set_le_ih_k_offset (dih, le_ih_k_offset (dih) - ((bytes_or_entries / UNFM_P_SIZE) * dest->b_size)); - } - } - - leaf_paste_in_buffer (dest_bi, 0, 0, bytes_or_entries, B_I_PITEM(src,ih) + ih_item_len(ih) - bytes_or_entries, 0); - return 1; -} + /* merge first item (or its part) of src buffer with the last + item 
of dest buffer. Both are of the same file */ + leaf_paste_in_buffer(dest_bi, + dest_nr_item - 1, ih_item_len(dih), + bytes_or_entries, B_I_PITEM(src, ih), 0); + + if (is_indirect_le_ih(dih)) { + RFALSE(get_ih_free_space(dih), + "vs-10030: merge to left: last unformatted node of non-last indirect item %h must have zerto free space", + ih); + if (bytes_or_entries == ih_item_len(ih)) + set_ih_free_space(dih, get_ih_free_space(ih)); + } + + return 1; + } + + /* copy boundary item to right (last_first == LAST_TO_FIRST) */ + + /* ( DEST is empty or last item of SOURCE and first item of DEST + are the items of different object or of different types ) + */ + src_nr_item = B_NR_ITEMS(src); + ih = B_N_PITEM_HEAD(src, src_nr_item - 1); + dih = B_N_PITEM_HEAD(dest, 0); + + if (!dest_nr_item || !op_is_left_mergeable(&(dih->ih_key), src->b_size)) + return 0; + + if (is_direntry_le_ih(ih)) { + if (bytes_or_entries == -1) + /* bytes_or_entries = entries number in last item body of SOURCE */ + bytes_or_entries = ih_entry_count(ih); + + leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, + src_nr_item - 1, + ih_entry_count(ih) - bytes_or_entries, + bytes_or_entries); + return 1; + } + + /* copy part of the body of the last item of SOURCE to the begin of the body of the first item of the DEST; + part defined by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; change first item key of the DEST; + don't create new item header + */ + + RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih), + "vs-10040: merge to right: last unformatted node of non-last indirect item must be filled entirely (%h)", + ih); + + if (bytes_or_entries == -1) { + /* bytes_or_entries = length of last item body of SOURCE */ + bytes_or_entries = ih_item_len(ih); + + RFALSE(le_ih_k_offset(dih) != + le_ih_k_offset(ih) + op_bytes_number(ih, src->b_size), + "vs-10050: items %h and %h do not match", ih, dih); + + /* change first item key of the DEST */ + set_le_ih_k_offset(dih, le_ih_k_offset(ih)); + + /* 
item becomes non-mergeable */ + /* or mergeable if left item was */ + set_le_ih_k_type(dih, le_ih_k_type(ih)); + } else { + /* merge to right only part of item */ + RFALSE(ih_item_len(ih) <= bytes_or_entries, + "vs-10060: no so much bytes %lu (needed %lu)", + (unsigned long)ih_item_len(ih), + (unsigned long)bytes_or_entries); + + /* change first item key of the DEST */ + if (is_direct_le_ih(dih)) { + RFALSE(le_ih_k_offset(dih) <= + (unsigned long)bytes_or_entries, + "vs-10070: dih %h, bytes_or_entries(%d)", dih, + bytes_or_entries); + set_le_ih_k_offset(dih, + le_ih_k_offset(dih) - + bytes_or_entries); + } else { + RFALSE(le_ih_k_offset(dih) <= + (bytes_or_entries / UNFM_P_SIZE) * dest->b_size, + "vs-10080: dih %h, bytes_or_entries(%d)", + dih, + (bytes_or_entries / UNFM_P_SIZE) * dest->b_size); + set_le_ih_k_offset(dih, + le_ih_k_offset(dih) - + ((bytes_or_entries / UNFM_P_SIZE) * + dest->b_size)); + } + } + + leaf_paste_in_buffer(dest_bi, 0, 0, bytes_or_entries, + B_I_PITEM(src, + ih) + ih_item_len(ih) - bytes_or_entries, + 0); + return 1; +} /* copy cpy_mun items from buffer src to buffer dest * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning from first-th item in src to tail of dest * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning from first-th item in src to head of dest */ -static void leaf_copy_items_entirely (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, - int first, int cpy_num) +static void leaf_copy_items_entirely(struct buffer_info *dest_bi, + struct buffer_head *src, int last_first, + int first, int cpy_num) { - struct buffer_head * dest; - int nr, free_space; - int dest_before; - int last_loc, last_inserted_loc, location; - int i, j; - struct block_head * blkh; - struct item_head * ih; - - RFALSE( last_first != LAST_TO_FIRST && last_first != FIRST_TO_LAST, - "vs-10090: bad last_first parameter %d", last_first); - RFALSE( B_NR_ITEMS (src) - first < cpy_num, - "vs-10100: too few 
items in source %d, required %d from %d", - B_NR_ITEMS(src), cpy_num, first); - RFALSE( cpy_num < 0, "vs-10110: can not copy negative amount of items"); - RFALSE( ! dest_bi, "vs-10120: can not copy negative amount of items"); - - dest = dest_bi->bi_bh; - - RFALSE( ! dest, "vs-10130: can not copy negative amount of items"); - - if (cpy_num == 0) - return; - - blkh = B_BLK_HEAD(dest); - nr = blkh_nr_item( blkh ); - free_space = blkh_free_space(blkh); - - /* we will insert items before 0-th or nr-th item in dest buffer. It depends of last_first parameter */ - dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr; - - /* location of head of first new item */ - ih = B_N_PITEM_HEAD (dest, dest_before); - - RFALSE( blkh_free_space(blkh) < cpy_num * IH_SIZE, - "vs-10140: not enough free space for headers %d (needed %d)", - B_FREE_SPACE (dest), cpy_num * IH_SIZE); - - /* prepare space for headers */ - memmove (ih + cpy_num, ih, (nr-dest_before) * IH_SIZE); - - /* copy item headers */ - memcpy (ih, B_N_PITEM_HEAD (src, first), cpy_num * IH_SIZE); - - free_space -= (IH_SIZE * cpy_num); - set_blkh_free_space( blkh, free_space ); - - /* location of unmovable item */ - j = location = (dest_before == 0) ? 
dest->b_size : ih_location(ih-1); - for (i = dest_before; i < nr + cpy_num; i ++) { - location -= ih_item_len( ih + i - dest_before ); - put_ih_location( ih + i - dest_before, location ); - } - - /* prepare space for items */ - last_loc = ih_location( &(ih[nr+cpy_num-1-dest_before]) ); - last_inserted_loc = ih_location( &(ih[cpy_num-1]) ); - - /* check free space */ - RFALSE( free_space < j - last_inserted_loc, - "vs-10150: not enough free space for items %d (needed %d)", - free_space, j - last_inserted_loc); - - memmove (dest->b_data + last_loc, - dest->b_data + last_loc + j - last_inserted_loc, - last_inserted_loc - last_loc); - - /* copy items */ - memcpy (dest->b_data + last_inserted_loc, B_N_PITEM(src,(first + cpy_num - 1)), - j - last_inserted_loc); - - /* sizes, item number */ - set_blkh_nr_item( blkh, nr + cpy_num ); - set_blkh_free_space( blkh, free_space - (j - last_inserted_loc) ); - - do_balance_mark_leaf_dirty (dest_bi->tb, dest, 0); - - if (dest_bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD (dest_bi->bi_parent, dest_bi->bi_position); - RFALSE( dc_block_number(t_dc) != dest->b_blocknr, - "vs-10160: block number in bh does not match to field in disk_child structure %lu and %lu", - ( long unsigned ) dest->b_blocknr, - ( long unsigned ) dc_block_number(t_dc)); - put_dc_size( t_dc, dc_size(t_dc) + (j - last_inserted_loc + IH_SIZE * cpy_num ) ); - - do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent, 0); - } -} + struct buffer_head *dest; + int nr, free_space; + int dest_before; + int last_loc, last_inserted_loc, location; + int i, j; + struct block_head *blkh; + struct item_head *ih; + + RFALSE(last_first != LAST_TO_FIRST && last_first != FIRST_TO_LAST, + "vs-10090: bad last_first parameter %d", last_first); + RFALSE(B_NR_ITEMS(src) - first < cpy_num, + "vs-10100: too few items in source %d, required %d from %d", + B_NR_ITEMS(src), cpy_num, first); + RFALSE(cpy_num < 0, "vs-10110: can not copy negative amount of items"); + 
RFALSE(!dest_bi, "vs-10120: can not copy negative amount of items"); + + dest = dest_bi->bi_bh; + + RFALSE(!dest, "vs-10130: can not copy negative amount of items"); + + if (cpy_num == 0) + return; + + blkh = B_BLK_HEAD(dest); + nr = blkh_nr_item(blkh); + free_space = blkh_free_space(blkh); + + /* we will insert items before 0-th or nr-th item in dest buffer. It depends of last_first parameter */ + dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr; + + /* location of head of first new item */ + ih = B_N_PITEM_HEAD(dest, dest_before); + + RFALSE(blkh_free_space(blkh) < cpy_num * IH_SIZE, + "vs-10140: not enough free space for headers %d (needed %d)", + B_FREE_SPACE(dest), cpy_num * IH_SIZE); + + /* prepare space for headers */ + memmove(ih + cpy_num, ih, (nr - dest_before) * IH_SIZE); + /* copy item headers */ + memcpy(ih, B_N_PITEM_HEAD(src, first), cpy_num * IH_SIZE); + + free_space -= (IH_SIZE * cpy_num); + set_blkh_free_space(blkh, free_space); + + /* location of unmovable item */ + j = location = (dest_before == 0) ? 
dest->b_size : ih_location(ih - 1); + for (i = dest_before; i < nr + cpy_num; i++) { + location -= ih_item_len(ih + i - dest_before); + put_ih_location(ih + i - dest_before, location); + } + + /* prepare space for items */ + last_loc = ih_location(&(ih[nr + cpy_num - 1 - dest_before])); + last_inserted_loc = ih_location(&(ih[cpy_num - 1])); + + /* check free space */ + RFALSE(free_space < j - last_inserted_loc, + "vs-10150: not enough free space for items %d (needed %d)", + free_space, j - last_inserted_loc); + + memmove(dest->b_data + last_loc, + dest->b_data + last_loc + j - last_inserted_loc, + last_inserted_loc - last_loc); + + /* copy items */ + memcpy(dest->b_data + last_inserted_loc, + B_N_PITEM(src, (first + cpy_num - 1)), j - last_inserted_loc); + + /* sizes, item number */ + set_blkh_nr_item(blkh, nr + cpy_num); + set_blkh_free_space(blkh, free_space - (j - last_inserted_loc)); + + do_balance_mark_leaf_dirty(dest_bi->tb, dest, 0); + + if (dest_bi->bi_parent) { + struct disk_child *t_dc; + t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position); + RFALSE(dc_block_number(t_dc) != dest->b_blocknr, + "vs-10160: block number in bh does not match to field in disk_child structure %lu and %lu", + (long unsigned)dest->b_blocknr, + (long unsigned)dc_block_number(t_dc)); + put_dc_size(t_dc, + dc_size(t_dc) + (j - last_inserted_loc + + IH_SIZE * cpy_num)); + + do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent, + 0); + } +} /* This function splits the (liquid) item into two items (useful when shifting part of an item into another node.) 
*/ -static void leaf_item_bottle (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, - int item_num, int cpy_bytes) +static void leaf_item_bottle(struct buffer_info *dest_bi, + struct buffer_head *src, int last_first, + int item_num, int cpy_bytes) { - struct buffer_head * dest = dest_bi->bi_bh; - struct item_head * ih; - - RFALSE( cpy_bytes == -1, "vs-10170: bytes == - 1 means: do not split item"); - - if ( last_first == FIRST_TO_LAST ) { - /* if ( if item in position item_num in buffer SOURCE is directory item ) */ - if (is_direntry_le_ih (ih = B_N_PITEM_HEAD(src,item_num))) - leaf_copy_dir_entries (dest_bi, src, FIRST_TO_LAST, item_num, 0, cpy_bytes); - else { - struct item_head n_ih; - - /* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST - part defined by 'cpy_bytes'; create new item header; change old item_header (????); - n_ih = new item_header; - */ - memcpy (&n_ih, ih, IH_SIZE); - put_ih_item_len( &n_ih, cpy_bytes ); - if (is_indirect_le_ih (ih)) { - RFALSE( cpy_bytes == ih_item_len(ih) && get_ih_free_space(ih), - "vs-10180: when whole indirect item is bottle to left neighbor, it must have free_space==0 (not %lu)", - ( long unsigned ) get_ih_free_space (ih)); - set_ih_free_space (&n_ih, 0); - } - - RFALSE( op_is_left_mergeable (&(ih->ih_key), src->b_size), - "vs-10190: bad mergeability of item %h", ih); - n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ - leaf_insert_into_buf (dest_bi, B_NR_ITEMS(dest), &n_ih, B_N_PITEM (src, item_num), 0); + struct buffer_head *dest = dest_bi->bi_bh; + struct item_head *ih; + + RFALSE(cpy_bytes == -1, + "vs-10170: bytes == - 1 means: do not split item"); + + if (last_first == FIRST_TO_LAST) { + /* if ( if item in position item_num in buffer SOURCE is directory item ) */ + if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num))) + leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, + item_num, 0, cpy_bytes); + else { + struct item_head n_ih; + + /* 
copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST + part defined by 'cpy_bytes'; create new item header; change old item_header (????); + n_ih = new item_header; + */ + memcpy(&n_ih, ih, IH_SIZE); + put_ih_item_len(&n_ih, cpy_bytes); + if (is_indirect_le_ih(ih)) { + RFALSE(cpy_bytes == ih_item_len(ih) + && get_ih_free_space(ih), + "vs-10180: when whole indirect item is bottle to left neighbor, it must have free_space==0 (not %lu)", + (long unsigned)get_ih_free_space(ih)); + set_ih_free_space(&n_ih, 0); + } + + RFALSE(op_is_left_mergeable(&(ih->ih_key), src->b_size), + "vs-10190: bad mergeability of item %h", ih); + n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ + leaf_insert_into_buf(dest_bi, B_NR_ITEMS(dest), &n_ih, + B_N_PITEM(src, item_num), 0); + } + } else { + /* if ( if item in position item_num in buffer SOURCE is directory item ) */ + if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num))) + leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, + item_num, + I_ENTRY_COUNT(ih) - cpy_bytes, + cpy_bytes); + else { + struct item_head n_ih; + + /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST + part defined by 'cpy_bytes'; create new item header; + n_ih = new item_header; + */ + memcpy(&n_ih, ih, SHORT_KEY_SIZE); + + n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ + + if (is_direct_le_ih(ih)) { + set_le_ih_k_offset(&n_ih, + le_ih_k_offset(ih) + + ih_item_len(ih) - cpy_bytes); + set_le_ih_k_type(&n_ih, TYPE_DIRECT); + set_ih_free_space(&n_ih, MAX_US_INT); + } else { + /* indirect item */ + RFALSE(!cpy_bytes && get_ih_free_space(ih), + "vs-10200: ih->ih_free_space must be 0 when indirect item will be appended"); + set_le_ih_k_offset(&n_ih, + le_ih_k_offset(ih) + + (ih_item_len(ih) - + cpy_bytes) / UNFM_P_SIZE * + dest->b_size); + set_le_ih_k_type(&n_ih, TYPE_INDIRECT); + set_ih_free_space(&n_ih, get_ih_free_space(ih)); + } + + /* set item length */ + 
put_ih_item_len(&n_ih, cpy_bytes); + + n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ + + leaf_insert_into_buf(dest_bi, 0, &n_ih, + B_N_PITEM(src, + item_num) + + ih_item_len(ih) - cpy_bytes, 0); + } } - } else { - /* if ( if item in position item_num in buffer SOURCE is directory item ) */ - if (is_direntry_le_ih(ih = B_N_PITEM_HEAD (src, item_num))) - leaf_copy_dir_entries (dest_bi, src, LAST_TO_FIRST, item_num, I_ENTRY_COUNT(ih) - cpy_bytes, cpy_bytes); - else { - struct item_head n_ih; - - /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST - part defined by 'cpy_bytes'; create new item header; - n_ih = new item_header; - */ - memcpy (&n_ih, ih, SHORT_KEY_SIZE); - - n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ - - if (is_direct_le_ih (ih)) { - set_le_ih_k_offset (&n_ih, le_ih_k_offset (ih) + ih_item_len(ih) - cpy_bytes); - set_le_ih_k_type (&n_ih, TYPE_DIRECT); - set_ih_free_space (&n_ih, MAX_US_INT); - } else { - /* indirect item */ - RFALSE( !cpy_bytes && get_ih_free_space (ih), - "vs-10200: ih->ih_free_space must be 0 when indirect item will be appended"); - set_le_ih_k_offset (&n_ih, le_ih_k_offset (ih) + (ih_item_len(ih) - cpy_bytes) / UNFM_P_SIZE * dest->b_size); - set_le_ih_k_type (&n_ih, TYPE_INDIRECT); - set_ih_free_space (&n_ih, get_ih_free_space (ih)); - } - - /* set item length */ - put_ih_item_len( &n_ih, cpy_bytes ); - - n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ - - leaf_insert_into_buf (dest_bi, 0, &n_ih, B_N_PITEM(src,item_num) + ih_item_len(ih) - cpy_bytes, 0); - } - } } - /* If cpy_bytes equals minus one than copy cpy_num whole items from SOURCE to DEST. If cpy_bytes not equal to minus one than copy cpy_num-1 whole items from SOURCE to DEST. From last item copy cpy_num bytes for regular item and cpy_num directory entries for directory item. 
*/ -static int leaf_copy_items (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, int cpy_num, - int cpy_bytes) +static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, + int last_first, int cpy_num, int cpy_bytes) { - struct buffer_head * dest; - int pos, i, src_nr_item, bytes; - - dest = dest_bi->bi_bh; - RFALSE( !dest || !src, "vs-10210: !dest || !src"); - RFALSE( last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST, - "vs-10220:last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST"); - RFALSE( B_NR_ITEMS(src) < cpy_num, - "vs-10230: No enough items: %d, req. %d", B_NR_ITEMS(src), cpy_num); - RFALSE( cpy_num < 0,"vs-10240: cpy_num < 0 (%d)", cpy_num); - - if ( cpy_num == 0 ) - return 0; - - if ( last_first == FIRST_TO_LAST ) { - /* copy items to left */ - pos = 0; - if ( cpy_num == 1 ) - bytes = cpy_bytes; - else - bytes = -1; - - /* copy the first item or it part or nothing to the end of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) */ - i = leaf_copy_boundary_item (dest_bi, src, FIRST_TO_LAST, bytes); - cpy_num -= i; - if ( cpy_num == 0 ) - return i; - pos += i; - if ( cpy_bytes == -1 ) - /* copy first cpy_num items starting from position 'pos' of SOURCE to end of DEST */ - leaf_copy_items_entirely (dest_bi, src, FIRST_TO_LAST, pos, cpy_num); - else { - /* copy first cpy_num-1 items starting from position 'pos-1' of the SOURCE to the end of the DEST */ - leaf_copy_items_entirely (dest_bi, src, FIRST_TO_LAST, pos, cpy_num-1); - - /* copy part of the item which number is cpy_num+pos-1 to the end of the DEST */ - leaf_item_bottle (dest_bi, src, FIRST_TO_LAST, cpy_num+pos-1, cpy_bytes); - } - } else { - /* copy items to right */ - src_nr_item = B_NR_ITEMS (src); - if ( cpy_num == 1 ) - bytes = cpy_bytes; - else - bytes = -1; - - /* copy the last item or it part or nothing to the begin of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); */ - i = leaf_copy_boundary_item (dest_bi, 
src, LAST_TO_FIRST, bytes); - - cpy_num -= i; - if ( cpy_num == 0 ) - return i; - - pos = src_nr_item - cpy_num - i; - if ( cpy_bytes == -1 ) { - /* starting from position 'pos' copy last cpy_num items of SOURCE to begin of DEST */ - leaf_copy_items_entirely (dest_bi, src, LAST_TO_FIRST, pos, cpy_num); - } else { - /* copy last cpy_num-1 items starting from position 'pos+1' of the SOURCE to the begin of the DEST; */ - leaf_copy_items_entirely (dest_bi, src, LAST_TO_FIRST, pos+1, cpy_num-1); - - /* copy part of the item which number is pos to the begin of the DEST */ - leaf_item_bottle (dest_bi, src, LAST_TO_FIRST, pos, cpy_bytes); - } - } - return i; + struct buffer_head *dest; + int pos, i, src_nr_item, bytes; + + dest = dest_bi->bi_bh; + RFALSE(!dest || !src, "vs-10210: !dest || !src"); + RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST, + "vs-10220:last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST"); + RFALSE(B_NR_ITEMS(src) < cpy_num, + "vs-10230: No enough items: %d, req. 
%d", B_NR_ITEMS(src), + cpy_num); + RFALSE(cpy_num < 0, "vs-10240: cpy_num < 0 (%d)", cpy_num); + + if (cpy_num == 0) + return 0; + + if (last_first == FIRST_TO_LAST) { + /* copy items to left */ + pos = 0; + if (cpy_num == 1) + bytes = cpy_bytes; + else + bytes = -1; + + /* copy the first item or it part or nothing to the end of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) */ + i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes); + cpy_num -= i; + if (cpy_num == 0) + return i; + pos += i; + if (cpy_bytes == -1) + /* copy first cpy_num items starting from position 'pos' of SOURCE to end of DEST */ + leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, + pos, cpy_num); + else { + /* copy first cpy_num-1 items starting from position 'pos-1' of the SOURCE to the end of the DEST */ + leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, + pos, cpy_num - 1); + + /* copy part of the item which number is cpy_num+pos-1 to the end of the DEST */ + leaf_item_bottle(dest_bi, src, FIRST_TO_LAST, + cpy_num + pos - 1, cpy_bytes); + } + } else { + /* copy items to right */ + src_nr_item = B_NR_ITEMS(src); + if (cpy_num == 1) + bytes = cpy_bytes; + else + bytes = -1; + + /* copy the last item or it part or nothing to the begin of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); */ + i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes); + + cpy_num -= i; + if (cpy_num == 0) + return i; + + pos = src_nr_item - cpy_num - i; + if (cpy_bytes == -1) { + /* starting from position 'pos' copy last cpy_num items of SOURCE to begin of DEST */ + leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, + pos, cpy_num); + } else { + /* copy last cpy_num-1 items starting from position 'pos+1' of the SOURCE to the begin of the DEST; */ + leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, + pos + 1, cpy_num - 1); + + /* copy part of the item which number is pos to the begin of the DEST */ + leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos, + 
cpy_bytes); + } + } + return i; } - /* there are types of coping: from S[0] to L[0], from S[0] to R[0], from R[0] to L[0]. for each of these we have to define parent and positions of destination and source buffers */ -static void leaf_define_dest_src_infos (int shift_mode, struct tree_balance * tb, struct buffer_info * dest_bi, - struct buffer_info * src_bi, int * first_last, - struct buffer_head * Snew) +static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb, + struct buffer_info *dest_bi, + struct buffer_info *src_bi, + int *first_last, + struct buffer_head *Snew) { - memset (dest_bi, 0, sizeof (struct buffer_info)); - memset (src_bi, 0, sizeof (struct buffer_info)); - - /* define dest, src, dest parent, dest position */ - switch (shift_mode) { - case LEAF_FROM_S_TO_L: /* it is used in leaf_shift_left */ - src_bi->tb = tb; - src_bi->bi_bh = PATH_PLAST_BUFFER (tb->tb_path); - src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, 0); - src_bi->bi_position = PATH_H_B_ITEM_ORDER (tb->tb_path, 0); /* src->b_item_order */ - dest_bi->tb = tb; - dest_bi->bi_bh = tb->L[0]; - dest_bi->bi_parent = tb->FL[0]; - dest_bi->bi_position = get_left_neighbor_position (tb, 0); - *first_last = FIRST_TO_LAST; - break; - - case LEAF_FROM_S_TO_R: /* it is used in leaf_shift_right */ - src_bi->tb = tb; - src_bi->bi_bh = PATH_PLAST_BUFFER (tb->tb_path); - src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, 0); - src_bi->bi_position = PATH_H_B_ITEM_ORDER (tb->tb_path, 0); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->R[0]; - dest_bi->bi_parent = tb->FR[0]; - dest_bi->bi_position = get_right_neighbor_position (tb, 0); - *first_last = LAST_TO_FIRST; - break; - - case LEAF_FROM_R_TO_L: /* it is used in balance_leaf_when_delete */ - src_bi->tb = tb; - src_bi->bi_bh = tb->R[0]; - src_bi->bi_parent = tb->FR[0]; - src_bi->bi_position = get_right_neighbor_position (tb, 0); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->L[0]; - dest_bi->bi_parent = tb->FL[0]; - dest_bi->bi_position = 
get_left_neighbor_position (tb, 0); - *first_last = FIRST_TO_LAST; - break; - - case LEAF_FROM_L_TO_R: /* it is used in balance_leaf_when_delete */ - src_bi->tb = tb; - src_bi->bi_bh = tb->L[0]; - src_bi->bi_parent = tb->FL[0]; - src_bi->bi_position = get_left_neighbor_position (tb, 0); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->R[0]; - dest_bi->bi_parent = tb->FR[0]; - dest_bi->bi_position = get_right_neighbor_position (tb, 0); - *first_last = LAST_TO_FIRST; - break; - - case LEAF_FROM_S_TO_SNEW: - src_bi->tb = tb; - src_bi->bi_bh = PATH_PLAST_BUFFER (tb->tb_path); - src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, 0); - src_bi->bi_position = PATH_H_B_ITEM_ORDER (tb->tb_path, 0); - dest_bi->tb = tb; - dest_bi->bi_bh = Snew; - dest_bi->bi_parent = NULL; - dest_bi->bi_position = 0; - *first_last = LAST_TO_FIRST; - break; - - default: - reiserfs_panic (NULL, "vs-10250: leaf_define_dest_src_infos: shift type is unknown (%d)", shift_mode); - } - RFALSE( src_bi->bi_bh == 0 || dest_bi->bi_bh == 0, - "vs-10260: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly", - shift_mode, src_bi->bi_bh, dest_bi->bi_bh); + memset(dest_bi, 0, sizeof(struct buffer_info)); + memset(src_bi, 0, sizeof(struct buffer_info)); + + /* define dest, src, dest parent, dest position */ + switch (shift_mode) { + case LEAF_FROM_S_TO_L: /* it is used in leaf_shift_left */ + src_bi->tb = tb; + src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); + src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); + src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); /* src->b_item_order */ + dest_bi->tb = tb; + dest_bi->bi_bh = tb->L[0]; + dest_bi->bi_parent = tb->FL[0]; + dest_bi->bi_position = get_left_neighbor_position(tb, 0); + *first_last = FIRST_TO_LAST; + break; + + case LEAF_FROM_S_TO_R: /* it is used in leaf_shift_right */ + src_bi->tb = tb; + src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); + src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); + src_bi->bi_position = 
PATH_H_B_ITEM_ORDER(tb->tb_path, 0); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->R[0]; + dest_bi->bi_parent = tb->FR[0]; + dest_bi->bi_position = get_right_neighbor_position(tb, 0); + *first_last = LAST_TO_FIRST; + break; + + case LEAF_FROM_R_TO_L: /* it is used in balance_leaf_when_delete */ + src_bi->tb = tb; + src_bi->bi_bh = tb->R[0]; + src_bi->bi_parent = tb->FR[0]; + src_bi->bi_position = get_right_neighbor_position(tb, 0); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->L[0]; + dest_bi->bi_parent = tb->FL[0]; + dest_bi->bi_position = get_left_neighbor_position(tb, 0); + *first_last = FIRST_TO_LAST; + break; + + case LEAF_FROM_L_TO_R: /* it is used in balance_leaf_when_delete */ + src_bi->tb = tb; + src_bi->bi_bh = tb->L[0]; + src_bi->bi_parent = tb->FL[0]; + src_bi->bi_position = get_left_neighbor_position(tb, 0); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->R[0]; + dest_bi->bi_parent = tb->FR[0]; + dest_bi->bi_position = get_right_neighbor_position(tb, 0); + *first_last = LAST_TO_FIRST; + break; + + case LEAF_FROM_S_TO_SNEW: + src_bi->tb = tb; + src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); + src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); + src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); + dest_bi->tb = tb; + dest_bi->bi_bh = Snew; + dest_bi->bi_parent = NULL; + dest_bi->bi_position = 0; + *first_last = LAST_TO_FIRST; + break; + + default: + reiserfs_panic(NULL, + "vs-10250: leaf_define_dest_src_infos: shift type is unknown (%d)", + shift_mode); + } + RFALSE(src_bi->bi_bh == 0 || dest_bi->bi_bh == 0, + "vs-10260: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly", + shift_mode, src_bi->bi_bh, dest_bi->bi_bh); } - - - /* copy mov_num items and mov_bytes of the (mov_num-1)th item to neighbor. 
Delete them from source */ -int leaf_move_items (int shift_mode, struct tree_balance * tb, int mov_num, int mov_bytes, struct buffer_head * Snew) +int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, + int mov_bytes, struct buffer_head *Snew) { - int ret_value; - struct buffer_info dest_bi, src_bi; - int first_last; + int ret_value; + struct buffer_info dest_bi, src_bi; + int first_last; - leaf_define_dest_src_infos (shift_mode, tb, &dest_bi, &src_bi, &first_last, Snew); + leaf_define_dest_src_infos(shift_mode, tb, &dest_bi, &src_bi, + &first_last, Snew); - ret_value = leaf_copy_items (&dest_bi, src_bi.bi_bh, first_last, mov_num, mov_bytes); + ret_value = + leaf_copy_items(&dest_bi, src_bi.bi_bh, first_last, mov_num, + mov_bytes); - leaf_delete_items (&src_bi, first_last, (first_last == FIRST_TO_LAST) ? 0 : (B_NR_ITEMS(src_bi.bi_bh) - mov_num), mov_num, mov_bytes); + leaf_delete_items(&src_bi, first_last, + (first_last == + FIRST_TO_LAST) ? 0 : (B_NR_ITEMS(src_bi.bi_bh) - + mov_num), mov_num, mov_bytes); - - return ret_value; + return ret_value; } - /* Shift shift_num items (and shift_bytes of last shifted item if shift_bytes != -1) from S[0] to L[0] and replace the delimiting key */ -int leaf_shift_left (struct tree_balance * tb, int shift_num, int shift_bytes) +int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes) { - struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path); - int i; + struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path); + int i; - /* move shift_num (and shift_bytes bytes) items from S[0] to left neighbor L[0] */ - i = leaf_move_items (LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL); + /* move shift_num (and shift_bytes bytes) items from S[0] to left neighbor L[0] */ + i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL); - if ( shift_num ) { - if (B_NR_ITEMS (S0) == 0) { /* number of items in S[0] == 0 */ + if (shift_num) { + if (B_NR_ITEMS(S0) == 0) { /* number of items in 
S[0] == 0 */ - RFALSE( shift_bytes != -1, - "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)", - shift_bytes); + RFALSE(shift_bytes != -1, + "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)", + shift_bytes); #ifdef CONFIG_REISERFS_CHECK - if (tb->tb_mode == M_PASTE || tb->tb_mode == M_INSERT) { - print_cur_tb ("vs-10275"); - reiserfs_panic (tb->tb_sb, "vs-10275: leaf_shift_left: balance condition corrupted (%c)", tb->tb_mode); - } + if (tb->tb_mode == M_PASTE || tb->tb_mode == M_INSERT) { + print_cur_tb("vs-10275"); + reiserfs_panic(tb->tb_sb, + "vs-10275: leaf_shift_left: balance condition corrupted (%c)", + tb->tb_mode); + } #endif - if (PATH_H_POSITION (tb->tb_path, 1) == 0) - replace_key (tb, tb->CFL[0], tb->lkey[0], PATH_H_PPARENT (tb->tb_path, 0), 0); - - } else { - /* replace lkey in CFL[0] by 0-th key from S[0]; */ - replace_key (tb, tb->CFL[0], tb->lkey[0], S0, 0); - - RFALSE( (shift_bytes != -1 && - !(is_direntry_le_ih (B_N_PITEM_HEAD (S0, 0)) - && !I_ENTRY_COUNT (B_N_PITEM_HEAD (S0, 0)))) && - (!op_is_left_mergeable (B_N_PKEY (S0, 0), S0->b_size)), - "vs-10280: item must be mergeable"); - } - } - - return i; -} - - - + if (PATH_H_POSITION(tb->tb_path, 1) == 0) + replace_key(tb, tb->CFL[0], tb->lkey[0], + PATH_H_PPARENT(tb->tb_path, 0), 0); + + } else { + /* replace lkey in CFL[0] by 0-th key from S[0]; */ + replace_key(tb, tb->CFL[0], tb->lkey[0], S0, 0); + + RFALSE((shift_bytes != -1 && + !(is_direntry_le_ih(B_N_PITEM_HEAD(S0, 0)) + && !I_ENTRY_COUNT(B_N_PITEM_HEAD(S0, 0)))) && + (!op_is_left_mergeable + (B_N_PKEY(S0, 0), S0->b_size)), + "vs-10280: item must be mergeable"); + } + } + return i; +} /* CLEANING STOPPED HERE */ - - - /* Shift shift_num (shift_bytes) items from S[0] to the right neighbor, and replace the delimiting key */ -int leaf_shift_right( - struct tree_balance * tb, - int shift_num, - int shift_bytes - ) +int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes) { - // struct buffer_head * S0 = 
PATH_PLAST_BUFFER (tb->tb_path); - int ret_value; + // struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path); + int ret_value; - /* move shift_num (and shift_bytes) items from S[0] to right neighbor R[0] */ - ret_value = leaf_move_items (LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL); + /* move shift_num (and shift_bytes) items from S[0] to right neighbor R[0] */ + ret_value = + leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL); - /* replace rkey in CFR[0] by the 0-th key from R[0] */ - if (shift_num) { - replace_key (tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); + /* replace rkey in CFR[0] by the 0-th key from R[0] */ + if (shift_num) { + replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); - } + } - return ret_value; + return ret_value; } - - -static void leaf_delete_items_entirely (struct buffer_info * bi, - int first, int del_num); +static void leaf_delete_items_entirely(struct buffer_info *bi, + int first, int del_num); /* If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR. If not. If last_first == 0. Starting from position 'first' delete del_num-1 items in whole. Delete part of body of @@ -670,287 +731,292 @@ static void leaf_delete_items_entirely (struct buffer_info * bi, If last_first == 1. Starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of the last item . Part defined by del_bytes. Don't delete last item header. */ -void leaf_delete_items (struct buffer_info * cur_bi, int last_first, - int first, int del_num, int del_bytes) +void leaf_delete_items(struct buffer_info *cur_bi, int last_first, + int first, int del_num, int del_bytes) { - struct buffer_head * bh; - int item_amount = B_NR_ITEMS (bh = cur_bi->bi_bh); - - RFALSE( !bh, "10155: bh is not defined"); - RFALSE( del_num < 0, "10160: del_num can not be < 0. 
del_num==%d", del_num); - RFALSE( first < 0 || first + del_num > item_amount, - "10165: invalid number of first item to be deleted (%d) or " - "no so much items (%d) to delete (only %d)", - first, first + del_num, item_amount); - - if ( del_num == 0 ) - return; - - if ( first == 0 && del_num == item_amount && del_bytes == -1 ) { - make_empty_node (cur_bi); - do_balance_mark_leaf_dirty (cur_bi->tb, bh, 0); - return; - } - - if ( del_bytes == -1 ) - /* delete del_num items beginning from item in position first */ - leaf_delete_items_entirely (cur_bi, first, del_num); - else { - if ( last_first == FIRST_TO_LAST ) { - /* delete del_num-1 items beginning from item in position first */ - leaf_delete_items_entirely (cur_bi, first, del_num-1); - - /* delete the part of the first item of the bh - do not delete item header - */ - leaf_cut_from_buffer (cur_bi, 0, 0, del_bytes); - } else { - struct item_head * ih; - int len; - - /* delete del_num-1 items beginning from item in position first+1 */ - leaf_delete_items_entirely (cur_bi, first+1, del_num-1); - - if (is_direntry_le_ih (ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh)-1))) /* the last item is directory */ - /* len = numbers of directory entries in this item */ - len = ih_entry_count(ih); - else - /* len = body len of item */ - len = ih_item_len(ih); - - /* delete the part of the last item of the bh - do not delete item header - */ - leaf_cut_from_buffer (cur_bi, B_NR_ITEMS(bh)-1, len - del_bytes, del_bytes); + struct buffer_head *bh; + int item_amount = B_NR_ITEMS(bh = cur_bi->bi_bh); + + RFALSE(!bh, "10155: bh is not defined"); + RFALSE(del_num < 0, "10160: del_num can not be < 0. 
del_num==%d", + del_num); + RFALSE(first < 0 + || first + del_num > item_amount, + "10165: invalid number of first item to be deleted (%d) or " + "no so much items (%d) to delete (only %d)", first, + first + del_num, item_amount); + + if (del_num == 0) + return; + + if (first == 0 && del_num == item_amount && del_bytes == -1) { + make_empty_node(cur_bi); + do_balance_mark_leaf_dirty(cur_bi->tb, bh, 0); + return; } - } -} + if (del_bytes == -1) + /* delete del_num items beginning from item in position first */ + leaf_delete_items_entirely(cur_bi, first, del_num); + else { + if (last_first == FIRST_TO_LAST) { + /* delete del_num-1 items beginning from item in position first */ + leaf_delete_items_entirely(cur_bi, first, del_num - 1); + + /* delete the part of the first item of the bh + do not delete item header + */ + leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes); + } else { + struct item_head *ih; + int len; + + /* delete del_num-1 items beginning from item in position first+1 */ + leaf_delete_items_entirely(cur_bi, first + 1, + del_num - 1); + + if (is_direntry_le_ih + (ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1))) + /* the last item is directory */ + /* len = numbers of directory entries in this item */ + len = ih_entry_count(ih); + else + /* len = body len of item */ + len = ih_item_len(ih); + + /* delete the part of the last item of the bh + do not delete item header + */ + leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1, + len - del_bytes, del_bytes); + } + } +} /* insert item into the leaf node in position before */ -void leaf_insert_into_buf (struct buffer_info * bi, int before, - struct item_head * inserted_item_ih, - const char * inserted_item_body, - int zeros_number) +void leaf_insert_into_buf(struct buffer_info *bi, int before, + struct item_head *inserted_item_ih, + const char *inserted_item_body, int zeros_number) { - struct buffer_head * bh = bi->bi_bh; - int nr, free_space; - struct block_head * blkh; - struct item_head * ih; - int i; - int last_loc, 
unmoved_loc; - char * to; - - - blkh = B_BLK_HEAD(bh); - nr = blkh_nr_item(blkh); - free_space = blkh_free_space( blkh ); - - /* check free space */ - RFALSE( free_space < ih_item_len(inserted_item_ih) + IH_SIZE, - "vs-10170: not enough free space in block %z, new item %h", - bh, inserted_item_ih); - RFALSE( zeros_number > ih_item_len(inserted_item_ih), - "vs-10172: zero number == %d, item length == %d", - zeros_number, ih_item_len(inserted_item_ih)); - - - /* get item new item must be inserted before */ - ih = B_N_PITEM_HEAD (bh, before); - - /* prepare space for the body of new item */ - last_loc = nr ? ih_location( &(ih[nr - before - 1]) ) : bh->b_size; - unmoved_loc = before ? ih_location( ih-1 ) : bh->b_size; - - - memmove (bh->b_data + last_loc - ih_item_len(inserted_item_ih), - bh->b_data + last_loc, unmoved_loc - last_loc); - - to = bh->b_data + unmoved_loc - ih_item_len(inserted_item_ih); - memset (to, 0, zeros_number); - to += zeros_number; - - /* copy body to prepared space */ - if (inserted_item_body) - memmove (to, inserted_item_body, ih_item_len(inserted_item_ih) - zeros_number); - else - memset(to, '\0', ih_item_len(inserted_item_ih) - zeros_number); - - /* insert item header */ - memmove (ih + 1, ih, IH_SIZE * (nr - before)); - memmove (ih, inserted_item_ih, IH_SIZE); - - /* change locations */ - for (i = before; i < nr + 1; i ++) - { - unmoved_loc -= ih_item_len( &(ih[i-before])); - put_ih_location( &(ih[i-before]), unmoved_loc ); - } - - /* sizes, free space, item number */ - set_blkh_nr_item( blkh, blkh_nr_item(blkh) + 1 ); - set_blkh_free_space( blkh, - free_space - (IH_SIZE + ih_item_len(inserted_item_ih ) ) ); - do_balance_mark_leaf_dirty (bi->tb, bh, 1); - - if (bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD (bi->bi_parent, bi->bi_position); - put_dc_size( t_dc, dc_size(t_dc) + (IH_SIZE + ih_item_len(inserted_item_ih))); - do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0); - } -} + struct buffer_head *bh = bi->bi_bh; + 
int nr, free_space; + struct block_head *blkh; + struct item_head *ih; + int i; + int last_loc, unmoved_loc; + char *to; + + blkh = B_BLK_HEAD(bh); + nr = blkh_nr_item(blkh); + free_space = blkh_free_space(blkh); + + /* check free space */ + RFALSE(free_space < ih_item_len(inserted_item_ih) + IH_SIZE, + "vs-10170: not enough free space in block %z, new item %h", + bh, inserted_item_ih); + RFALSE(zeros_number > ih_item_len(inserted_item_ih), + "vs-10172: zero number == %d, item length == %d", + zeros_number, ih_item_len(inserted_item_ih)); + + /* get item new item must be inserted before */ + ih = B_N_PITEM_HEAD(bh, before); + + /* prepare space for the body of new item */ + last_loc = nr ? ih_location(&(ih[nr - before - 1])) : bh->b_size; + unmoved_loc = before ? ih_location(ih - 1) : bh->b_size; + + memmove(bh->b_data + last_loc - ih_item_len(inserted_item_ih), + bh->b_data + last_loc, unmoved_loc - last_loc); + + to = bh->b_data + unmoved_loc - ih_item_len(inserted_item_ih); + memset(to, 0, zeros_number); + to += zeros_number; + + /* copy body to prepared space */ + if (inserted_item_body) + memmove(to, inserted_item_body, + ih_item_len(inserted_item_ih) - zeros_number); + else + memset(to, '\0', ih_item_len(inserted_item_ih) - zeros_number); + + /* insert item header */ + memmove(ih + 1, ih, IH_SIZE * (nr - before)); + memmove(ih, inserted_item_ih, IH_SIZE); + + /* change locations */ + for (i = before; i < nr + 1; i++) { + unmoved_loc -= ih_item_len(&(ih[i - before])); + put_ih_location(&(ih[i - before]), unmoved_loc); + } + /* sizes, free space, item number */ + set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1); + set_blkh_free_space(blkh, + free_space - (IH_SIZE + + ih_item_len(inserted_item_ih))); + do_balance_mark_leaf_dirty(bi->tb, bh, 1); + + if (bi->bi_parent) { + struct disk_child *t_dc; + t_dc = B_N_CHILD(bi->bi_parent, bi->bi_position); + put_dc_size(t_dc, + dc_size(t_dc) + (IH_SIZE + + ih_item_len(inserted_item_ih))); + 
do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0); + } +} /* paste paste_size bytes to affected_item_num-th item. When item is a directory, this only prepare space for new entries */ -void leaf_paste_in_buffer (struct buffer_info * bi, int affected_item_num, - int pos_in_item, int paste_size, - const char * body, - int zeros_number) +void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num, + int pos_in_item, int paste_size, + const char *body, int zeros_number) { - struct buffer_head * bh = bi->bi_bh; - int nr, free_space; - struct block_head * blkh; - struct item_head * ih; - int i; - int last_loc, unmoved_loc; - - blkh = B_BLK_HEAD(bh); - nr = blkh_nr_item(blkh); - free_space = blkh_free_space(blkh); - - - /* check free space */ - RFALSE( free_space < paste_size, - "vs-10175: not enough free space: needed %d, available %d", - paste_size, free_space); + struct buffer_head *bh = bi->bi_bh; + int nr, free_space; + struct block_head *blkh; + struct item_head *ih; + int i; + int last_loc, unmoved_loc; + + blkh = B_BLK_HEAD(bh); + nr = blkh_nr_item(blkh); + free_space = blkh_free_space(blkh); + + /* check free space */ + RFALSE(free_space < paste_size, + "vs-10175: not enough free space: needed %d, available %d", + paste_size, free_space); #ifdef CONFIG_REISERFS_CHECK - if (zeros_number > paste_size) { - print_cur_tb ("10177"); - reiserfs_panic ( NULL, "vs-10177: leaf_paste_in_buffer: ero number == %d, paste_size == %d", - zeros_number, paste_size); - } -#endif /* CONFIG_REISERFS_CHECK */ - - - /* item to be appended */ - ih = B_N_PITEM_HEAD(bh, affected_item_num); - - last_loc = ih_location( &(ih[nr - affected_item_num - 1]) ); - unmoved_loc = affected_item_num ? 
ih_location( ih-1 ) : bh->b_size; - - /* prepare space */ - memmove (bh->b_data + last_loc - paste_size, bh->b_data + last_loc, - unmoved_loc - last_loc); - - - /* change locations */ - for (i = affected_item_num; i < nr; i ++) - put_ih_location( &(ih[i-affected_item_num]), - ih_location( &(ih[i-affected_item_num])) - paste_size ); - - if ( body ) { - if (!is_direntry_le_ih (ih)) { - if (!pos_in_item) { - /* shift data to right */ - memmove (bh->b_data + ih_location(ih) + paste_size, - bh->b_data + ih_location(ih), ih_item_len(ih)); - /* paste data in the head of item */ - memset (bh->b_data + ih_location(ih), 0, zeros_number); - memcpy (bh->b_data + ih_location(ih) + zeros_number, body, paste_size - zeros_number); - } else { - memset (bh->b_data + unmoved_loc - paste_size, 0, zeros_number); - memcpy (bh->b_data + unmoved_loc - paste_size + zeros_number, body, paste_size - zeros_number); - } + if (zeros_number > paste_size) { + print_cur_tb("10177"); + reiserfs_panic(NULL, + "vs-10177: leaf_paste_in_buffer: ero number == %d, paste_size == %d", + zeros_number, paste_size); + } +#endif /* CONFIG_REISERFS_CHECK */ + + /* item to be appended */ + ih = B_N_PITEM_HEAD(bh, affected_item_num); + + last_loc = ih_location(&(ih[nr - affected_item_num - 1])); + unmoved_loc = affected_item_num ? 
ih_location(ih - 1) : bh->b_size; + + /* prepare space */ + memmove(bh->b_data + last_loc - paste_size, bh->b_data + last_loc, + unmoved_loc - last_loc); + + /* change locations */ + for (i = affected_item_num; i < nr; i++) + put_ih_location(&(ih[i - affected_item_num]), + ih_location(&(ih[i - affected_item_num])) - + paste_size); + + if (body) { + if (!is_direntry_le_ih(ih)) { + if (!pos_in_item) { + /* shift data to right */ + memmove(bh->b_data + ih_location(ih) + + paste_size, + bh->b_data + ih_location(ih), + ih_item_len(ih)); + /* paste data in the head of item */ + memset(bh->b_data + ih_location(ih), 0, + zeros_number); + memcpy(bh->b_data + ih_location(ih) + + zeros_number, body, + paste_size - zeros_number); + } else { + memset(bh->b_data + unmoved_loc - paste_size, 0, + zeros_number); + memcpy(bh->b_data + unmoved_loc - paste_size + + zeros_number, body, + paste_size - zeros_number); + } + } + } else + memset(bh->b_data + unmoved_loc - paste_size, '\0', paste_size); + + put_ih_item_len(ih, ih_item_len(ih) + paste_size); + + /* change free space */ + set_blkh_free_space(blkh, free_space - paste_size); + + do_balance_mark_leaf_dirty(bi->tb, bh, 0); + + if (bi->bi_parent) { + struct disk_child *t_dc = + B_N_CHILD(bi->bi_parent, bi->bi_position); + put_dc_size(t_dc, dc_size(t_dc) + paste_size); + do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0); } - } - else - memset(bh->b_data + unmoved_loc - paste_size, '\0', paste_size); - - put_ih_item_len( ih, ih_item_len(ih) + paste_size ); - - /* change free space */ - set_blkh_free_space( blkh, free_space - paste_size ); - - do_balance_mark_leaf_dirty (bi->tb, bh, 0); - - if (bi->bi_parent) { - struct disk_child *t_dc = B_N_CHILD (bi->bi_parent, bi->bi_position); - put_dc_size( t_dc, dc_size(t_dc) + paste_size ); - do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0); - } } - /* cuts DEL_COUNT entries beginning from FROM-th entry. 
Directory item does not have free space, so it moves DEHs and remaining records as necessary. Return value is size of removed part of directory item in bytes. */ -static int leaf_cut_entries ( - struct buffer_head * bh, - struct item_head * ih, - int from, - int del_count - ) +static int leaf_cut_entries(struct buffer_head *bh, + struct item_head *ih, int from, int del_count) { - char * item; - struct reiserfs_de_head * deh; - int prev_record_offset; /* offset of record, that is (from-1)th */ - char * prev_record; /* */ - int cut_records_len; /* length of all removed records */ - int i; - - - /* make sure, that item is directory and there are enough entries to - remove */ - RFALSE( !is_direntry_le_ih (ih), "10180: item is not directory item"); - RFALSE( I_ENTRY_COUNT(ih) < from + del_count, - "10185: item contains not enough entries: entry_cout = %d, from = %d, to delete = %d", - I_ENTRY_COUNT(ih), from, del_count); - - if (del_count == 0) - return 0; - - /* first byte of item */ - item = bh->b_data + ih_location(ih); - - /* entry head array */ - deh = B_I_DEH (bh, ih); - - /* first byte of remaining entries, those are BEFORE cut entries - (prev_record) and length of all removed records (cut_records_len) */ - prev_record_offset = (from ? 
deh_location( &(deh[from - 1])) : ih_item_len(ih)); - cut_records_len = prev_record_offset/*from_record*/ - - deh_location( &(deh[from + del_count - 1])); - prev_record = item + prev_record_offset; - - - /* adjust locations of remaining entries */ - for (i = I_ENTRY_COUNT(ih) - 1; i > from + del_count - 1; i --) - put_deh_location( &(deh[i]), - deh_location( &deh[i] ) - (DEH_SIZE * del_count ) ); - - for (i = 0; i < from; i ++) - put_deh_location( &(deh[i]), - deh_location( &deh[i] ) - (DEH_SIZE * del_count + cut_records_len) ); - - put_ih_entry_count( ih, ih_entry_count(ih) - del_count ); - - /* shift entry head array and entries those are AFTER removed entries */ - memmove ((char *)(deh + from), - deh + from + del_count, - prev_record - cut_records_len - (char *)(deh + from + del_count)); - - /* shift records, those are BEFORE removed entries */ - memmove (prev_record - cut_records_len - DEH_SIZE * del_count, - prev_record, item + ih_item_len(ih) - prev_record); - - return DEH_SIZE * del_count + cut_records_len; + char *item; + struct reiserfs_de_head *deh; + int prev_record_offset; /* offset of record, that is (from-1)th */ + char *prev_record; /* */ + int cut_records_len; /* length of all removed records */ + int i; + + /* make sure, that item is directory and there are enough entries to + remove */ + RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item"); + RFALSE(I_ENTRY_COUNT(ih) < from + del_count, + "10185: item contains not enough entries: entry_cout = %d, from = %d, to delete = %d", + I_ENTRY_COUNT(ih), from, del_count); + + if (del_count == 0) + return 0; + + /* first byte of item */ + item = bh->b_data + ih_location(ih); + + /* entry head array */ + deh = B_I_DEH(bh, ih); + + /* first byte of remaining entries, those are BEFORE cut entries + (prev_record) and length of all removed records (cut_records_len) */ + prev_record_offset = + (from ? 
deh_location(&(deh[from - 1])) : ih_item_len(ih)); + cut_records_len = prev_record_offset /*from_record */ - + deh_location(&(deh[from + del_count - 1])); + prev_record = item + prev_record_offset; + + /* adjust locations of remaining entries */ + for (i = I_ENTRY_COUNT(ih) - 1; i > from + del_count - 1; i--) + put_deh_location(&(deh[i]), + deh_location(&deh[i]) - + (DEH_SIZE * del_count)); + + for (i = 0; i < from; i++) + put_deh_location(&(deh[i]), + deh_location(&deh[i]) - (DEH_SIZE * del_count + + cut_records_len)); + + put_ih_entry_count(ih, ih_entry_count(ih) - del_count); + + /* shift entry head array and entries those are AFTER removed entries */ + memmove((char *)(deh + from), + deh + from + del_count, + prev_record - cut_records_len - (char *)(deh + from + + del_count)); + + /* shift records, those are BEFORE removed entries */ + memmove(prev_record - cut_records_len - DEH_SIZE * del_count, + prev_record, item + ih_item_len(ih) - prev_record); + + return DEH_SIZE * del_count + cut_records_len; } - /* when cut item is part of regular file pos_in_item - first byte that must be cut cut_size - number of bytes to be cut beginning from pos_in_item @@ -959,264 +1025,278 @@ static int leaf_cut_entries ( pos_in_item - number of first deleted entry cut_size - count of deleted entries */ -void leaf_cut_from_buffer (struct buffer_info * bi, int cut_item_num, - int pos_in_item, int cut_size) +void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, + int pos_in_item, int cut_size) { - int nr; - struct buffer_head * bh = bi->bi_bh; - struct block_head * blkh; - struct item_head * ih; - int last_loc, unmoved_loc; - int i; - - blkh = B_BLK_HEAD(bh); - nr = blkh_nr_item(blkh); - - /* item head of truncated item */ - ih = B_N_PITEM_HEAD (bh, cut_item_num); - - if (is_direntry_le_ih (ih)) { - /* first cut entry ()*/ - cut_size = leaf_cut_entries (bh, ih, pos_in_item, cut_size); - if (pos_in_item == 0) { - /* change key */ - RFALSE( cut_item_num, - "when 0-th 
enrty of item is cut, that item must be first in the node, not %d-th", cut_item_num); - /* change item key by key of first entry in the item */ - set_le_ih_k_offset (ih, deh_offset(B_I_DEH (bh, ih))); - /*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE);*/ - } - } else { - /* item is direct or indirect */ - RFALSE( is_statdata_le_ih (ih), "10195: item is stat data"); - RFALSE( pos_in_item && pos_in_item + cut_size != ih_item_len(ih), - "10200: invalid offset (%lu) or trunc_size (%lu) or ih_item_len (%lu)", - ( long unsigned ) pos_in_item, ( long unsigned ) cut_size, - ( long unsigned ) ih_item_len (ih)); - - /* shift item body to left if cut is from the head of item */ - if (pos_in_item == 0) { - memmove( bh->b_data + ih_location(ih), - bh->b_data + ih_location(ih) + cut_size, - ih_item_len(ih) - cut_size); - - /* change key of item */ - if (is_direct_le_ih (ih)) - set_le_ih_k_offset (ih, le_ih_k_offset (ih) + cut_size); - else { - set_le_ih_k_offset (ih, le_ih_k_offset (ih) + (cut_size / UNFM_P_SIZE) * bh->b_size); - RFALSE( ih_item_len(ih) == cut_size && get_ih_free_space (ih), - "10205: invalid ih_free_space (%h)", ih); - } - } - } - - - /* location of the last item */ - last_loc = ih_location( &(ih[nr - cut_item_num - 1]) ); - - /* location of the item, which is remaining at the same place */ - unmoved_loc = cut_item_num ? 
ih_location(ih-1) : bh->b_size; - - - /* shift */ - memmove (bh->b_data + last_loc + cut_size, bh->b_data + last_loc, - unmoved_loc - last_loc - cut_size); - - /* change item length */ - put_ih_item_len( ih, ih_item_len(ih) - cut_size ); - - if (is_indirect_le_ih (ih)) { - if (pos_in_item) - set_ih_free_space (ih, 0); - } - - /* change locations */ - for (i = cut_item_num; i < nr; i ++) - put_ih_location( &(ih[i-cut_item_num]), ih_location( &ih[i-cut_item_num]) + cut_size ); - - /* size, free space */ - set_blkh_free_space( blkh, blkh_free_space(blkh) + cut_size ); - - do_balance_mark_leaf_dirty (bi->tb, bh, 0); - - if (bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD (bi->bi_parent, bi->bi_position); - put_dc_size( t_dc, dc_size(t_dc) - cut_size ); - do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0); - } -} + int nr; + struct buffer_head *bh = bi->bi_bh; + struct block_head *blkh; + struct item_head *ih; + int last_loc, unmoved_loc; + int i; + + blkh = B_BLK_HEAD(bh); + nr = blkh_nr_item(blkh); + + /* item head of truncated item */ + ih = B_N_PITEM_HEAD(bh, cut_item_num); + + if (is_direntry_le_ih(ih)) { + /* first cut entry () */ + cut_size = leaf_cut_entries(bh, ih, pos_in_item, cut_size); + if (pos_in_item == 0) { + /* change key */ + RFALSE(cut_item_num, + "when 0-th enrty of item is cut, that item must be first in the node, not %d-th", + cut_item_num); + /* change item key by key of first entry in the item */ + set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih))); + /*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE); */ + } + } else { + /* item is direct or indirect */ + RFALSE(is_statdata_le_ih(ih), "10195: item is stat data"); + RFALSE(pos_in_item && pos_in_item + cut_size != ih_item_len(ih), + "10200: invalid offset (%lu) or trunc_size (%lu) or ih_item_len (%lu)", + (long unsigned)pos_in_item, (long unsigned)cut_size, + (long unsigned)ih_item_len(ih)); + + /* shift item body to left if cut is from the head 
of item */ + if (pos_in_item == 0) { + memmove(bh->b_data + ih_location(ih), + bh->b_data + ih_location(ih) + cut_size, + ih_item_len(ih) - cut_size); + + /* change key of item */ + if (is_direct_le_ih(ih)) + set_le_ih_k_offset(ih, + le_ih_k_offset(ih) + + cut_size); + else { + set_le_ih_k_offset(ih, + le_ih_k_offset(ih) + + (cut_size / UNFM_P_SIZE) * + bh->b_size); + RFALSE(ih_item_len(ih) == cut_size + && get_ih_free_space(ih), + "10205: invalid ih_free_space (%h)", ih); + } + } + } + + /* location of the last item */ + last_loc = ih_location(&(ih[nr - cut_item_num - 1])); + + /* location of the item, which is remaining at the same place */ + unmoved_loc = cut_item_num ? ih_location(ih - 1) : bh->b_size; + + /* shift */ + memmove(bh->b_data + last_loc + cut_size, bh->b_data + last_loc, + unmoved_loc - last_loc - cut_size); + + /* change item length */ + put_ih_item_len(ih, ih_item_len(ih) - cut_size); + if (is_indirect_le_ih(ih)) { + if (pos_in_item) + set_ih_free_space(ih, 0); + } + + /* change locations */ + for (i = cut_item_num; i < nr; i++) + put_ih_location(&(ih[i - cut_item_num]), + ih_location(&ih[i - cut_item_num]) + cut_size); + + /* size, free space */ + set_blkh_free_space(blkh, blkh_free_space(blkh) + cut_size); + + do_balance_mark_leaf_dirty(bi->tb, bh, 0); + + if (bi->bi_parent) { + struct disk_child *t_dc; + t_dc = B_N_CHILD(bi->bi_parent, bi->bi_position); + put_dc_size(t_dc, dc_size(t_dc) - cut_size); + do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0); + } +} /* delete del_num items from buffer starting from the first'th item */ -static void leaf_delete_items_entirely (struct buffer_info * bi, - int first, int del_num) +static void leaf_delete_items_entirely(struct buffer_info *bi, + int first, int del_num) { - struct buffer_head * bh = bi->bi_bh; - int nr; - int i, j; - int last_loc, last_removed_loc; - struct block_head * blkh; - struct item_head * ih; - - RFALSE( bh == NULL, "10210: buffer is 0"); - RFALSE( del_num < 0, "10215: 
del_num less than 0 (%d)", del_num); - - if (del_num == 0) - return; - - blkh = B_BLK_HEAD(bh); - nr = blkh_nr_item(blkh); - - RFALSE( first < 0 || first + del_num > nr, - "10220: first=%d, number=%d, there is %d items", first, del_num, nr); - - if (first == 0 && del_num == nr) { - /* this does not work */ - make_empty_node (bi); - - do_balance_mark_leaf_dirty (bi->tb, bh, 0); - return; - } - - ih = B_N_PITEM_HEAD (bh, first); - - /* location of unmovable item */ - j = (first == 0) ? bh->b_size : ih_location(ih-1); - - /* delete items */ - last_loc = ih_location( &(ih[nr-1-first]) ); - last_removed_loc = ih_location( &(ih[del_num-1]) ); - - memmove (bh->b_data + last_loc + j - last_removed_loc, - bh->b_data + last_loc, last_removed_loc - last_loc); - - /* delete item headers */ - memmove (ih, ih + del_num, (nr - first - del_num) * IH_SIZE); - - /* change item location */ - for (i = first; i < nr - del_num; i ++) - put_ih_location( &(ih[i-first]), ih_location( &(ih[i-first]) ) + (j - last_removed_loc) ); - - /* sizes, item number */ - set_blkh_nr_item( blkh, blkh_nr_item(blkh) - del_num ); - set_blkh_free_space( blkh, blkh_free_space(blkh) + (j - last_removed_loc + IH_SIZE * del_num) ); - - do_balance_mark_leaf_dirty (bi->tb, bh, 0); - - if (bi->bi_parent) { - struct disk_child *t_dc = B_N_CHILD (bi->bi_parent, bi->bi_position); - put_dc_size( t_dc, dc_size(t_dc) - - (j - last_removed_loc + IH_SIZE * del_num)); - do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0); - } -} + struct buffer_head *bh = bi->bi_bh; + int nr; + int i, j; + int last_loc, last_removed_loc; + struct block_head *blkh; + struct item_head *ih; + + RFALSE(bh == NULL, "10210: buffer is 0"); + RFALSE(del_num < 0, "10215: del_num less than 0 (%d)", del_num); + + if (del_num == 0) + return; + + blkh = B_BLK_HEAD(bh); + nr = blkh_nr_item(blkh); + RFALSE(first < 0 || first + del_num > nr, + "10220: first=%d, number=%d, there is %d items", first, del_num, + nr); + + if (first == 0 && del_num == 
nr) { + /* this does not work */ + make_empty_node(bi); + + do_balance_mark_leaf_dirty(bi->tb, bh, 0); + return; + } + ih = B_N_PITEM_HEAD(bh, first); + /* location of unmovable item */ + j = (first == 0) ? bh->b_size : ih_location(ih - 1); + /* delete items */ + last_loc = ih_location(&(ih[nr - 1 - first])); + last_removed_loc = ih_location(&(ih[del_num - 1])); + + memmove(bh->b_data + last_loc + j - last_removed_loc, + bh->b_data + last_loc, last_removed_loc - last_loc); + + /* delete item headers */ + memmove(ih, ih + del_num, (nr - first - del_num) * IH_SIZE); + + /* change item location */ + for (i = first; i < nr - del_num; i++) + put_ih_location(&(ih[i - first]), + ih_location(&(ih[i - first])) + (j - + last_removed_loc)); + + /* sizes, item number */ + set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num); + set_blkh_free_space(blkh, + blkh_free_space(blkh) + (j - last_removed_loc + + IH_SIZE * del_num)); + + do_balance_mark_leaf_dirty(bi->tb, bh, 0); + + if (bi->bi_parent) { + struct disk_child *t_dc = + B_N_CHILD(bi->bi_parent, bi->bi_position); + put_dc_size(t_dc, + dc_size(t_dc) - (j - last_removed_loc + + IH_SIZE * del_num)); + do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0); + } +} /* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */ -void leaf_paste_entries ( - struct buffer_head * bh, +void leaf_paste_entries(struct buffer_head *bh, int item_num, int before, int new_entry_count, - struct reiserfs_de_head * new_dehs, - const char * records, - int paste_size - ) + struct reiserfs_de_head *new_dehs, + const char *records, int paste_size) { - struct item_head * ih; - char * item; - struct reiserfs_de_head * deh; - char * insert_point; - int i, old_entry_num; - - if (new_entry_count == 0) - return; - - ih = B_N_PITEM_HEAD(bh, item_num); - - /* make sure, that item is directory, and there are enough records in it */ - RFALSE( !is_direntry_le_ih (ih), "10225: item is not directory item"); - RFALSE( 
I_ENTRY_COUNT (ih) < before, - "10230: there are no entry we paste entries before. entry_count = %d, before = %d", - I_ENTRY_COUNT (ih), before); - - - /* first byte of dest item */ - item = bh->b_data + ih_location(ih); - - /* entry head array */ - deh = B_I_DEH (bh, ih); - - /* new records will be pasted at this point */ - insert_point = item + (before ? deh_location( &(deh[before - 1])) : (ih_item_len(ih) - paste_size)); - - /* adjust locations of records that will be AFTER new records */ - for (i = I_ENTRY_COUNT(ih) - 1; i >= before; i --) - put_deh_location( &(deh[i]), - deh_location(&(deh[i])) + (DEH_SIZE * new_entry_count )); - - /* adjust locations of records that will be BEFORE new records */ - for (i = 0; i < before; i ++) - put_deh_location( &(deh[i]), deh_location(&(deh[i])) + paste_size ); - - old_entry_num = I_ENTRY_COUNT(ih); - put_ih_entry_count( ih, ih_entry_count(ih) + new_entry_count ); - - /* prepare space for pasted records */ - memmove (insert_point + paste_size, insert_point, item + (ih_item_len(ih) - paste_size) - insert_point); - - /* copy new records */ - memcpy (insert_point + DEH_SIZE * new_entry_count, records, - paste_size - DEH_SIZE * new_entry_count); - - /* prepare space for new entry heads */ - deh += before; - memmove ((char *)(deh + new_entry_count), deh, insert_point - (char *)deh); - - /* copy new entry heads */ - deh = (struct reiserfs_de_head *)((char *)deh); - memcpy (deh, new_dehs, DEH_SIZE * new_entry_count); - - /* set locations of new records */ - for (i = 0; i < new_entry_count; i ++) - { - put_deh_location( &(deh[i]), - deh_location( &(deh[i] )) + - (- deh_location( &(new_dehs[new_entry_count - 1])) + - insert_point + DEH_SIZE * new_entry_count - item)); - } - - - /* change item key if necessary (when we paste before 0-th entry */ - if (!before) - { - set_le_ih_k_offset (ih, deh_offset(new_dehs)); + struct item_head *ih; + char *item; + struct reiserfs_de_head *deh; + char *insert_point; + int i, old_entry_num; + + if 
(new_entry_count == 0) + return; + + ih = B_N_PITEM_HEAD(bh, item_num); + + /* make sure, that item is directory, and there are enough records in it */ + RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item"); + RFALSE(I_ENTRY_COUNT(ih) < before, + "10230: there are no entry we paste entries before. entry_count = %d, before = %d", + I_ENTRY_COUNT(ih), before); + + /* first byte of dest item */ + item = bh->b_data + ih_location(ih); + + /* entry head array */ + deh = B_I_DEH(bh, ih); + + /* new records will be pasted at this point */ + insert_point = + item + + (before ? deh_location(&(deh[before - 1])) + : (ih_item_len(ih) - paste_size)); + + /* adjust locations of records that will be AFTER new records */ + for (i = I_ENTRY_COUNT(ih) - 1; i >= before; i--) + put_deh_location(&(deh[i]), + deh_location(&(deh[i])) + + (DEH_SIZE * new_entry_count)); + + /* adjust locations of records that will be BEFORE new records */ + for (i = 0; i < before; i++) + put_deh_location(&(deh[i]), + deh_location(&(deh[i])) + paste_size); + + old_entry_num = I_ENTRY_COUNT(ih); + put_ih_entry_count(ih, ih_entry_count(ih) + new_entry_count); + + /* prepare space for pasted records */ + memmove(insert_point + paste_size, insert_point, + item + (ih_item_len(ih) - paste_size) - insert_point); + + /* copy new records */ + memcpy(insert_point + DEH_SIZE * new_entry_count, records, + paste_size - DEH_SIZE * new_entry_count); + + /* prepare space for new entry heads */ + deh += before; + memmove((char *)(deh + new_entry_count), deh, + insert_point - (char *)deh); + + /* copy new entry heads */ + deh = (struct reiserfs_de_head *)((char *)deh); + memcpy(deh, new_dehs, DEH_SIZE * new_entry_count); + + /* set locations of new records */ + for (i = 0; i < new_entry_count; i++) { + put_deh_location(&(deh[i]), + deh_location(&(deh[i])) + + (-deh_location + (&(new_dehs[new_entry_count - 1])) + + insert_point + DEH_SIZE * new_entry_count - + item)); + } + + /* change item key if necessary 
(when we paste before 0-th entry */ + if (!before) { + set_le_ih_k_offset(ih, deh_offset(new_dehs)); /* memcpy (&ih->ih_key.k_offset, &new_dehs->deh_offset, SHORT_KEY_SIZE);*/ - } - + } #ifdef CONFIG_REISERFS_CHECK - { - int prev, next; - /* check record locations */ - deh = B_I_DEH (bh, ih); - for (i = 0; i < I_ENTRY_COUNT(ih); i ++) { - next = (i < I_ENTRY_COUNT(ih) - 1) ? deh_location( &(deh[i + 1])) : 0; - prev = (i != 0) ? deh_location( &(deh[i - 1]) ) : 0; - - if (prev && prev <= deh_location( &(deh[i]))) - reiserfs_warning (NULL, "vs-10240: leaf_paste_entries: directory item (%h) corrupted (prev %a, cur(%d) %a)", - ih, deh + i - 1, i, deh + i); - if (next && next >= deh_location( &(deh[i]))) - reiserfs_warning (NULL, "vs-10250: leaf_paste_entries: directory item (%h) corrupted (cur(%d) %a, next %a)", - ih, i, deh + i, deh + i + 1); - } - } + { + int prev, next; + /* check record locations */ + deh = B_I_DEH(bh, ih); + for (i = 0; i < I_ENTRY_COUNT(ih); i++) { + next = + (i < + I_ENTRY_COUNT(ih) - + 1) ? deh_location(&(deh[i + 1])) : 0; + prev = (i != 0) ? deh_location(&(deh[i - 1])) : 0; + + if (prev && prev <= deh_location(&(deh[i]))) + reiserfs_warning(NULL, + "vs-10240: leaf_paste_entries: directory item (%h) corrupted (prev %a, cur(%d) %a)", + ih, deh + i - 1, i, deh + i); + if (next && next >= deh_location(&(deh[i]))) + reiserfs_warning(NULL, + "vs-10250: leaf_paste_entries: directory item (%h) corrupted (cur(%d) %a, next %a)", + ih, i, deh + i, deh + i + 1); + } + } #endif } diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 4a333255f27..a20bbc1642d 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -25,86 +25,85 @@ // directory item contains array of entry headers. 
This performs // binary search through that array -static int bin_search_in_dir_item (struct reiserfs_dir_entry * de, loff_t off) +static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off) { - struct item_head * ih = de->de_ih; - struct reiserfs_de_head * deh = de->de_deh; - int rbound, lbound, j; - - lbound = 0; - rbound = I_ENTRY_COUNT (ih) - 1; - - for (j = (rbound + lbound) / 2; lbound <= rbound; j = (rbound + lbound) / 2) { - if (off < deh_offset (deh + j)) { - rbound = j - 1; - continue; + struct item_head *ih = de->de_ih; + struct reiserfs_de_head *deh = de->de_deh; + int rbound, lbound, j; + + lbound = 0; + rbound = I_ENTRY_COUNT(ih) - 1; + + for (j = (rbound + lbound) / 2; lbound <= rbound; + j = (rbound + lbound) / 2) { + if (off < deh_offset(deh + j)) { + rbound = j - 1; + continue; + } + if (off > deh_offset(deh + j)) { + lbound = j + 1; + continue; + } + // this is not name found, but matched third key component + de->de_entry_num = j; + return NAME_FOUND; } - if (off > deh_offset (deh + j)) { - lbound = j + 1; - continue; - } - // this is not name found, but matched third key component - de->de_entry_num = j; - return NAME_FOUND; - } - de->de_entry_num = lbound; - return NAME_NOT_FOUND; + de->de_entry_num = lbound; + return NAME_NOT_FOUND; } - // comment? maybe something like set de to point to what the path points to? 
-static inline void set_de_item_location (struct reiserfs_dir_entry * de, struct path * path) +static inline void set_de_item_location(struct reiserfs_dir_entry *de, + struct path *path) { - de->de_bh = get_last_bh (path); - de->de_ih = get_ih (path); - de->de_deh = B_I_DEH (de->de_bh, de->de_ih); - de->de_item_num = PATH_LAST_POSITION (path); -} - + de->de_bh = get_last_bh(path); + de->de_ih = get_ih(path); + de->de_deh = B_I_DEH(de->de_bh, de->de_ih); + de->de_item_num = PATH_LAST_POSITION(path); +} // de_bh, de_ih, de_deh (points to first element of array), de_item_num is set -inline void set_de_name_and_namelen (struct reiserfs_dir_entry * de) +inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de) { - struct reiserfs_de_head * deh = de->de_deh + de->de_entry_num; + struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; - if (de->de_entry_num >= ih_entry_count (de->de_ih)) - BUG (); + if (de->de_entry_num >= ih_entry_count(de->de_ih)) + BUG(); - de->de_entrylen = entry_length (de->de_bh, de->de_ih, de->de_entry_num); - de->de_namelen = de->de_entrylen - (de_with_sd (deh) ? SD_SIZE : 0); - de->de_name = B_I_PITEM (de->de_bh, de->de_ih) + deh_location(deh); - if (de->de_name[de->de_namelen - 1] == 0) - de->de_namelen = strlen (de->de_name); + de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num); + de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? 
SD_SIZE : 0); + de->de_name = B_I_PITEM(de->de_bh, de->de_ih) + deh_location(deh); + if (de->de_name[de->de_namelen - 1] == 0) + de->de_namelen = strlen(de->de_name); } - // what entry points to -static inline void set_de_object_key (struct reiserfs_dir_entry * de) +static inline void set_de_object_key(struct reiserfs_dir_entry *de) { - if (de->de_entry_num >= ih_entry_count (de->de_ih)) - BUG (); - de->de_dir_id = deh_dir_id( &(de->de_deh[de->de_entry_num])); - de->de_objectid = deh_objectid( &(de->de_deh[de->de_entry_num])); + if (de->de_entry_num >= ih_entry_count(de->de_ih)) + BUG(); + de->de_dir_id = deh_dir_id(&(de->de_deh[de->de_entry_num])); + de->de_objectid = deh_objectid(&(de->de_deh[de->de_entry_num])); } - -static inline void store_de_entry_key (struct reiserfs_dir_entry * de) +static inline void store_de_entry_key(struct reiserfs_dir_entry *de) { - struct reiserfs_de_head * deh = de->de_deh + de->de_entry_num; - - if (de->de_entry_num >= ih_entry_count (de->de_ih)) - BUG (); - - /* store key of the found entry */ - de->de_entry_key.version = KEY_FORMAT_3_5; - de->de_entry_key.on_disk_key.k_dir_id = le32_to_cpu (de->de_ih->ih_key.k_dir_id); - de->de_entry_key.on_disk_key.k_objectid = le32_to_cpu (de->de_ih->ih_key.k_objectid); - set_cpu_key_k_offset (&(de->de_entry_key), deh_offset (deh)); - set_cpu_key_k_type (&(de->de_entry_key), TYPE_DIRENTRY); + struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; + + if (de->de_entry_num >= ih_entry_count(de->de_ih)) + BUG(); + + /* store key of the found entry */ + de->de_entry_key.version = KEY_FORMAT_3_5; + de->de_entry_key.on_disk_key.k_dir_id = + le32_to_cpu(de->de_ih->ih_key.k_dir_id); + de->de_entry_key.on_disk_key.k_objectid = + le32_to_cpu(de->de_ih->ih_key.k_objectid); + set_cpu_key_k_offset(&(de->de_entry_key), deh_offset(deh)); + set_cpu_key_k_type(&(de->de_entry_key), TYPE_DIRENTRY); } - /* We assign a key to each directory item, and place multiple entries in a single directory item. 
A directory item has a key equal to the key of the first directory entry in it. @@ -117,58 +116,60 @@ entry position in the item */ /* The function is NOT SCHEDULE-SAFE! */ -int search_by_entry_key (struct super_block * sb, const struct cpu_key * key, - struct path * path, struct reiserfs_dir_entry * de) +int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, + struct path *path, struct reiserfs_dir_entry *de) { - int retval; - - retval = search_item (sb, key, path); - switch (retval) { - case ITEM_NOT_FOUND: - if (!PATH_LAST_POSITION (path)) { - reiserfs_warning (sb, "vs-7000: search_by_entry_key: search_by_key returned item position == 0"); - pathrelse(path) ; - return IO_ERROR ; + int retval; + + retval = search_item(sb, key, path); + switch (retval) { + case ITEM_NOT_FOUND: + if (!PATH_LAST_POSITION(path)) { + reiserfs_warning(sb, + "vs-7000: search_by_entry_key: search_by_key returned item position == 0"); + pathrelse(path); + return IO_ERROR; + } + PATH_LAST_POSITION(path)--; + + case ITEM_FOUND: + break; + + case IO_ERROR: + return retval; + + default: + pathrelse(path); + reiserfs_warning(sb, + "vs-7002: search_by_entry_key: no path to here"); + return IO_ERROR; } - PATH_LAST_POSITION (path) --; - - case ITEM_FOUND: - break; - - case IO_ERROR: - return retval; - default: - pathrelse (path); - reiserfs_warning (sb, "vs-7002: search_by_entry_key: no path to here"); - return IO_ERROR; - } - - set_de_item_location (de, path); + set_de_item_location(de, path); #ifdef CONFIG_REISERFS_CHECK - if (!is_direntry_le_ih (de->de_ih) || - COMP_SHORT_KEYS (&(de->de_ih->ih_key), key)) { - print_block (de->de_bh, 0, -1, -1); - reiserfs_panic (sb, "vs-7005: search_by_entry_key: found item %h is not directory item or " - "does not belong to the same directory as key %K", de->de_ih, key); - } -#endif /* CONFIG_REISERFS_CHECK */ - - /* binary search in directory item by third componen t of the - key. 
sets de->de_entry_num of de */ - retval = bin_search_in_dir_item (de, cpu_key_k_offset (key)); - path->pos_in_item = de->de_entry_num; - if (retval != NAME_NOT_FOUND) { - // ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set - set_de_name_and_namelen (de); - set_de_object_key (de); - } - return retval; + if (!is_direntry_le_ih(de->de_ih) || + COMP_SHORT_KEYS(&(de->de_ih->ih_key), key)) { + print_block(de->de_bh, 0, -1, -1); + reiserfs_panic(sb, + "vs-7005: search_by_entry_key: found item %h is not directory item or " + "does not belong to the same directory as key %K", + de->de_ih, key); + } +#endif /* CONFIG_REISERFS_CHECK */ + + /* binary search in directory item by third componen t of the + key. sets de->de_entry_num of de */ + retval = bin_search_in_dir_item(de, cpu_key_k_offset(key)); + path->pos_in_item = de->de_entry_num; + if (retval != NAME_NOT_FOUND) { + // ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set + set_de_name_and_namelen(de); + set_de_object_key(de); + } + return retval; } - - /* Keyed 32-bit hash function using TEA in a Davis-Meyer function */ /* The third component is hashed, and you can choose from more than @@ -176,197 +177,210 @@ int search_by_entry_key (struct super_block * sb, const struct cpu_key * key, but are thought about. This function should be moved to hashes.c Jedi, please do so. -Hans */ -static __u32 get_third_component (struct super_block * s, - const char * name, int len) +static __u32 get_third_component(struct super_block *s, + const char *name, int len) { - __u32 res; - - if (!len || (len == 1 && name[0] == '.')) - return DOT_OFFSET; - if (len == 2 && name[0] == '.' && name[1] == '.') - return DOT_DOT_OFFSET; - - res = REISERFS_SB(s)->s_hash_function (name, len); - - // take bits from 7-th to 30-th including both bounds - res = GET_HASH_VALUE(res); - if (res == 0) - // needed to have no names before "." and ".." 
those have hash - // value == 0 and generation conters 1 and 2 accordingly - res = 128; - return res + MAX_GENERATION_NUMBER; + __u32 res; + + if (!len || (len == 1 && name[0] == '.')) + return DOT_OFFSET; + if (len == 2 && name[0] == '.' && name[1] == '.') + return DOT_DOT_OFFSET; + + res = REISERFS_SB(s)->s_hash_function(name, len); + + // take bits from 7-th to 30-th including both bounds + res = GET_HASH_VALUE(res); + if (res == 0) + // needed to have no names before "." and ".." those have hash + // value == 0 and generation conters 1 and 2 accordingly + res = 128; + return res + MAX_GENERATION_NUMBER; } - -static int reiserfs_match (struct reiserfs_dir_entry * de, - const char * name, int namelen) +static int reiserfs_match(struct reiserfs_dir_entry *de, + const char *name, int namelen) { - int retval = NAME_NOT_FOUND; + int retval = NAME_NOT_FOUND; - if ((namelen == de->de_namelen) && - !memcmp(de->de_name, name, de->de_namelen)) - retval = (de_visible (de->de_deh + de->de_entry_num) ? NAME_FOUND : NAME_FOUND_INVISIBLE); + if ((namelen == de->de_namelen) && + !memcmp(de->de_name, name, de->de_namelen)) + retval = + (de_visible(de->de_deh + de->de_entry_num) ? 
NAME_FOUND : + NAME_FOUND_INVISIBLE); - return retval; + return retval; } - /* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */ /* used when hash collisions exist */ - -static int linear_search_in_dir_item (struct cpu_key * key, struct reiserfs_dir_entry * de, - const char * name, int namelen) +static int linear_search_in_dir_item(struct cpu_key *key, + struct reiserfs_dir_entry *de, + const char *name, int namelen) { - struct reiserfs_de_head * deh = de->de_deh; - int retval; - int i; + struct reiserfs_de_head *deh = de->de_deh; + int retval; + int i; - i = de->de_entry_num; + i = de->de_entry_num; - if (i == I_ENTRY_COUNT (de->de_ih) || - GET_HASH_VALUE (deh_offset (deh + i)) != GET_HASH_VALUE (cpu_key_k_offset (key))) { - i --; - } + if (i == I_ENTRY_COUNT(de->de_ih) || + GET_HASH_VALUE(deh_offset(deh + i)) != + GET_HASH_VALUE(cpu_key_k_offset(key))) { + i--; + } - RFALSE( de->de_deh != B_I_DEH (de->de_bh, de->de_ih), - "vs-7010: array of entry headers not found"); + RFALSE(de->de_deh != B_I_DEH(de->de_bh, de->de_ih), + "vs-7010: array of entry headers not found"); - deh += i; + deh += i; - for (; i >= 0; i --, deh --) { - if (GET_HASH_VALUE (deh_offset (deh)) != - GET_HASH_VALUE (cpu_key_k_offset (key))) { - // hash value does not match, no need to check whole name - return NAME_NOT_FOUND; - } - - /* mark, that this generation number is used */ - if (de->de_gen_number_bit_string) - set_bit (GET_GENERATION_NUMBER (deh_offset (deh)), (unsigned long *)de->de_gen_number_bit_string); + for (; i >= 0; i--, deh--) { + if (GET_HASH_VALUE(deh_offset(deh)) != + GET_HASH_VALUE(cpu_key_k_offset(key))) { + // hash value does not match, no need to check whole name + return NAME_NOT_FOUND; + } + + /* mark, that this generation number is used */ + if (de->de_gen_number_bit_string) + set_bit(GET_GENERATION_NUMBER(deh_offset(deh)), + (unsigned long *)de->de_gen_number_bit_string); - // calculate pointer to name and namelen - de->de_entry_num = i; - 
set_de_name_and_namelen (de); + // calculate pointer to name and namelen + de->de_entry_num = i; + set_de_name_and_namelen(de); - if ((retval = reiserfs_match (de, name, namelen)) != NAME_NOT_FOUND) { - // de's de_name, de_namelen, de_recordlen are set. Fill the rest: + if ((retval = + reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) { + // de's de_name, de_namelen, de_recordlen are set. Fill the rest: - // key of pointed object - set_de_object_key (de); + // key of pointed object + set_de_object_key(de); - store_de_entry_key (de); + store_de_entry_key(de); - // retval can be NAME_FOUND or NAME_FOUND_INVISIBLE - return retval; + // retval can be NAME_FOUND or NAME_FOUND_INVISIBLE + return retval; + } } - } - - if (GET_GENERATION_NUMBER (le_ih_k_offset (de->de_ih)) == 0) - /* we have reached left most entry in the node. In common we - have to go to the left neighbor, but if generation counter - is 0 already, we know for sure, that there is no name with - the same hash value */ - // FIXME: this work correctly only because hash value can not - // be 0. Btw, in case of Yura's hash it is probably possible, - // so, this is a bug - return NAME_NOT_FOUND; - RFALSE( de->de_item_num, - "vs-7015: two diritems of the same directory in one node?"); + if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0) + /* we have reached left most entry in the node. In common we + have to go to the left neighbor, but if generation counter + is 0 already, we know for sure, that there is no name with + the same hash value */ + // FIXME: this work correctly only because hash value can not + // be 0. 
Btw, in case of Yura's hash it is probably possible, + // so, this is a bug + return NAME_NOT_FOUND; - return GOTO_PREVIOUS_ITEM; -} + RFALSE(de->de_item_num, + "vs-7015: two diritems of the same directory in one node?"); + return GOTO_PREVIOUS_ITEM; +} // may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND // FIXME: should add something like IOERROR -static int reiserfs_find_entry (struct inode * dir, const char * name, int namelen, - struct path * path_to_entry, struct reiserfs_dir_entry * de) +static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen, + struct path *path_to_entry, + struct reiserfs_dir_entry *de) { - struct cpu_key key_to_search; - int retval; - - - if (namelen > REISERFS_MAX_NAME (dir->i_sb->s_blocksize)) - return NAME_NOT_FOUND; - - /* we will search for this key in the tree */ - make_cpu_key (&key_to_search, dir, - get_third_component (dir->i_sb, name, namelen), TYPE_DIRENTRY, 3); - - while (1) { - retval = search_by_entry_key (dir->i_sb, &key_to_search, path_to_entry, de); - if (retval == IO_ERROR) { - reiserfs_warning (dir->i_sb, "zam-7001: io error in %s", - __FUNCTION__); - return IO_ERROR; - } - - /* compare names for all entries having given hash value */ - retval = linear_search_in_dir_item (&key_to_search, de, name, namelen); - if (retval != GOTO_PREVIOUS_ITEM) { - /* there is no need to scan directory anymore. 
Given entry found or does not exist */ - path_to_entry->pos_in_item = de->de_entry_num; - return retval; - } - - /* there is left neighboring item of this directory and given entry can be there */ - set_cpu_key_k_offset (&key_to_search, le_ih_k_offset (de->de_ih) - 1); - pathrelse (path_to_entry); - - } /* while (1) */ + struct cpu_key key_to_search; + int retval; + + if (namelen > REISERFS_MAX_NAME(dir->i_sb->s_blocksize)) + return NAME_NOT_FOUND; + + /* we will search for this key in the tree */ + make_cpu_key(&key_to_search, dir, + get_third_component(dir->i_sb, name, namelen), + TYPE_DIRENTRY, 3); + + while (1) { + retval = + search_by_entry_key(dir->i_sb, &key_to_search, + path_to_entry, de); + if (retval == IO_ERROR) { + reiserfs_warning(dir->i_sb, "zam-7001: io error in %s", + __FUNCTION__); + return IO_ERROR; + } + + /* compare names for all entries having given hash value */ + retval = + linear_search_in_dir_item(&key_to_search, de, name, + namelen); + if (retval != GOTO_PREVIOUS_ITEM) { + /* there is no need to scan directory anymore. 
Given entry found or does not exist */ + path_to_entry->pos_in_item = de->de_entry_num; + return retval; + } + + /* there is left neighboring item of this directory and given entry can be there */ + set_cpu_key_k_offset(&key_to_search, + le_ih_k_offset(de->de_ih) - 1); + pathrelse(path_to_entry); + + } /* while (1) */ } - -static struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { - int retval; - struct inode * inode = NULL; - struct reiserfs_dir_entry de; - INITIALIZE_PATH (path_to_entry); - - if (REISERFS_MAX_NAME (dir->i_sb->s_blocksize) < dentry->d_name.len) - return ERR_PTR(-ENAMETOOLONG); - - reiserfs_write_lock(dir->i_sb); - de.de_gen_number_bit_string = NULL; - retval = reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path_to_entry, &de); - pathrelse (&path_to_entry); - if (retval == NAME_FOUND) { - /* Hide the .reiserfs_priv directory */ - if (reiserfs_xattrs (dir->i_sb) && - !old_format_only(dir->i_sb) && - REISERFS_SB(dir->i_sb)->priv_root && - REISERFS_SB(dir->i_sb)->priv_root->d_inode && - de.de_objectid == le32_to_cpu (INODE_PKEY(REISERFS_SB(dir->i_sb)->priv_root->d_inode)->k_objectid)) { - reiserfs_write_unlock (dir->i_sb); - return ERR_PTR (-EACCES); + int retval; + struct inode *inode = NULL; + struct reiserfs_dir_entry de; + INITIALIZE_PATH(path_to_entry); + + if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len) + return ERR_PTR(-ENAMETOOLONG); + + reiserfs_write_lock(dir->i_sb); + de.de_gen_number_bit_string = NULL; + retval = + reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, + &path_to_entry, &de); + pathrelse(&path_to_entry); + if (retval == NAME_FOUND) { + /* Hide the .reiserfs_priv directory */ + if (reiserfs_xattrs(dir->i_sb) && + !old_format_only(dir->i_sb) && + REISERFS_SB(dir->i_sb)->priv_root && + 
REISERFS_SB(dir->i_sb)->priv_root->d_inode && + de.de_objectid == + le32_to_cpu(INODE_PKEY + (REISERFS_SB(dir->i_sb)->priv_root->d_inode)-> + k_objectid)) { + reiserfs_write_unlock(dir->i_sb); + return ERR_PTR(-EACCES); + } + + inode = + reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); + if (!inode || IS_ERR(inode)) { + reiserfs_write_unlock(dir->i_sb); + return ERR_PTR(-EACCES); + } + + /* Propogate the priv_object flag so we know we're in the priv tree */ + if (is_reiserfs_priv_object(dir)) + reiserfs_mark_inode_private(inode); + } + reiserfs_write_unlock(dir->i_sb); + if (retval == IO_ERROR) { + return ERR_PTR(-EIO); } - inode = reiserfs_iget (dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); - if (!inode || IS_ERR(inode)) { - reiserfs_write_unlock(dir->i_sb); - return ERR_PTR(-EACCES); - } - - /* Propogate the priv_object flag so we know we're in the priv tree */ - if (is_reiserfs_priv_object (dir)) - reiserfs_mark_inode_private (inode); - } - reiserfs_write_unlock(dir->i_sb); - if ( retval == IO_ERROR ) { - return ERR_PTR(-EIO); - } - - if (inode) - return d_splice_alias(inode, dentry); - - d_add(dentry, inode); - return NULL; -} + if (inode) + return d_splice_alias(inode, dentry); + d_add(dentry, inode); + return NULL; +} /* ** looks up the dentry of the parent directory for child. 
@@ -374,40 +388,38 @@ static struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dent */ struct dentry *reiserfs_get_parent(struct dentry *child) { - int retval; - struct inode * inode = NULL; - struct reiserfs_dir_entry de; - INITIALIZE_PATH (path_to_entry); - struct dentry *parent; - struct inode *dir = child->d_inode ; - - - if (dir->i_nlink == 0) { - return ERR_PTR(-ENOENT); - } - de.de_gen_number_bit_string = NULL; - - reiserfs_write_lock(dir->i_sb); - retval = reiserfs_find_entry (dir, "..", 2, &path_to_entry, &de); - pathrelse (&path_to_entry); - if (retval != NAME_FOUND) { + int retval; + struct inode *inode = NULL; + struct reiserfs_dir_entry de; + INITIALIZE_PATH(path_to_entry); + struct dentry *parent; + struct inode *dir = child->d_inode; + + if (dir->i_nlink == 0) { + return ERR_PTR(-ENOENT); + } + de.de_gen_number_bit_string = NULL; + + reiserfs_write_lock(dir->i_sb); + retval = reiserfs_find_entry(dir, "..", 2, &path_to_entry, &de); + pathrelse(&path_to_entry); + if (retval != NAME_FOUND) { + reiserfs_write_unlock(dir->i_sb); + return ERR_PTR(-ENOENT); + } + inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); reiserfs_write_unlock(dir->i_sb); - return ERR_PTR(-ENOENT); - } - inode = reiserfs_iget (dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); - reiserfs_write_unlock(dir->i_sb); - - if (!inode || IS_ERR(inode)) { - return ERR_PTR(-EACCES); - } - parent = d_alloc_anon(inode); - if (!parent) { - iput(inode); - parent = ERR_PTR(-ENOMEM); - } - return parent; -} + if (!inode || IS_ERR(inode)) { + return ERR_PTR(-EACCES); + } + parent = d_alloc_anon(inode); + if (!parent) { + iput(inode); + parent = ERR_PTR(-ENOMEM); + } + return parent; +} /* add entry to the directory (entry can be hidden). 
@@ -415,132 +427,143 @@ insert definition of when hidden directories are used here -Hans Does not mark dir inode dirty, do it after successesfull call to it */ -static int reiserfs_add_entry (struct reiserfs_transaction_handle *th, struct inode * dir, - const char * name, int namelen, struct inode * inode, - int visible) +static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, + struct inode *dir, const char *name, int namelen, + struct inode *inode, int visible) { - struct cpu_key entry_key; - struct reiserfs_de_head * deh; - INITIALIZE_PATH (path); - struct reiserfs_dir_entry de; - int bit_string [MAX_GENERATION_NUMBER / (sizeof(int) * 8) + 1]; - int gen_number; - char small_buf[32+DEH_SIZE] ; /* 48 bytes now and we avoid kmalloc - if we create file with short name */ - char * buffer; - int buflen, paste_size; - int retval; - - BUG_ON (!th->t_trans_id); - - /* cannot allow items to be added into a busy deleted directory */ - if (!namelen) - return -EINVAL; - - if (namelen > REISERFS_MAX_NAME (dir->i_sb->s_blocksize)) - return -ENAMETOOLONG; - - /* each entry has unique key. compose it */ - make_cpu_key (&entry_key, dir, - get_third_component (dir->i_sb, name, namelen), TYPE_DIRENTRY, 3); - - /* get memory for composing the entry */ - buflen = DEH_SIZE + ROUND_UP (namelen); - if (buflen > sizeof (small_buf)) { - buffer = reiserfs_kmalloc (buflen, GFP_NOFS, dir->i_sb); - if (buffer == 0) - return -ENOMEM; - } else - buffer = small_buf; - - paste_size = (get_inode_sd_version (dir) == STAT_DATA_V1) ? 
(DEH_SIZE + namelen) : buflen; - - /* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */ - deh = (struct reiserfs_de_head *)buffer; - deh->deh_location = 0; /* JDM Endian safe if 0 */ - put_deh_offset( deh, cpu_key_k_offset( &entry_key ) ); - deh->deh_state = 0; /* JDM Endian safe if 0 */ - /* put key (ino analog) to de */ - deh->deh_dir_id = INODE_PKEY (inode)->k_dir_id; /* safe: k_dir_id is le */ - deh->deh_objectid = INODE_PKEY (inode)->k_objectid; /* safe: k_objectid is le */ - - /* copy name */ - memcpy ((char *)(deh + 1), name, namelen); - /* padd by 0s to the 4 byte boundary */ - padd_item ((char *)(deh + 1), ROUND_UP (namelen), namelen); - - /* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */ - mark_de_without_sd (deh); - visible ? mark_de_visible (deh) : mark_de_hidden (deh); - - /* find the proper place for the new entry */ - memset (bit_string, 0, sizeof (bit_string)); - de.de_gen_number_bit_string = (char *)bit_string; - retval = reiserfs_find_entry (dir, name, namelen, &path, &de); - if( retval != NAME_NOT_FOUND ) { - if (buffer != small_buf) - reiserfs_kfree (buffer, buflen, dir->i_sb); - pathrelse (&path); + struct cpu_key entry_key; + struct reiserfs_de_head *deh; + INITIALIZE_PATH(path); + struct reiserfs_dir_entry de; + int bit_string[MAX_GENERATION_NUMBER / (sizeof(int) * 8) + 1]; + int gen_number; + char small_buf[32 + DEH_SIZE]; /* 48 bytes now and we avoid kmalloc + if we create file with short name */ + char *buffer; + int buflen, paste_size; + int retval; + + BUG_ON(!th->t_trans_id); + + /* cannot allow items to be added into a busy deleted directory */ + if (!namelen) + return -EINVAL; + + if (namelen > REISERFS_MAX_NAME(dir->i_sb->s_blocksize)) + return -ENAMETOOLONG; + + /* each entry has unique key. 
compose it */ + make_cpu_key(&entry_key, dir, + get_third_component(dir->i_sb, name, namelen), + TYPE_DIRENTRY, 3); + + /* get memory for composing the entry */ + buflen = DEH_SIZE + ROUND_UP(namelen); + if (buflen > sizeof(small_buf)) { + buffer = reiserfs_kmalloc(buflen, GFP_NOFS, dir->i_sb); + if (buffer == 0) + return -ENOMEM; + } else + buffer = small_buf; + + paste_size = + (get_inode_sd_version(dir) == + STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen; + + /* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */ + deh = (struct reiserfs_de_head *)buffer; + deh->deh_location = 0; /* JDM Endian safe if 0 */ + put_deh_offset(deh, cpu_key_k_offset(&entry_key)); + deh->deh_state = 0; /* JDM Endian safe if 0 */ + /* put key (ino analog) to de */ + deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id; /* safe: k_dir_id is le */ + deh->deh_objectid = INODE_PKEY(inode)->k_objectid; /* safe: k_objectid is le */ + + /* copy name */ + memcpy((char *)(deh + 1), name, namelen); + /* padd by 0s to the 4 byte boundary */ + padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen); + + /* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */ + mark_de_without_sd(deh); + visible ? 
mark_de_visible(deh) : mark_de_hidden(deh); + + /* find the proper place for the new entry */ + memset(bit_string, 0, sizeof(bit_string)); + de.de_gen_number_bit_string = (char *)bit_string; + retval = reiserfs_find_entry(dir, name, namelen, &path, &de); + if (retval != NAME_NOT_FOUND) { + if (buffer != small_buf) + reiserfs_kfree(buffer, buflen, dir->i_sb); + pathrelse(&path); + + if (retval == IO_ERROR) { + return -EIO; + } + + if (retval != NAME_FOUND) { + reiserfs_warning(dir->i_sb, + "zam-7002:%s: \"reiserfs_find_entry\" " + "has returned unexpected value (%d)", + __FUNCTION__, retval); + } + + return -EEXIST; + } - if ( retval == IO_ERROR ) { - return -EIO; + gen_number = + find_first_zero_bit((unsigned long *)bit_string, + MAX_GENERATION_NUMBER + 1); + if (gen_number > MAX_GENERATION_NUMBER) { + /* there is no free generation number */ + reiserfs_warning(dir->i_sb, + "reiserfs_add_entry: Congratulations! we have got hash function screwed up"); + if (buffer != small_buf) + reiserfs_kfree(buffer, buflen, dir->i_sb); + pathrelse(&path); + return -EBUSY; + } + /* adjust offset of directory enrty */ + put_deh_offset(deh, SET_GENERATION_NUMBER(deh_offset(deh), gen_number)); + set_cpu_key_k_offset(&entry_key, deh_offset(deh)); + + /* update max-hash-collisions counter in reiserfs_sb_info */ + PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number); + + if (gen_number != 0) { /* we need to re-search for the insertion point */ + if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) != + NAME_NOT_FOUND) { + reiserfs_warning(dir->i_sb, + "vs-7032: reiserfs_add_entry: " + "entry with this key (%K) already exists", + &entry_key); + + if (buffer != small_buf) + reiserfs_kfree(buffer, buflen, dir->i_sb); + pathrelse(&path); + return -EBUSY; + } } - if (retval != NAME_FOUND) { - reiserfs_warning (dir->i_sb, "zam-7002:%s: \"reiserfs_find_entry\" " - "has returned unexpected value (%d)", - __FUNCTION__, retval); - } - - return -EEXIST; - } - - gen_number = 
find_first_zero_bit ((unsigned long *)bit_string, MAX_GENERATION_NUMBER + 1); - if (gen_number > MAX_GENERATION_NUMBER) { - /* there is no free generation number */ - reiserfs_warning (dir->i_sb, "reiserfs_add_entry: Congratulations! we have got hash function screwed up"); - if (buffer != small_buf) - reiserfs_kfree (buffer, buflen, dir->i_sb); - pathrelse (&path); - return -EBUSY; - } - /* adjust offset of directory enrty */ - put_deh_offset(deh, SET_GENERATION_NUMBER(deh_offset(deh), gen_number)); - set_cpu_key_k_offset (&entry_key, deh_offset(deh)); - - /* update max-hash-collisions counter in reiserfs_sb_info */ - PROC_INFO_MAX( th -> t_super, max_hash_collisions, gen_number ); - - if (gen_number != 0) { /* we need to re-search for the insertion point */ - if (search_by_entry_key (dir->i_sb, &entry_key, &path, &de) != NAME_NOT_FOUND) { - reiserfs_warning (dir->i_sb, "vs-7032: reiserfs_add_entry: " - "entry with this key (%K) already exists", - &entry_key); - - if (buffer != small_buf) - reiserfs_kfree (buffer, buflen, dir->i_sb); - pathrelse (&path); - return -EBUSY; + /* perform the insertion of the entry that we have prepared */ + retval = + reiserfs_paste_into_item(th, &path, &entry_key, dir, buffer, + paste_size); + if (buffer != small_buf) + reiserfs_kfree(buffer, buflen, dir->i_sb); + if (retval) { + reiserfs_check_path(&path); + return retval; } - } - - /* perform the insertion of the entry that we have prepared */ - retval = reiserfs_paste_into_item (th, &path, &entry_key, dir, buffer, paste_size); - if (buffer != small_buf) - reiserfs_kfree (buffer, buflen, dir->i_sb); - if (retval) { - reiserfs_check_path(&path) ; - return retval; - } - dir->i_size += paste_size; - dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; - if (!S_ISDIR (inode->i_mode) && visible) - // reiserfs_mkdir or reiserfs_rename will do that by itself - reiserfs_update_sd (th, dir); + dir->i_size += paste_size; + dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + if 
(!S_ISDIR(inode->i_mode) && visible) + // reiserfs_mkdir or reiserfs_rename will do that by itself + reiserfs_update_sd(th, dir); - reiserfs_check_path(&path) ; - return 0; + reiserfs_check_path(&path); + return 0; } /* quota utility function, call if you've had to abort after calling @@ -548,12 +571,13 @@ static int reiserfs_add_entry (struct reiserfs_transaction_handle *th, struct in ** This should only be called on inodes that do not have stat data ** inserted into the tree yet. */ -static int drop_new_inode(struct inode *inode) { - DQUOT_DROP(inode); - make_bad_inode(inode) ; - inode->i_flags |= S_NOQUOTA; - iput(inode) ; - return 0 ; +static int drop_new_inode(struct inode *inode) +{ + DQUOT_DROP(inode); + make_bad_inode(inode); + inode->i_flags |= S_NOQUOTA; + iput(inode); + return 0; } /* utility function that does setup for reiserfs_new_inode. @@ -561,905 +585,968 @@ static int drop_new_inode(struct inode *inode) { ** outside of a transaction, so we had to pull some bits of ** reiserfs_new_inode out into this func. 
*/ -static int new_inode_init(struct inode *inode, struct inode *dir, int mode) { - - /* the quota init calls have to know who to charge the quota to, so - ** we have to set uid and gid here - */ - inode->i_uid = current->fsuid; - inode->i_mode = mode; - - if (dir->i_mode & S_ISGID) { - inode->i_gid = dir->i_gid; - if (S_ISDIR(mode)) - inode->i_mode |= S_ISGID; - } else { - inode->i_gid = current->fsgid; - } - DQUOT_INIT(inode); - return 0 ; +static int new_inode_init(struct inode *inode, struct inode *dir, int mode) +{ + + /* the quota init calls have to know who to charge the quota to, so + ** we have to set uid and gid here + */ + inode->i_uid = current->fsuid; + inode->i_mode = mode; + + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + inode->i_mode |= S_ISGID; + } else { + inode->i_gid = current->fsgid; + } + DQUOT_INIT(inode); + return 0; } -static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode, - struct nameidata *nd) +static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { - int retval; - struct inode * inode; - /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); - struct reiserfs_transaction_handle th ; - int locked; - - if (!(inode = new_inode(dir->i_sb))) { - return -ENOMEM ; - } - new_inode_init(inode, dir, mode); - - locked = reiserfs_cache_default_acl (dir); - - reiserfs_write_lock(dir->i_sb); - - if (locked) - reiserfs_write_lock_xattrs (dir->i_sb); - - retval = journal_begin(&th, dir->i_sb, jbegin_count); - if (retval) { - drop_new_inode (inode); - goto out_failed; - } - - retval = reiserfs_new_inode (&th, dir, mode, NULL, 0/*i_size*/, dentry, inode); - if (retval) - goto out_failed; - - if (locked) { - reiserfs_write_unlock_xattrs (dir->i_sb); - 
locked = 0; - } - - inode->i_op = &reiserfs_file_inode_operations; - inode->i_fop = &reiserfs_file_operations; - inode->i_mapping->a_ops = &reiserfs_address_space_operations ; - - retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, - inode, 1/*visible*/); - if (retval) { - int err; - inode->i_nlink--; - reiserfs_update_sd (&th, inode); - err = journal_end(&th, dir->i_sb, jbegin_count) ; - if (err) - retval = err; - iput (inode); - goto out_failed; - } - reiserfs_update_inode_transaction(inode) ; - reiserfs_update_inode_transaction(dir) ; - - d_instantiate(dentry, inode); - retval = journal_end(&th, dir->i_sb, jbegin_count) ; - -out_failed: - if (locked) - reiserfs_write_unlock_xattrs (dir->i_sb); - reiserfs_write_unlock(dir->i_sb); - return retval; -} + int retval; + struct inode *inode; + /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ + int jbegin_count = + JOURNAL_PER_BALANCE_CNT * 2 + + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + + REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); + struct reiserfs_transaction_handle th; + int locked; + + if (!(inode = new_inode(dir->i_sb))) { + return -ENOMEM; + } + new_inode_init(inode, dir, mode); + locked = reiserfs_cache_default_acl(dir); -static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) -{ - int retval; - struct inode * inode; - struct reiserfs_transaction_handle th ; - /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); - int locked; + reiserfs_write_lock(dir->i_sb); - if (!new_valid_dev(rdev)) - return -EINVAL; + if (locked) + reiserfs_write_lock_xattrs(dir->i_sb); + + retval = journal_begin(&th, dir->i_sb, jbegin_count); + if (retval) { + drop_new_inode(inode); + goto out_failed; + } + + retval = + 
reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry, + inode); + if (retval) + goto out_failed; + + if (locked) { + reiserfs_write_unlock_xattrs(dir->i_sb); + locked = 0; + } + + inode->i_op = &reiserfs_file_inode_operations; + inode->i_fop = &reiserfs_file_operations; + inode->i_mapping->a_ops = &reiserfs_address_space_operations; + + retval = + reiserfs_add_entry(&th, dir, dentry->d_name.name, + dentry->d_name.len, inode, 1 /*visible */ ); + if (retval) { + int err; + inode->i_nlink--; + reiserfs_update_sd(&th, inode); + err = journal_end(&th, dir->i_sb, jbegin_count); + if (err) + retval = err; + iput(inode); + goto out_failed; + } + reiserfs_update_inode_transaction(inode); + reiserfs_update_inode_transaction(dir); - if (!(inode = new_inode(dir->i_sb))) { - return -ENOMEM ; - } - new_inode_init(inode, dir, mode); + d_instantiate(dentry, inode); + retval = journal_end(&th, dir->i_sb, jbegin_count); - locked = reiserfs_cache_default_acl (dir); + out_failed: + if (locked) + reiserfs_write_unlock_xattrs(dir->i_sb); + reiserfs_write_unlock(dir->i_sb); + return retval; +} - reiserfs_write_lock(dir->i_sb); +static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode, + dev_t rdev) +{ + int retval; + struct inode *inode; + struct reiserfs_transaction_handle th; + /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ + int jbegin_count = + JOURNAL_PER_BALANCE_CNT * 3 + + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + + REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); + int locked; + + if (!new_valid_dev(rdev)) + return -EINVAL; + + if (!(inode = new_inode(dir->i_sb))) { + return -ENOMEM; + } + new_inode_init(inode, dir, mode); - if (locked) - reiserfs_write_lock_xattrs (dir->i_sb); + locked = reiserfs_cache_default_acl(dir); - retval = journal_begin(&th, dir->i_sb, jbegin_count) ; - if (retval) { - drop_new_inode (inode); - goto out_failed; - } + reiserfs_write_lock(dir->i_sb); - retval = 
reiserfs_new_inode (&th, dir, mode, NULL, 0/*i_size*/, dentry, inode); - if (retval) { - goto out_failed; - } + if (locked) + reiserfs_write_lock_xattrs(dir->i_sb); - if (locked) { - reiserfs_write_unlock_xattrs (dir->i_sb); - locked = 0; - } + retval = journal_begin(&th, dir->i_sb, jbegin_count); + if (retval) { + drop_new_inode(inode); + goto out_failed; + } + retval = + reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry, + inode); + if (retval) { + goto out_failed; + } - inode->i_op = &reiserfs_special_inode_operations; - init_special_inode(inode, inode->i_mode, rdev) ; + if (locked) { + reiserfs_write_unlock_xattrs(dir->i_sb); + locked = 0; + } - //FIXME: needed for block and char devices only - reiserfs_update_sd (&th, inode); + inode->i_op = &reiserfs_special_inode_operations; + init_special_inode(inode, inode->i_mode, rdev); + + //FIXME: needed for block and char devices only + reiserfs_update_sd(&th, inode); + + reiserfs_update_inode_transaction(inode); + reiserfs_update_inode_transaction(dir); + + retval = + reiserfs_add_entry(&th, dir, dentry->d_name.name, + dentry->d_name.len, inode, 1 /*visible */ ); + if (retval) { + int err; + inode->i_nlink--; + reiserfs_update_sd(&th, inode); + err = journal_end(&th, dir->i_sb, jbegin_count); + if (err) + retval = err; + iput(inode); + goto out_failed; + } - reiserfs_update_inode_transaction(inode) ; - reiserfs_update_inode_transaction(dir) ; + d_instantiate(dentry, inode); + retval = journal_end(&th, dir->i_sb, jbegin_count); - retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, - inode, 1/*visible*/); - if (retval) { - int err; - inode->i_nlink--; - reiserfs_update_sd (&th, inode); - err = journal_end(&th, dir->i_sb, jbegin_count) ; - if (err) - retval = err; - iput (inode); - goto out_failed; - } - - d_instantiate(dentry, inode); - retval = journal_end(&th, dir->i_sb, jbegin_count) ; - -out_failed: - if (locked) - reiserfs_write_unlock_xattrs (dir->i_sb); - 
reiserfs_write_unlock(dir->i_sb); - return retval; + out_failed: + if (locked) + reiserfs_write_unlock_xattrs(dir->i_sb); + reiserfs_write_unlock(dir->i_sb); + return retval; } - -static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode) +static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { - int retval; - struct inode * inode; - struct reiserfs_transaction_handle th ; - /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); - int locked; + int retval; + struct inode *inode; + struct reiserfs_transaction_handle th; + /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ + int jbegin_count = + JOURNAL_PER_BALANCE_CNT * 3 + + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + + REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); + int locked; #ifdef DISPLACE_NEW_PACKING_LOCALITIES - /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ - REISERFS_I(dir)->new_packing_locality = 1; + /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ + REISERFS_I(dir)->new_packing_locality = 1; #endif - mode = S_IFDIR | mode; - if (!(inode = new_inode(dir->i_sb))) { - return -ENOMEM ; - } - new_inode_init(inode, dir, mode); - - locked = reiserfs_cache_default_acl (dir); - - reiserfs_write_lock(dir->i_sb); - if (locked) - reiserfs_write_lock_xattrs (dir->i_sb); - - retval = journal_begin(&th, dir->i_sb, jbegin_count) ; - if (retval) { - drop_new_inode (inode); - goto out_failed; - } - - - /* inc the link count now, so another writer doesn't overflow it while - ** we sleep later on. 
- */ - INC_DIR_INODE_NLINK(dir) - - retval = reiserfs_new_inode (&th, dir, mode, NULL/*symlink*/, - old_format_only (dir->i_sb) ? - EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, - dentry, inode); - if (retval) { - dir->i_nlink-- ; - goto out_failed; - } - - if (locked) { - reiserfs_write_unlock_xattrs (dir->i_sb); - locked = 0; - } - - reiserfs_update_inode_transaction(inode) ; - reiserfs_update_inode_transaction(dir) ; - - inode->i_op = &reiserfs_dir_inode_operations; - inode->i_fop = &reiserfs_dir_operations; - - // note, _this_ add_entry will not update dir's stat data - retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, - inode, 1/*visible*/); - if (retval) { - int err; - inode->i_nlink = 0; - DEC_DIR_INODE_NLINK(dir); - reiserfs_update_sd (&th, inode); - err = journal_end(&th, dir->i_sb, jbegin_count) ; - if (err) - retval = err; - iput (inode); - goto out_failed; - } - - // the above add_entry did not update dir's stat data - reiserfs_update_sd (&th, dir); - - d_instantiate(dentry, inode); - retval = journal_end(&th, dir->i_sb, jbegin_count) ; -out_failed: - if (locked) - reiserfs_write_unlock_xattrs (dir->i_sb); - reiserfs_write_unlock(dir->i_sb); - return retval; -} + mode = S_IFDIR | mode; + if (!(inode = new_inode(dir->i_sb))) { + return -ENOMEM; + } + new_inode_init(inode, dir, mode); + + locked = reiserfs_cache_default_acl(dir); + + reiserfs_write_lock(dir->i_sb); + if (locked) + reiserfs_write_lock_xattrs(dir->i_sb); + + retval = journal_begin(&th, dir->i_sb, jbegin_count); + if (retval) { + drop_new_inode(inode); + goto out_failed; + } -static inline int reiserfs_empty_dir(struct inode *inode) { - /* we can cheat because an old format dir cannot have - ** EMPTY_DIR_SIZE, and a new format dir cannot have - ** EMPTY_DIR_SIZE_V1. So, if the inode is either size, - ** regardless of disk format version, the directory is empty. 
- */ - if (inode->i_size != EMPTY_DIR_SIZE && - inode->i_size != EMPTY_DIR_SIZE_V1) { - return 0 ; - } - return 1 ; + /* inc the link count now, so another writer doesn't overflow it while + ** we sleep later on. + */ + INC_DIR_INODE_NLINK(dir) + + retval = reiserfs_new_inode(&th, dir, mode, NULL /*symlink */ , + old_format_only(dir->i_sb) ? + EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, + dentry, inode); + if (retval) { + dir->i_nlink--; + goto out_failed; + } + + if (locked) { + reiserfs_write_unlock_xattrs(dir->i_sb); + locked = 0; + } + + reiserfs_update_inode_transaction(inode); + reiserfs_update_inode_transaction(dir); + + inode->i_op = &reiserfs_dir_inode_operations; + inode->i_fop = &reiserfs_dir_operations; + + // note, _this_ add_entry will not update dir's stat data + retval = + reiserfs_add_entry(&th, dir, dentry->d_name.name, + dentry->d_name.len, inode, 1 /*visible */ ); + if (retval) { + int err; + inode->i_nlink = 0; + DEC_DIR_INODE_NLINK(dir); + reiserfs_update_sd(&th, inode); + err = journal_end(&th, dir->i_sb, jbegin_count); + if (err) + retval = err; + iput(inode); + goto out_failed; + } + // the above add_entry did not update dir's stat data + reiserfs_update_sd(&th, dir); + + d_instantiate(dentry, inode); + retval = journal_end(&th, dir->i_sb, jbegin_count); + out_failed: + if (locked) + reiserfs_write_unlock_xattrs(dir->i_sb); + reiserfs_write_unlock(dir->i_sb); + return retval; } -static int reiserfs_rmdir (struct inode * dir, struct dentry *dentry) +static inline int reiserfs_empty_dir(struct inode *inode) { - int retval, err; - struct inode * inode; - struct reiserfs_transaction_handle th ; - int jbegin_count; - INITIALIZE_PATH (path); - struct reiserfs_dir_entry de; - - - /* we will be doing 2 balancings and update 2 stat data, we change quotas - * of the owner of the directory and of the owner of the parent directory. 
- * The quota structure is possibly deleted only on last iput => outside - * of this transaction */ - jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); - - reiserfs_write_lock(dir->i_sb); - retval = journal_begin(&th, dir->i_sb, jbegin_count) ; - if (retval) - goto out_rmdir; - - de.de_gen_number_bit_string = NULL; - if ( (retval = reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path, &de)) == NAME_NOT_FOUND) { - retval = -ENOENT; - goto end_rmdir; - } else if ( retval == IO_ERROR) { - retval = -EIO; - goto end_rmdir; - } - - inode = dentry->d_inode; - - reiserfs_update_inode_transaction(inode) ; - reiserfs_update_inode_transaction(dir) ; - - if (de.de_objectid != inode->i_ino) { - // FIXME: compare key of an object and a key found in the - // entry - retval = -EIO; - goto end_rmdir; - } - if (!reiserfs_empty_dir(inode)) { - retval = -ENOTEMPTY; - goto end_rmdir; - } - - /* cut entry from dir directory */ - retval = reiserfs_cut_from_item (&th, &path, &(de.de_entry_key), dir, - NULL, /* page */ - 0/*new file size - not used here*/); - if (retval < 0) - goto end_rmdir; - - if ( inode->i_nlink != 2 && inode->i_nlink != 1 ) - reiserfs_warning (inode->i_sb, "%s: empty directory has nlink " - "!= 2 (%d)", __FUNCTION__, inode->i_nlink); - - inode->i_nlink = 0; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - reiserfs_update_sd (&th, inode); - - DEC_DIR_INODE_NLINK(dir) - dir->i_size -= (DEH_SIZE + de.de_entrylen); - reiserfs_update_sd (&th, dir); - - /* prevent empty directory from getting lost */ - add_save_link (&th, inode, 0/* not truncate */); - - retval = journal_end(&th, dir->i_sb, jbegin_count) ; - reiserfs_check_path(&path) ; -out_rmdir: - reiserfs_write_unlock(dir->i_sb); - return retval; - - end_rmdir: - /* we must release path, because we did not call - reiserfs_cut_from_item, or reiserfs_cut_from_item does not - release path if operation was not complete */ - pathrelse (&path); 
- err = journal_end(&th, dir->i_sb, jbegin_count) ; - reiserfs_write_unlock(dir->i_sb); - return err ? err : retval; + /* we can cheat because an old format dir cannot have + ** EMPTY_DIR_SIZE, and a new format dir cannot have + ** EMPTY_DIR_SIZE_V1. So, if the inode is either size, + ** regardless of disk format version, the directory is empty. + */ + if (inode->i_size != EMPTY_DIR_SIZE && + inode->i_size != EMPTY_DIR_SIZE_V1) { + return 0; + } + return 1; } -static int reiserfs_unlink (struct inode * dir, struct dentry *dentry) +static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) { - int retval, err; - struct inode * inode; - struct reiserfs_dir_entry de; - INITIALIZE_PATH (path); - struct reiserfs_transaction_handle th ; - int jbegin_count; - unsigned long savelink; - - inode = dentry->d_inode; - - /* in this transaction we can be doing at max two balancings and update - * two stat datas, we change quotas of the owner of the directory and of - * the owner of the parent directory. 
The quota structure is possibly - * deleted only on iput => outside of this transaction */ - jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); - - reiserfs_write_lock(dir->i_sb); - retval = journal_begin(&th, dir->i_sb, jbegin_count) ; - if (retval) - goto out_unlink; - - de.de_gen_number_bit_string = NULL; - if ( (retval = reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path, &de)) == NAME_NOT_FOUND) { - retval = -ENOENT; - goto end_unlink; - } else if (retval == IO_ERROR) { - retval = -EIO; - goto end_unlink; - } - - reiserfs_update_inode_transaction(inode) ; - reiserfs_update_inode_transaction(dir) ; - - if (de.de_objectid != inode->i_ino) { - // FIXME: compare key of an object and a key found in the - // entry - retval = -EIO; - goto end_unlink; - } - - if (!inode->i_nlink) { - reiserfs_warning (inode->i_sb, "%s: deleting nonexistent file " - "(%s:%lu), %d", __FUNCTION__, - reiserfs_bdevname (inode->i_sb), inode->i_ino, - inode->i_nlink); - inode->i_nlink = 1; - } - - inode->i_nlink--; - - /* - * we schedule before doing the add_save_link call, save the link - * count so we don't race - */ - savelink = inode->i_nlink; - - - retval = reiserfs_cut_from_item (&th, &path, &(de.de_entry_key), dir, NULL, 0); - if (retval < 0) { - inode->i_nlink++; - goto end_unlink; - } - inode->i_ctime = CURRENT_TIME_SEC; - reiserfs_update_sd (&th, inode); - - dir->i_size -= (de.de_entrylen + DEH_SIZE); - dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - reiserfs_update_sd (&th, dir); - - if (!savelink) - /* prevent file from getting lost */ - add_save_link (&th, inode, 0/* not truncate */); - - retval = journal_end(&th, dir->i_sb, jbegin_count) ; - reiserfs_check_path(&path) ; - reiserfs_write_unlock(dir->i_sb); - return retval; - - end_unlink: - pathrelse (&path); - err = journal_end(&th, dir->i_sb, jbegin_count) ; - reiserfs_check_path(&path) ; - if (err) - retval = err; -out_unlink: - 
reiserfs_write_unlock(dir->i_sb); - return retval; + int retval, err; + struct inode *inode; + struct reiserfs_transaction_handle th; + int jbegin_count; + INITIALIZE_PATH(path); + struct reiserfs_dir_entry de; + + /* we will be doing 2 balancings and update 2 stat data, we change quotas + * of the owner of the directory and of the owner of the parent directory. + * The quota structure is possibly deleted only on last iput => outside + * of this transaction */ + jbegin_count = + JOURNAL_PER_BALANCE_CNT * 2 + 2 + + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); + + reiserfs_write_lock(dir->i_sb); + retval = journal_begin(&th, dir->i_sb, jbegin_count); + if (retval) + goto out_rmdir; + + de.de_gen_number_bit_string = NULL; + if ((retval = + reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, + &path, &de)) == NAME_NOT_FOUND) { + retval = -ENOENT; + goto end_rmdir; + } else if (retval == IO_ERROR) { + retval = -EIO; + goto end_rmdir; + } + + inode = dentry->d_inode; + + reiserfs_update_inode_transaction(inode); + reiserfs_update_inode_transaction(dir); + + if (de.de_objectid != inode->i_ino) { + // FIXME: compare key of an object and a key found in the + // entry + retval = -EIO; + goto end_rmdir; + } + if (!reiserfs_empty_dir(inode)) { + retval = -ENOTEMPTY; + goto end_rmdir; + } + + /* cut entry from dir directory */ + retval = reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL, /* page */ + 0 /*new file size - not used here */ ); + if (retval < 0) + goto end_rmdir; + + if (inode->i_nlink != 2 && inode->i_nlink != 1) + reiserfs_warning(inode->i_sb, "%s: empty directory has nlink " + "!= 2 (%d)", __FUNCTION__, inode->i_nlink); + + inode->i_nlink = 0; + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + reiserfs_update_sd(&th, inode); + + DEC_DIR_INODE_NLINK(dir) + dir->i_size -= (DEH_SIZE + de.de_entrylen); + reiserfs_update_sd(&th, dir); + + /* prevent empty directory from getting lost */ + add_save_link(&th, inode, 0 /* not 
truncate */ ); + + retval = journal_end(&th, dir->i_sb, jbegin_count); + reiserfs_check_path(&path); + out_rmdir: + reiserfs_write_unlock(dir->i_sb); + return retval; + + end_rmdir: + /* we must release path, because we did not call + reiserfs_cut_from_item, or reiserfs_cut_from_item does not + release path if operation was not complete */ + pathrelse(&path); + err = journal_end(&th, dir->i_sb, jbegin_count); + reiserfs_write_unlock(dir->i_sb); + return err ? err : retval; } -static int reiserfs_symlink (struct inode * parent_dir, - struct dentry * dentry, const char * symname) +static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) { - int retval; - struct inode * inode; - char * name; - int item_len; - struct reiserfs_transaction_handle th ; - int mode = S_IFLNK | S_IRWXUGO; - /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb)); - - if (!(inode = new_inode(parent_dir->i_sb))) { - return -ENOMEM ; - } - new_inode_init(inode, parent_dir, mode); - - reiserfs_write_lock(parent_dir->i_sb); - item_len = ROUND_UP (strlen (symname)); - if (item_len > MAX_DIRECT_ITEM_LEN (parent_dir->i_sb->s_blocksize)) { - retval = -ENAMETOOLONG; - drop_new_inode(inode); - goto out_failed; - } - - name = reiserfs_kmalloc (item_len, GFP_NOFS, parent_dir->i_sb); - if (!name) { - drop_new_inode(inode); - retval = -ENOMEM; - goto out_failed; - } - memcpy (name, symname, strlen (symname)); - padd_item (name, item_len, strlen (symname)); - - /* We would inherit the default ACL here, but symlinks don't get ACLs */ - - retval = journal_begin(&th, parent_dir->i_sb, jbegin_count) ; - if (retval) { - drop_new_inode (inode); - reiserfs_kfree (name, item_len, parent_dir->i_sb); - goto out_failed; - } - - retval = reiserfs_new_inode (&th, parent_dir, mode, name, strlen (symname), - 
dentry, inode); - reiserfs_kfree (name, item_len, parent_dir->i_sb); - if (retval) { /* reiserfs_new_inode iputs for us */ - goto out_failed; - } - - reiserfs_update_inode_transaction(inode) ; - reiserfs_update_inode_transaction(parent_dir) ; - - inode->i_op = &reiserfs_symlink_inode_operations; - inode->i_mapping->a_ops = &reiserfs_address_space_operations; - - // must be sure this inode is written with this transaction - // - //reiserfs_update_sd (&th, inode, READ_BLOCKS); - - retval = reiserfs_add_entry (&th, parent_dir, dentry->d_name.name, - dentry->d_name.len, inode, 1/*visible*/); - if (retval) { - int err; + int retval, err; + struct inode *inode; + struct reiserfs_dir_entry de; + INITIALIZE_PATH(path); + struct reiserfs_transaction_handle th; + int jbegin_count; + unsigned long savelink; + + inode = dentry->d_inode; + + /* in this transaction we can be doing at max two balancings and update + * two stat datas, we change quotas of the owner of the directory and of + * the owner of the parent directory. 
The quota structure is possibly + * deleted only on iput => outside of this transaction */ + jbegin_count = + JOURNAL_PER_BALANCE_CNT * 2 + 2 + + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); + + reiserfs_write_lock(dir->i_sb); + retval = journal_begin(&th, dir->i_sb, jbegin_count); + if (retval) + goto out_unlink; + + de.de_gen_number_bit_string = NULL; + if ((retval = + reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, + &path, &de)) == NAME_NOT_FOUND) { + retval = -ENOENT; + goto end_unlink; + } else if (retval == IO_ERROR) { + retval = -EIO; + goto end_unlink; + } + + reiserfs_update_inode_transaction(inode); + reiserfs_update_inode_transaction(dir); + + if (de.de_objectid != inode->i_ino) { + // FIXME: compare key of an object and a key found in the + // entry + retval = -EIO; + goto end_unlink; + } + + if (!inode->i_nlink) { + reiserfs_warning(inode->i_sb, "%s: deleting nonexistent file " + "(%s:%lu), %d", __FUNCTION__, + reiserfs_bdevname(inode->i_sb), inode->i_ino, + inode->i_nlink); + inode->i_nlink = 1; + } + inode->i_nlink--; - reiserfs_update_sd (&th, inode); - err = journal_end(&th, parent_dir->i_sb, jbegin_count) ; + + /* + * we schedule before doing the add_save_link call, save the link + * count so we don't race + */ + savelink = inode->i_nlink; + + retval = + reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL, + 0); + if (retval < 0) { + inode->i_nlink++; + goto end_unlink; + } + inode->i_ctime = CURRENT_TIME_SEC; + reiserfs_update_sd(&th, inode); + + dir->i_size -= (de.de_entrylen + DEH_SIZE); + dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + reiserfs_update_sd(&th, dir); + + if (!savelink) + /* prevent file from getting lost */ + add_save_link(&th, inode, 0 /* not truncate */ ); + + retval = journal_end(&th, dir->i_sb, jbegin_count); + reiserfs_check_path(&path); + reiserfs_write_unlock(dir->i_sb); + return retval; + + end_unlink: + pathrelse(&path); + err = journal_end(&th, dir->i_sb, jbegin_count); + 
reiserfs_check_path(&path); if (err) - retval = err; - iput (inode); - goto out_failed; - } - - d_instantiate(dentry, inode); - retval = journal_end(&th, parent_dir->i_sb, jbegin_count) ; -out_failed: - reiserfs_write_unlock(parent_dir->i_sb); - return retval; + retval = err; + out_unlink: + reiserfs_write_unlock(dir->i_sb); + return retval; } -static int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct dentry * dentry) +static int reiserfs_symlink(struct inode *parent_dir, + struct dentry *dentry, const char *symname) { - int retval; - struct inode *inode = old_dentry->d_inode; - struct reiserfs_transaction_handle th ; - /* We need blocks for transaction + update of quotas for the owners of the directory */ - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); - - reiserfs_write_lock(dir->i_sb); - if (inode->i_nlink >= REISERFS_LINK_MAX) { - //FIXME: sd_nlink is 32 bit for new files - reiserfs_write_unlock(dir->i_sb); - return -EMLINK; - } - if (inode->i_nlink == 0) { - reiserfs_write_unlock(dir->i_sb); - return -ENOENT; - } - - /* inc before scheduling so reiserfs_unlink knows we are here */ - inode->i_nlink++; - - retval = journal_begin(&th, dir->i_sb, jbegin_count) ; - if (retval) { - inode->i_nlink--; - reiserfs_write_unlock (dir->i_sb); - return retval; - } - - /* create new entry */ - retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, - inode, 1/*visible*/); - - reiserfs_update_inode_transaction(inode) ; - reiserfs_update_inode_transaction(dir) ; - - if (retval) { - int err; - inode->i_nlink--; - err = journal_end(&th, dir->i_sb, jbegin_count) ; - reiserfs_write_unlock(dir->i_sb); - return err ? 
err : retval; - } + int retval; + struct inode *inode; + char *name; + int item_len; + struct reiserfs_transaction_handle th; + int mode = S_IFLNK | S_IRWXUGO; + /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ + int jbegin_count = + JOURNAL_PER_BALANCE_CNT * 3 + + 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) + + REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb)); + + if (!(inode = new_inode(parent_dir->i_sb))) { + return -ENOMEM; + } + new_inode_init(inode, parent_dir, mode); + + reiserfs_write_lock(parent_dir->i_sb); + item_len = ROUND_UP(strlen(symname)); + if (item_len > MAX_DIRECT_ITEM_LEN(parent_dir->i_sb->s_blocksize)) { + retval = -ENAMETOOLONG; + drop_new_inode(inode); + goto out_failed; + } + + name = reiserfs_kmalloc(item_len, GFP_NOFS, parent_dir->i_sb); + if (!name) { + drop_new_inode(inode); + retval = -ENOMEM; + goto out_failed; + } + memcpy(name, symname, strlen(symname)); + padd_item(name, item_len, strlen(symname)); + + /* We would inherit the default ACL here, but symlinks don't get ACLs */ + + retval = journal_begin(&th, parent_dir->i_sb, jbegin_count); + if (retval) { + drop_new_inode(inode); + reiserfs_kfree(name, item_len, parent_dir->i_sb); + goto out_failed; + } + + retval = + reiserfs_new_inode(&th, parent_dir, mode, name, strlen(symname), + dentry, inode); + reiserfs_kfree(name, item_len, parent_dir->i_sb); + if (retval) { /* reiserfs_new_inode iputs for us */ + goto out_failed; + } - inode->i_ctime = CURRENT_TIME_SEC; - reiserfs_update_sd (&th, inode); + reiserfs_update_inode_transaction(inode); + reiserfs_update_inode_transaction(parent_dir); + + inode->i_op = &reiserfs_symlink_inode_operations; + inode->i_mapping->a_ops = &reiserfs_address_space_operations; + + // must be sure this inode is written with this transaction + // + //reiserfs_update_sd (&th, inode, READ_BLOCKS); + + retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name, + dentry->d_name.len, inode, 
1 /*visible */ ); + if (retval) { + int err; + inode->i_nlink--; + reiserfs_update_sd(&th, inode); + err = journal_end(&th, parent_dir->i_sb, jbegin_count); + if (err) + retval = err; + iput(inode); + goto out_failed; + } - atomic_inc(&inode->i_count) ; - d_instantiate(dentry, inode); - retval = journal_end(&th, dir->i_sb, jbegin_count) ; - reiserfs_write_unlock(dir->i_sb); - return retval; + d_instantiate(dentry, inode); + retval = journal_end(&th, parent_dir->i_sb, jbegin_count); + out_failed: + reiserfs_write_unlock(parent_dir->i_sb); + return retval; } +static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + int retval; + struct inode *inode = old_dentry->d_inode; + struct reiserfs_transaction_handle th; + /* We need blocks for transaction + update of quotas for the owners of the directory */ + int jbegin_count = + JOURNAL_PER_BALANCE_CNT * 3 + + 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); + + reiserfs_write_lock(dir->i_sb); + if (inode->i_nlink >= REISERFS_LINK_MAX) { + //FIXME: sd_nlink is 32 bit for new files + reiserfs_write_unlock(dir->i_sb); + return -EMLINK; + } + if (inode->i_nlink == 0) { + reiserfs_write_unlock(dir->i_sb); + return -ENOENT; + } + + /* inc before scheduling so reiserfs_unlink knows we are here */ + inode->i_nlink++; + + retval = journal_begin(&th, dir->i_sb, jbegin_count); + if (retval) { + inode->i_nlink--; + reiserfs_write_unlock(dir->i_sb); + return retval; + } + + /* create new entry */ + retval = + reiserfs_add_entry(&th, dir, dentry->d_name.name, + dentry->d_name.len, inode, 1 /*visible */ ); + + reiserfs_update_inode_transaction(inode); + reiserfs_update_inode_transaction(dir); + + if (retval) { + int err; + inode->i_nlink--; + err = journal_end(&th, dir->i_sb, jbegin_count); + reiserfs_write_unlock(dir->i_sb); + return err ? 
err : retval; + } + + inode->i_ctime = CURRENT_TIME_SEC; + reiserfs_update_sd(&th, inode); + + atomic_inc(&inode->i_count); + d_instantiate(dentry, inode); + retval = journal_end(&th, dir->i_sb, jbegin_count); + reiserfs_write_unlock(dir->i_sb); + return retval; +} // de contains information pointing to an entry which -static int de_still_valid (const char * name, int len, struct reiserfs_dir_entry * de) +static int de_still_valid(const char *name, int len, + struct reiserfs_dir_entry *de) { - struct reiserfs_dir_entry tmp = *de; - - // recalculate pointer to name and name length - set_de_name_and_namelen (&tmp); - // FIXME: could check more - if (tmp.de_namelen != len || memcmp (name, de->de_name, len)) - return 0; - return 1; + struct reiserfs_dir_entry tmp = *de; + + // recalculate pointer to name and name length + set_de_name_and_namelen(&tmp); + // FIXME: could check more + if (tmp.de_namelen != len || memcmp(name, de->de_name, len)) + return 0; + return 1; } - -static int entry_points_to_object (const char * name, int len, struct reiserfs_dir_entry * de, struct inode * inode) +static int entry_points_to_object(const char *name, int len, + struct reiserfs_dir_entry *de, + struct inode *inode) { - if (!de_still_valid (name, len, de)) - return 0; - - if (inode) { - if (!de_visible (de->de_deh + de->de_entry_num)) - reiserfs_panic (NULL, "vs-7042: entry_points_to_object: entry must be visible"); - return (de->de_objectid == inode->i_ino) ? 1 : 0; - } + if (!de_still_valid(name, len, de)) + return 0; + + if (inode) { + if (!de_visible(de->de_deh + de->de_entry_num)) + reiserfs_panic(NULL, + "vs-7042: entry_points_to_object: entry must be visible"); + return (de->de_objectid == inode->i_ino) ? 
1 : 0; + } - /* this must be added hidden entry */ - if (de_visible (de->de_deh + de->de_entry_num)) - reiserfs_panic (NULL, "vs-7043: entry_points_to_object: entry must be visible"); + /* this must be added hidden entry */ + if (de_visible(de->de_deh + de->de_entry_num)) + reiserfs_panic(NULL, + "vs-7043: entry_points_to_object: entry must be visible"); - return 1; + return 1; } - /* sets key of objectid the entry has to point to */ -static void set_ino_in_dir_entry (struct reiserfs_dir_entry * de, struct reiserfs_key * key) +static void set_ino_in_dir_entry(struct reiserfs_dir_entry *de, + struct reiserfs_key *key) { - /* JDM These operations are endian safe - both are le */ - de->de_deh[de->de_entry_num].deh_dir_id = key->k_dir_id; - de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid; + /* JDM These operations are endian safe - both are le */ + de->de_deh[de->de_entry_num].deh_dir_id = key->k_dir_id; + de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid; } - /* * process, that is going to call fix_nodes/do_balance must hold only * one path. 
If it holds 2 or more, it can get into endless waiting in * get_empty_nodes or its clones */ -static int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry, - struct inode * new_dir, struct dentry *new_dentry) +static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) { - int retval; - INITIALIZE_PATH (old_entry_path); - INITIALIZE_PATH (new_entry_path); - INITIALIZE_PATH (dot_dot_entry_path); - struct item_head new_entry_ih, old_entry_ih, dot_dot_ih ; - struct reiserfs_dir_entry old_de, new_de, dot_dot_de; - struct inode * old_inode, * new_dentry_inode; - struct reiserfs_transaction_handle th ; - int jbegin_count ; - umode_t old_inode_mode; - unsigned long savelink = 1; - struct timespec ctime; - - /* three balancings: (1) old name removal, (2) new name insertion - and (3) maybe "save" link insertion - stat data updates: (1) old directory, - (2) new directory and (3) maybe old object stat data (when it is - directory) and (4) maybe stat data of object to which new entry - pointed initially and (5) maybe block containing ".." 
of - renamed directory - quota updates: two parent directories */ - jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); - - old_inode = old_dentry->d_inode; - new_dentry_inode = new_dentry->d_inode; - - // make sure, that oldname still exists and points to an object we - // are going to rename - old_de.de_gen_number_bit_string = NULL; - reiserfs_write_lock(old_dir->i_sb); - retval = reiserfs_find_entry (old_dir, old_dentry->d_name.name, old_dentry->d_name.len, - &old_entry_path, &old_de); - pathrelse (&old_entry_path); - if (retval == IO_ERROR) { - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; - } - - if (retval != NAME_FOUND || old_de.de_objectid != old_inode->i_ino) { - reiserfs_write_unlock(old_dir->i_sb); - return -ENOENT; - } - - old_inode_mode = old_inode->i_mode; - if (S_ISDIR(old_inode_mode)) { - // make sure, that directory being renamed has correct ".." - // and that its new parent directory has not too many links - // already - - if (new_dentry_inode) { - if (!reiserfs_empty_dir(new_dentry_inode)) { + int retval; + INITIALIZE_PATH(old_entry_path); + INITIALIZE_PATH(new_entry_path); + INITIALIZE_PATH(dot_dot_entry_path); + struct item_head new_entry_ih, old_entry_ih, dot_dot_ih; + struct reiserfs_dir_entry old_de, new_de, dot_dot_de; + struct inode *old_inode, *new_dentry_inode; + struct reiserfs_transaction_handle th; + int jbegin_count; + umode_t old_inode_mode; + unsigned long savelink = 1; + struct timespec ctime; + + /* three balancings: (1) old name removal, (2) new name insertion + and (3) maybe "save" link insertion + stat data updates: (1) old directory, + (2) new directory and (3) maybe old object stat data (when it is + directory) and (4) maybe stat data of object to which new entry + pointed initially and (5) maybe block containing ".." 
of + renamed directory + quota updates: two parent directories */ + jbegin_count = + JOURNAL_PER_BALANCE_CNT * 3 + 5 + + 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); + + old_inode = old_dentry->d_inode; + new_dentry_inode = new_dentry->d_inode; + + // make sure, that oldname still exists and points to an object we + // are going to rename + old_de.de_gen_number_bit_string = NULL; + reiserfs_write_lock(old_dir->i_sb); + retval = + reiserfs_find_entry(old_dir, old_dentry->d_name.name, + old_dentry->d_name.len, &old_entry_path, + &old_de); + pathrelse(&old_entry_path); + if (retval == IO_ERROR) { reiserfs_write_unlock(old_dir->i_sb); - return -ENOTEMPTY; - } + return -EIO; } - - /* directory is renamed, its parent directory will be changed, - ** so find ".." entry - */ - dot_dot_de.de_gen_number_bit_string = NULL; - retval = reiserfs_find_entry (old_inode, "..", 2, &dot_dot_entry_path, &dot_dot_de); - pathrelse (&dot_dot_entry_path); - if (retval != NAME_FOUND) { - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; + + if (retval != NAME_FOUND || old_de.de_objectid != old_inode->i_ino) { + reiserfs_write_unlock(old_dir->i_sb); + return -ENOENT; } - /* inode number of .. must equal old_dir->i_ino */ - if (dot_dot_de.de_objectid != old_dir->i_ino) { - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; + old_inode_mode = old_inode->i_mode; + if (S_ISDIR(old_inode_mode)) { + // make sure, that directory being renamed has correct ".." + // and that its new parent directory has not too many links + // already + + if (new_dentry_inode) { + if (!reiserfs_empty_dir(new_dentry_inode)) { + reiserfs_write_unlock(old_dir->i_sb); + return -ENOTEMPTY; + } + } + + /* directory is renamed, its parent directory will be changed, + ** so find ".." 
entry + */ + dot_dot_de.de_gen_number_bit_string = NULL; + retval = + reiserfs_find_entry(old_inode, "..", 2, &dot_dot_entry_path, + &dot_dot_de); + pathrelse(&dot_dot_entry_path); + if (retval != NAME_FOUND) { + reiserfs_write_unlock(old_dir->i_sb); + return -EIO; + } + + /* inode number of .. must equal old_dir->i_ino */ + if (dot_dot_de.de_objectid != old_dir->i_ino) { + reiserfs_write_unlock(old_dir->i_sb); + return -EIO; + } } - } - - retval = journal_begin(&th, old_dir->i_sb, jbegin_count) ; - if (retval) { - reiserfs_write_unlock (old_dir->i_sb); - return retval; - } - - /* add new entry (or find the existing one) */ - retval = reiserfs_add_entry (&th, new_dir, new_dentry->d_name.name, new_dentry->d_name.len, - old_inode, 0); - if (retval == -EEXIST) { - if (!new_dentry_inode) { - reiserfs_panic (old_dir->i_sb, - "vs-7050: new entry is found, new inode == 0\n"); + + retval = journal_begin(&th, old_dir->i_sb, jbegin_count); + if (retval) { + reiserfs_write_unlock(old_dir->i_sb); + return retval; } - } else if (retval) { - int err = journal_end(&th, old_dir->i_sb, jbegin_count) ; - reiserfs_write_unlock(old_dir->i_sb); - return err ? 
err : retval; - } - - reiserfs_update_inode_transaction(old_dir) ; - reiserfs_update_inode_transaction(new_dir) ; - - /* this makes it so an fsync on an open fd for the old name will - ** commit the rename operation - */ - reiserfs_update_inode_transaction(old_inode) ; - - if (new_dentry_inode) - reiserfs_update_inode_transaction(new_dentry_inode) ; - - while (1) { - // look for old name using corresponding entry key (found by reiserfs_find_entry) - if ((retval = search_by_entry_key (new_dir->i_sb, &old_de.de_entry_key, - &old_entry_path, &old_de)) != NAME_FOUND) { - pathrelse(&old_entry_path); - journal_end(&th, old_dir->i_sb, jbegin_count); - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; + + /* add new entry (or find the existing one) */ + retval = + reiserfs_add_entry(&th, new_dir, new_dentry->d_name.name, + new_dentry->d_name.len, old_inode, 0); + if (retval == -EEXIST) { + if (!new_dentry_inode) { + reiserfs_panic(old_dir->i_sb, + "vs-7050: new entry is found, new inode == 0\n"); + } + } else if (retval) { + int err = journal_end(&th, old_dir->i_sb, jbegin_count); + reiserfs_write_unlock(old_dir->i_sb); + return err ? err : retval; } - copy_item_head(&old_entry_ih, get_ih(&old_entry_path)) ; - - reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1) ; - - // look for new name by reiserfs_find_entry - new_de.de_gen_number_bit_string = NULL; - retval = reiserfs_find_entry (new_dir, new_dentry->d_name.name, new_dentry->d_name.len, - &new_entry_path, &new_de); - // reiserfs_add_entry should not return IO_ERROR, because it is called with essentially same parameters from - // reiserfs_add_entry above, and we'll catch any i/o errors before we get here. 
- if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) { - pathrelse(&new_entry_path); - pathrelse(&old_entry_path); - journal_end(&th, old_dir->i_sb, jbegin_count); - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; + reiserfs_update_inode_transaction(old_dir); + reiserfs_update_inode_transaction(new_dir); + + /* this makes it so an fsync on an open fd for the old name will + ** commit the rename operation + */ + reiserfs_update_inode_transaction(old_inode); + + if (new_dentry_inode) + reiserfs_update_inode_transaction(new_dentry_inode); + + while (1) { + // look for old name using corresponding entry key (found by reiserfs_find_entry) + if ((retval = + search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key, + &old_entry_path, + &old_de)) != NAME_FOUND) { + pathrelse(&old_entry_path); + journal_end(&th, old_dir->i_sb, jbegin_count); + reiserfs_write_unlock(old_dir->i_sb); + return -EIO; + } + + copy_item_head(&old_entry_ih, get_ih(&old_entry_path)); + + reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1); + + // look for new name by reiserfs_find_entry + new_de.de_gen_number_bit_string = NULL; + retval = + reiserfs_find_entry(new_dir, new_dentry->d_name.name, + new_dentry->d_name.len, &new_entry_path, + &new_de); + // reiserfs_add_entry should not return IO_ERROR, because it is called with essentially same parameters from + // reiserfs_add_entry above, and we'll catch any i/o errors before we get here. 
+ if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) { + pathrelse(&new_entry_path); + pathrelse(&old_entry_path); + journal_end(&th, old_dir->i_sb, jbegin_count); + reiserfs_write_unlock(old_dir->i_sb); + return -EIO; + } + + copy_item_head(&new_entry_ih, get_ih(&new_entry_path)); + + reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1); + + if (S_ISDIR(old_inode->i_mode)) { + if ((retval = + search_by_entry_key(new_dir->i_sb, + &dot_dot_de.de_entry_key, + &dot_dot_entry_path, + &dot_dot_de)) != NAME_FOUND) { + pathrelse(&dot_dot_entry_path); + pathrelse(&new_entry_path); + pathrelse(&old_entry_path); + journal_end(&th, old_dir->i_sb, jbegin_count); + reiserfs_write_unlock(old_dir->i_sb); + return -EIO; + } + copy_item_head(&dot_dot_ih, + get_ih(&dot_dot_entry_path)); + // node containing ".." gets into transaction + reiserfs_prepare_for_journal(old_inode->i_sb, + dot_dot_de.de_bh, 1); + } + /* we should check seals here, not do + this stuff, yes? Then, having + gathered everything into RAM we + should lock the buffers, yes? -Hans */ + /* probably. our rename needs to hold more + ** than one path at once. The seals would + ** have to be written to deal with multi-path + ** issues -chris + */ + /* sanity checking before doing the rename - avoid races many + ** of the above checks could have scheduled. We have to be + ** sure our items haven't been shifted by another process. + */ + if (item_moved(&new_entry_ih, &new_entry_path) || + !entry_points_to_object(new_dentry->d_name.name, + new_dentry->d_name.len, + &new_de, new_dentry_inode) || + item_moved(&old_entry_ih, &old_entry_path) || + !entry_points_to_object(old_dentry->d_name.name, + old_dentry->d_name.len, + &old_de, old_inode)) { + reiserfs_restore_prepared_buffer(old_inode->i_sb, + new_de.de_bh); + reiserfs_restore_prepared_buffer(old_inode->i_sb, + old_de.de_bh); + if (S_ISDIR(old_inode_mode)) + reiserfs_restore_prepared_buffer(old_inode-> + i_sb, + dot_dot_de. 
+ de_bh); + continue; + } + if (S_ISDIR(old_inode_mode)) { + if (item_moved(&dot_dot_ih, &dot_dot_entry_path) || + !entry_points_to_object("..", 2, &dot_dot_de, + old_dir)) { + reiserfs_restore_prepared_buffer(old_inode-> + i_sb, + old_de.de_bh); + reiserfs_restore_prepared_buffer(old_inode-> + i_sb, + new_de.de_bh); + reiserfs_restore_prepared_buffer(old_inode-> + i_sb, + dot_dot_de. + de_bh); + continue; + } + } + + RFALSE(S_ISDIR(old_inode_mode) && + !buffer_journal_prepared(dot_dot_de.de_bh), ""); + + break; } - copy_item_head(&new_entry_ih, get_ih(&new_entry_path)) ; + /* ok, all the changes can be done in one fell swoop when we + have claimed all the buffers needed. */ - reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1) ; + mark_de_visible(new_de.de_deh + new_de.de_entry_num); + set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode)); + journal_mark_dirty(&th, old_dir->i_sb, new_de.de_bh); - if (S_ISDIR(old_inode->i_mode)) { - if ((retval = search_by_entry_key (new_dir->i_sb, &dot_dot_de.de_entry_key, - &dot_dot_entry_path, &dot_dot_de)) != NAME_FOUND) { - pathrelse(&dot_dot_entry_path); - pathrelse(&new_entry_path); - pathrelse(&old_entry_path); - journal_end(&th, old_dir->i_sb, jbegin_count); - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; - } - copy_item_head(&dot_dot_ih, get_ih(&dot_dot_entry_path)) ; - // node containing ".." gets into transaction - reiserfs_prepare_for_journal(old_inode->i_sb, dot_dot_de.de_bh, 1) ; - } - /* we should check seals here, not do - this stuff, yes? Then, having - gathered everything into RAM we - should lock the buffers, yes? -Hans */ - /* probably. our rename needs to hold more - ** than one path at once. The seals would - ** have to be written to deal with multi-path - ** issues -chris - */ - /* sanity checking before doing the rename - avoid races many - ** of the above checks could have scheduled. We have to be - ** sure our items haven't been shifted by another process. 
- */ - if (item_moved(&new_entry_ih, &new_entry_path) || - !entry_points_to_object(new_dentry->d_name.name, - new_dentry->d_name.len, - &new_de, new_dentry_inode) || - item_moved(&old_entry_ih, &old_entry_path) || - !entry_points_to_object (old_dentry->d_name.name, - old_dentry->d_name.len, - &old_de, old_inode)) { - reiserfs_restore_prepared_buffer (old_inode->i_sb, new_de.de_bh); - reiserfs_restore_prepared_buffer (old_inode->i_sb, old_de.de_bh); - if (S_ISDIR(old_inode_mode)) - reiserfs_restore_prepared_buffer (old_inode->i_sb, dot_dot_de.de_bh); - continue; + mark_de_hidden(old_de.de_deh + old_de.de_entry_num); + journal_mark_dirty(&th, old_dir->i_sb, old_de.de_bh); + ctime = CURRENT_TIME_SEC; + old_dir->i_ctime = old_dir->i_mtime = ctime; + new_dir->i_ctime = new_dir->i_mtime = ctime; + /* thanks to Alex Adriaanse for patch which adds ctime update of + renamed object */ + old_inode->i_ctime = ctime; + + if (new_dentry_inode) { + // adjust link number of the victim + if (S_ISDIR(new_dentry_inode->i_mode)) { + new_dentry_inode->i_nlink = 0; + } else { + new_dentry_inode->i_nlink--; + } + new_dentry_inode->i_ctime = ctime; + savelink = new_dentry_inode->i_nlink; } + if (S_ISDIR(old_inode_mode)) { - if ( item_moved(&dot_dot_ih, &dot_dot_entry_path) || - !entry_points_to_object ( "..", 2, &dot_dot_de, old_dir) ) { - reiserfs_restore_prepared_buffer (old_inode->i_sb, old_de.de_bh); - reiserfs_restore_prepared_buffer (old_inode->i_sb, new_de.de_bh); - reiserfs_restore_prepared_buffer (old_inode->i_sb, dot_dot_de.de_bh); - continue; - } + // adjust ".." of renamed directory + set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir)); + journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh); + + if (!new_dentry_inode) + /* there (in new_dir) was no directory, so it got new link + (".." of renamed directory) */ + INC_DIR_INODE_NLINK(new_dir); + + /* old directory lost one link - ".. 
" of renamed directory */ + DEC_DIR_INODE_NLINK(old_dir); } + // looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse + pathrelse(&new_entry_path); + pathrelse(&dot_dot_entry_path); - RFALSE( S_ISDIR(old_inode_mode) && - !buffer_journal_prepared(dot_dot_de.de_bh), "" ); - - break; - } - - /* ok, all the changes can be done in one fell swoop when we - have claimed all the buffers needed.*/ - - mark_de_visible (new_de.de_deh + new_de.de_entry_num); - set_ino_in_dir_entry (&new_de, INODE_PKEY (old_inode)); - journal_mark_dirty (&th, old_dir->i_sb, new_de.de_bh); - - mark_de_hidden (old_de.de_deh + old_de.de_entry_num); - journal_mark_dirty (&th, old_dir->i_sb, old_de.de_bh); - ctime = CURRENT_TIME_SEC; - old_dir->i_ctime = old_dir->i_mtime = ctime; - new_dir->i_ctime = new_dir->i_mtime = ctime; - /* thanks to Alex Adriaanse for patch which adds ctime update of - renamed object */ - old_inode->i_ctime = ctime; - - if (new_dentry_inode) { - // adjust link number of the victim - if (S_ISDIR(new_dentry_inode->i_mode)) { - new_dentry_inode->i_nlink = 0; - } else { - new_dentry_inode->i_nlink--; + // FIXME: this reiserfs_cut_from_item's return value may screw up + // anybody, but it will panic if will not be able to find the + // entry. This needs one more clean up + if (reiserfs_cut_from_item + (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, + 0) < 0) + reiserfs_warning(old_dir->i_sb, + "vs-7060: reiserfs_rename: couldn't not cut old name. Fsck later?"); + + old_dir->i_size -= DEH_SIZE + old_de.de_entrylen; + + reiserfs_update_sd(&th, old_dir); + reiserfs_update_sd(&th, new_dir); + reiserfs_update_sd(&th, old_inode); + + if (new_dentry_inode) { + if (savelink == 0) + add_save_link(&th, new_dentry_inode, + 0 /* not truncate */ ); + reiserfs_update_sd(&th, new_dentry_inode); } - new_dentry_inode->i_ctime = ctime; - savelink = new_dentry_inode->i_nlink; - } - - if (S_ISDIR(old_inode_mode)) { - // adjust ".." 
of renamed directory - set_ino_in_dir_entry (&dot_dot_de, INODE_PKEY (new_dir)); - journal_mark_dirty (&th, new_dir->i_sb, dot_dot_de.de_bh); - - if (!new_dentry_inode) - /* there (in new_dir) was no directory, so it got new link - (".." of renamed directory) */ - INC_DIR_INODE_NLINK(new_dir); - - /* old directory lost one link - ".. " of renamed directory */ - DEC_DIR_INODE_NLINK(old_dir); - } - - // looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse - pathrelse (&new_entry_path); - pathrelse (&dot_dot_entry_path); - - // FIXME: this reiserfs_cut_from_item's return value may screw up - // anybody, but it will panic if will not be able to find the - // entry. This needs one more clean up - if (reiserfs_cut_from_item (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, 0) < 0) - reiserfs_warning (old_dir->i_sb, "vs-7060: reiserfs_rename: couldn't not cut old name. Fsck later?"); - - old_dir->i_size -= DEH_SIZE + old_de.de_entrylen; - - reiserfs_update_sd (&th, old_dir); - reiserfs_update_sd (&th, new_dir); - reiserfs_update_sd (&th, old_inode); - - if (new_dentry_inode) { - if (savelink == 0) - add_save_link (&th, new_dentry_inode, 0/* not truncate */); - reiserfs_update_sd (&th, new_dentry_inode); - } - - retval = journal_end(&th, old_dir->i_sb, jbegin_count) ; - reiserfs_write_unlock(old_dir->i_sb); - return retval; + + retval = journal_end(&th, old_dir->i_sb, jbegin_count); + reiserfs_write_unlock(old_dir->i_sb); + return retval; } /* * directories can handle most operations... 
*/ struct inode_operations reiserfs_dir_inode_operations = { - //&reiserfs_dir_operations, /* default_file_ops */ - .create = reiserfs_create, - .lookup = reiserfs_lookup, - .link = reiserfs_link, - .unlink = reiserfs_unlink, - .symlink = reiserfs_symlink, - .mkdir = reiserfs_mkdir, - .rmdir = reiserfs_rmdir, - .mknod = reiserfs_mknod, - .rename = reiserfs_rename, - .setattr = reiserfs_setattr, - .setxattr = reiserfs_setxattr, - .getxattr = reiserfs_getxattr, - .listxattr = reiserfs_listxattr, - .removexattr = reiserfs_removexattr, - .permission = reiserfs_permission, + //&reiserfs_dir_operations, /* default_file_ops */ + .create = reiserfs_create, + .lookup = reiserfs_lookup, + .link = reiserfs_link, + .unlink = reiserfs_unlink, + .symlink = reiserfs_symlink, + .mkdir = reiserfs_mkdir, + .rmdir = reiserfs_rmdir, + .mknod = reiserfs_mknod, + .rename = reiserfs_rename, + .setattr = reiserfs_setattr, + .setxattr = reiserfs_setxattr, + .getxattr = reiserfs_getxattr, + .listxattr = reiserfs_listxattr, + .removexattr = reiserfs_removexattr, + .permission = reiserfs_permission, }; /* @@ -1467,28 +1554,27 @@ struct inode_operations reiserfs_dir_inode_operations = { * stuff added */ struct inode_operations reiserfs_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = page_follow_link_light, - .put_link = page_put_link, - .setattr = reiserfs_setattr, - .setxattr = reiserfs_setxattr, - .getxattr = reiserfs_getxattr, - .listxattr = reiserfs_listxattr, - .removexattr = reiserfs_removexattr, - .permission = reiserfs_permission, + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, + .setattr = reiserfs_setattr, + .setxattr = reiserfs_setxattr, + .getxattr = reiserfs_getxattr, + .listxattr = reiserfs_listxattr, + .removexattr = reiserfs_removexattr, + .permission = reiserfs_permission, }; - /* * special file operations.. 
just xattr/acl stuff */ struct inode_operations reiserfs_special_inode_operations = { - .setattr = reiserfs_setattr, - .setxattr = reiserfs_setxattr, - .getxattr = reiserfs_getxattr, - .listxattr = reiserfs_listxattr, - .removexattr = reiserfs_removexattr, - .permission = reiserfs_permission, + .setattr = reiserfs_setattr, + .setxattr = reiserfs_setxattr, + .getxattr = reiserfs_getxattr, + .listxattr = reiserfs_listxattr, + .removexattr = reiserfs_removexattr, + .permission = reiserfs_permission, }; diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c index bfe8e25ef29..f62590aa9c9 100644 --- a/fs/reiserfs/objectid.c +++ b/fs/reiserfs/objectid.c @@ -14,24 +14,24 @@ (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\ (__le32 *)((rs) + 1)) - #ifdef CONFIG_REISERFS_CHECK -static void check_objectid_map (struct super_block * s, __le32 * map) +static void check_objectid_map(struct super_block *s, __le32 * map) { - if (le32_to_cpu (map[0]) != 1) - reiserfs_panic (s, "vs-15010: check_objectid_map: map corrupted: %lx", - ( long unsigned int ) le32_to_cpu (map[0])); + if (le32_to_cpu(map[0]) != 1) + reiserfs_panic(s, + "vs-15010: check_objectid_map: map corrupted: %lx", + (long unsigned int)le32_to_cpu(map[0])); - // FIXME: add something else here + // FIXME: add something else here } #else -static void check_objectid_map (struct super_block * s, __le32 * map) -{;} +static void check_objectid_map(struct super_block *s, __le32 * map) +{; +} #endif - /* When we allocate objectids we allocate the first unused objectid. Each sequence of objectids in use (the odd sequences) is followed by a sequence of objectids not in use (the even sequences). We @@ -46,161 +46,162 @@ static void check_objectid_map (struct super_block * s, __le32 * map) interesting optimizations of layout could result from complicating objectid assignment, but we have deferred making them for now. 
*/ - /* get unique object identifier */ -__u32 reiserfs_get_unused_objectid (struct reiserfs_transaction_handle *th) +__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th) { - struct super_block * s = th->t_super; - struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); - __le32 * map = objectid_map (s, rs); - __u32 unused_objectid; - - BUG_ON (!th->t_trans_id); + struct super_block *s = th->t_super; + struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); + __le32 *map = objectid_map(s, rs); + __u32 unused_objectid; + + BUG_ON(!th->t_trans_id); + + check_objectid_map(s, map); + + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); + /* comment needed -Hans */ + unused_objectid = le32_to_cpu(map[1]); + if (unused_objectid == U32_MAX) { + reiserfs_warning(s, "%s: no more object ids", __FUNCTION__); + reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s)); + return 0; + } - check_objectid_map (s, map); + /* This incrementation allocates the first unused objectid. That + is to say, the first entry on the objectid map is the first + unused objectid, and by incrementing it we use it. See below + where we check to see if we eliminated a sequence of unused + objectids.... */ + map[1] = cpu_to_le32(unused_objectid + 1); + + /* Now we check to see if we eliminated the last remaining member of + the first even sequence (and can eliminate the sequence by + eliminating its last objectid from oids), and can collapse the + first two odd sequences into one sequence. If so, then the net + result is to eliminate a pair of objectids from oids. We do this + by shifting the entire map to the left. 
*/ + if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) { + memmove(map + 1, map + 3, + (sb_oid_cursize(rs) - 3) * sizeof(__u32)); + set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); + } - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; - /* comment needed -Hans */ - unused_objectid = le32_to_cpu (map[1]); - if (unused_objectid == U32_MAX) { - reiserfs_warning (s, "%s: no more object ids", __FUNCTION__); - reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s)) ; - return 0; - } - - /* This incrementation allocates the first unused objectid. That - is to say, the first entry on the objectid map is the first - unused objectid, and by incrementing it we use it. See below - where we check to see if we eliminated a sequence of unused - objectids.... */ - map[1] = cpu_to_le32 (unused_objectid + 1); - - /* Now we check to see if we eliminated the last remaining member of - the first even sequence (and can eliminate the sequence by - eliminating its last objectid from oids), and can collapse the - first two odd sequences into one sequence. If so, then the net - result is to eliminate a pair of objectids from oids. We do this - by shifting the entire map to the left. 
*/ - if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) { - memmove (map + 1, map + 3, (sb_oid_cursize(rs) - 3) * sizeof(__u32)); - set_sb_oid_cursize( rs, sb_oid_cursize(rs) - 2 ); - } - - journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s)); - return unused_objectid; + journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s)); + return unused_objectid; } - /* makes object identifier unused */ -void reiserfs_release_objectid (struct reiserfs_transaction_handle *th, - __u32 objectid_to_release) +void reiserfs_release_objectid(struct reiserfs_transaction_handle *th, + __u32 objectid_to_release) { - struct super_block * s = th->t_super; - struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); - __le32 * map = objectid_map (s, rs); - int i = 0; - - BUG_ON (!th->t_trans_id); - //return; - check_objectid_map (s, map); - - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; - journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s)); - - /* start at the beginning of the objectid map (i = 0) and go to - the end of it (i = disk_sb->s_oid_cursize). Linear search is - what we use, though it is possible that binary search would be - more efficient after performing lots of deletions (which is - when oids is large.) We only check even i's. */ - while (i < sb_oid_cursize(rs)) { - if (objectid_to_release == le32_to_cpu (map[i])) { - /* This incrementation unallocates the objectid. */ - //map[i]++; - map[i] = cpu_to_le32 (le32_to_cpu (map[i]) + 1); - - /* Did we unallocate the last member of an odd sequence, and can shrink oids? 
*/ - if (map[i] == map[i+1]) { - /* shrink objectid map */ - memmove (map + i, map + i + 2, - (sb_oid_cursize(rs) - i - 2) * sizeof (__u32)); - //disk_sb->s_oid_cursize -= 2; - set_sb_oid_cursize( rs, sb_oid_cursize(rs) - 2 ); - - RFALSE( sb_oid_cursize(rs) < 2 || - sb_oid_cursize(rs) > sb_oid_maxsize(rs), - "vs-15005: objectid map corrupted cur_size == %d (max == %d)", - sb_oid_cursize(rs), sb_oid_maxsize(rs)); - } - return; + struct super_block *s = th->t_super; + struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); + __le32 *map = objectid_map(s, rs); + int i = 0; + + BUG_ON(!th->t_trans_id); + //return; + check_objectid_map(s, map); + + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); + journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s)); + + /* start at the beginning of the objectid map (i = 0) and go to + the end of it (i = disk_sb->s_oid_cursize). Linear search is + what we use, though it is possible that binary search would be + more efficient after performing lots of deletions (which is + when oids is large.) We only check even i's. */ + while (i < sb_oid_cursize(rs)) { + if (objectid_to_release == le32_to_cpu(map[i])) { + /* This incrementation unallocates the objectid. */ + //map[i]++; + map[i] = cpu_to_le32(le32_to_cpu(map[i]) + 1); + + /* Did we unallocate the last member of an odd sequence, and can shrink oids? 
*/ + if (map[i] == map[i + 1]) { + /* shrink objectid map */ + memmove(map + i, map + i + 2, + (sb_oid_cursize(rs) - i - + 2) * sizeof(__u32)); + //disk_sb->s_oid_cursize -= 2; + set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); + + RFALSE(sb_oid_cursize(rs) < 2 || + sb_oid_cursize(rs) > sb_oid_maxsize(rs), + "vs-15005: objectid map corrupted cur_size == %d (max == %d)", + sb_oid_cursize(rs), sb_oid_maxsize(rs)); + } + return; + } + + if (objectid_to_release > le32_to_cpu(map[i]) && + objectid_to_release < le32_to_cpu(map[i + 1])) { + /* size of objectid map is not changed */ + if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) { + //objectid_map[i+1]--; + map[i + 1] = + cpu_to_le32(le32_to_cpu(map[i + 1]) - 1); + return; + } + + /* JDM comparing two little-endian values for equality -- safe */ + if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) { + /* objectid map must be expanded, but there is no space */ + PROC_INFO_INC(s, leaked_oid); + return; + } + + /* expand the objectid map */ + memmove(map + i + 3, map + i + 1, + (sb_oid_cursize(rs) - i - 1) * sizeof(__u32)); + map[i + 1] = cpu_to_le32(objectid_to_release); + map[i + 2] = cpu_to_le32(objectid_to_release + 1); + set_sb_oid_cursize(rs, sb_oid_cursize(rs) + 2); + return; + } + i += 2; } - if (objectid_to_release > le32_to_cpu (map[i]) && - objectid_to_release < le32_to_cpu (map[i + 1])) { - /* size of objectid map is not changed */ - if (objectid_to_release + 1 == le32_to_cpu (map[i + 1])) { - //objectid_map[i+1]--; - map[i + 1] = cpu_to_le32 (le32_to_cpu (map[i + 1]) - 1); - return; - } - - /* JDM comparing two little-endian values for equality -- safe */ - if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) { - /* objectid map must be expanded, but there is no space */ - PROC_INFO_INC( s, leaked_oid ); - return; - } + reiserfs_warning(s, + "vs-15011: reiserfs_release_objectid: tried to free free object id (%lu)", + (long unsigned)objectid_to_release); +} - /* expand the objectid map*/ - memmove (map + i + 3, map 
+ i + 1, - (sb_oid_cursize(rs) - i - 1) * sizeof(__u32)); - map[i + 1] = cpu_to_le32 (objectid_to_release); - map[i + 2] = cpu_to_le32 (objectid_to_release + 1); - set_sb_oid_cursize( rs, sb_oid_cursize(rs) + 2 ); - return; +int reiserfs_convert_objectid_map_v1(struct super_block *s) +{ + struct reiserfs_super_block *disk_sb = SB_DISK_SUPER_BLOCK(s); + int cur_size = sb_oid_cursize(disk_sb); + int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2; + int old_max = sb_oid_maxsize(disk_sb); + struct reiserfs_super_block_v1 *disk_sb_v1; + __le32 *objectid_map, *new_objectid_map; + int i; + + disk_sb_v1 = + (struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data); + objectid_map = (__le32 *) (disk_sb_v1 + 1); + new_objectid_map = (__le32 *) (disk_sb + 1); + + if (cur_size > new_size) { + /* mark everyone used that was listed as free at the end of the objectid + ** map + */ + objectid_map[new_size - 1] = objectid_map[cur_size - 1]; + set_sb_oid_cursize(disk_sb, new_size); + } + /* move the smaller objectid map past the end of the new super */ + for (i = new_size - 1; i >= 0; i--) { + objectid_map[i + (old_max - new_size)] = objectid_map[i]; } - i += 2; - } - reiserfs_warning (s, "vs-15011: reiserfs_release_objectid: tried to free free object id (%lu)", - ( long unsigned ) objectid_to_release); -} + /* set the max size so we don't overflow later */ + set_sb_oid_maxsize(disk_sb, new_size); + /* Zero out label and generate random UUID */ + memset(disk_sb->s_label, 0, sizeof(disk_sb->s_label)); + generate_random_uuid(disk_sb->s_uuid); -int reiserfs_convert_objectid_map_v1(struct super_block *s) { - struct reiserfs_super_block *disk_sb = SB_DISK_SUPER_BLOCK (s); - int cur_size = sb_oid_cursize(disk_sb); - int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2 ; - int old_max = sb_oid_maxsize(disk_sb); - struct reiserfs_super_block_v1 *disk_sb_v1 ; - __le32 *objectid_map, *new_objectid_map ; - int i ; - - disk_sb_v1=(struct 
reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data); - objectid_map = (__le32 *)(disk_sb_v1 + 1) ; - new_objectid_map = (__le32 *)(disk_sb + 1) ; - - if (cur_size > new_size) { - /* mark everyone used that was listed as free at the end of the objectid - ** map - */ - objectid_map[new_size - 1] = objectid_map[cur_size - 1] ; - set_sb_oid_cursize(disk_sb,new_size) ; - } - /* move the smaller objectid map past the end of the new super */ - for (i = new_size - 1 ; i >= 0 ; i--) { - objectid_map[i + (old_max - new_size)] = objectid_map[i] ; - } - - - /* set the max size so we don't overflow later */ - set_sb_oid_maxsize(disk_sb,new_size) ; - - /* Zero out label and generate random UUID */ - memset(disk_sb->s_label, 0, sizeof(disk_sb->s_label)) ; - generate_random_uuid(disk_sb->s_uuid); - - /* finally, zero out the unused chunk of the new super */ - memset(disk_sb->s_unused, 0, sizeof(disk_sb->s_unused)) ; - return 0 ; + /* finally, zero out the unused chunk of the new super */ + memset(disk_sb->s_unused, 0, sizeof(disk_sb->s_unused)); + return 0; } - diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 16fdca1d4bd..d55e164bd5c 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -15,168 +15,166 @@ static char error_buf[1024]; static char fmt_buf[1024]; static char off_buf[80]; - -static char * reiserfs_cpu_offset (struct cpu_key * key) +static char *reiserfs_cpu_offset(struct cpu_key *key) { - if (cpu_key_k_type(key) == TYPE_DIRENTRY) - sprintf (off_buf, "%Lu(%Lu)", - (unsigned long long)GET_HASH_VALUE (cpu_key_k_offset (key)), - (unsigned long long)GET_GENERATION_NUMBER (cpu_key_k_offset (key))); - else - sprintf (off_buf, "0x%Lx", (unsigned long long)cpu_key_k_offset (key)); - return off_buf; + if (cpu_key_k_type(key) == TYPE_DIRENTRY) + sprintf(off_buf, "%Lu(%Lu)", + (unsigned long long) + GET_HASH_VALUE(cpu_key_k_offset(key)), + (unsigned long long) + GET_GENERATION_NUMBER(cpu_key_k_offset(key))); + else + sprintf(off_buf, "0x%Lx", + 
(unsigned long long)cpu_key_k_offset(key)); + return off_buf; } - -static char * le_offset (struct reiserfs_key * key) +static char *le_offset(struct reiserfs_key *key) { - int version; + int version; - version = le_key_version (key); - if (le_key_k_type (version, key) == TYPE_DIRENTRY) - sprintf (off_buf, "%Lu(%Lu)", - (unsigned long long)GET_HASH_VALUE (le_key_k_offset (version, key)), - (unsigned long long)GET_GENERATION_NUMBER (le_key_k_offset (version, key))); - else - sprintf (off_buf, "0x%Lx", (unsigned long long)le_key_k_offset (version, key)); - return off_buf; + version = le_key_version(key); + if (le_key_k_type(version, key) == TYPE_DIRENTRY) + sprintf(off_buf, "%Lu(%Lu)", + (unsigned long long) + GET_HASH_VALUE(le_key_k_offset(version, key)), + (unsigned long long) + GET_GENERATION_NUMBER(le_key_k_offset(version, key))); + else + sprintf(off_buf, "0x%Lx", + (unsigned long long)le_key_k_offset(version, key)); + return off_buf; } - -static char * cpu_type (struct cpu_key * key) +static char *cpu_type(struct cpu_key *key) { - if (cpu_key_k_type (key) == TYPE_STAT_DATA) - return "SD"; - if (cpu_key_k_type (key) == TYPE_DIRENTRY) - return "DIR"; - if (cpu_key_k_type (key) == TYPE_DIRECT) - return "DIRECT"; - if (cpu_key_k_type (key) == TYPE_INDIRECT) - return "IND"; - return "UNKNOWN"; + if (cpu_key_k_type(key) == TYPE_STAT_DATA) + return "SD"; + if (cpu_key_k_type(key) == TYPE_DIRENTRY) + return "DIR"; + if (cpu_key_k_type(key) == TYPE_DIRECT) + return "DIRECT"; + if (cpu_key_k_type(key) == TYPE_INDIRECT) + return "IND"; + return "UNKNOWN"; } - -static char * le_type (struct reiserfs_key * key) +static char *le_type(struct reiserfs_key *key) { - int version; - - version = le_key_version (key); + int version; - if (le_key_k_type (version, key) == TYPE_STAT_DATA) - return "SD"; - if (le_key_k_type (version, key) == TYPE_DIRENTRY) - return "DIR"; - if (le_key_k_type (version, key) == TYPE_DIRECT) - return "DIRECT"; - if (le_key_k_type (version, key) == 
TYPE_INDIRECT) - return "IND"; - return "UNKNOWN"; -} + version = le_key_version(key); + if (le_key_k_type(version, key) == TYPE_STAT_DATA) + return "SD"; + if (le_key_k_type(version, key) == TYPE_DIRENTRY) + return "DIR"; + if (le_key_k_type(version, key) == TYPE_DIRECT) + return "DIRECT"; + if (le_key_k_type(version, key) == TYPE_INDIRECT) + return "IND"; + return "UNKNOWN"; +} /* %k */ -static void sprintf_le_key (char * buf, struct reiserfs_key * key) +static void sprintf_le_key(char *buf, struct reiserfs_key *key) { - if (key) - sprintf (buf, "[%d %d %s %s]", le32_to_cpu (key->k_dir_id), - le32_to_cpu (key->k_objectid), le_offset (key), le_type (key)); - else - sprintf (buf, "[NULL]"); + if (key) + sprintf(buf, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id), + le32_to_cpu(key->k_objectid), le_offset(key), + le_type(key)); + else + sprintf(buf, "[NULL]"); } - /* %K */ -static void sprintf_cpu_key (char * buf, struct cpu_key * key) +static void sprintf_cpu_key(char *buf, struct cpu_key *key) { - if (key) - sprintf (buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id, - key->on_disk_key.k_objectid, reiserfs_cpu_offset (key), - cpu_type (key)); - else - sprintf (buf, "[NULL]"); + if (key) + sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id, + key->on_disk_key.k_objectid, reiserfs_cpu_offset(key), + cpu_type(key)); + else + sprintf(buf, "[NULL]"); } -static void sprintf_de_head( char *buf, struct reiserfs_de_head *deh ) +static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh) { - if( deh ) - sprintf( buf, "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", deh_offset(deh), deh_dir_id(deh), - deh_objectid(deh), deh_location(deh), deh_state(deh) ); - else - sprintf( buf, "[NULL]" ); + if (deh) + sprintf(buf, + "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", + deh_offset(deh), deh_dir_id(deh), deh_objectid(deh), + deh_location(deh), deh_state(deh)); + else + sprintf(buf, "[NULL]"); } -static void sprintf_item_head (char * buf, struct 
item_head * ih) +static void sprintf_item_head(char *buf, struct item_head *ih) { - if (ih) { - strcpy (buf, (ih_version (ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*"); - sprintf_le_key (buf + strlen (buf), &(ih->ih_key)); - sprintf (buf + strlen (buf), ", item_len %d, item_location %d, " - "free_space(entry_count) %d", - ih_item_len(ih), ih_location(ih), ih_free_space (ih)); - } else - sprintf (buf, "[NULL]"); + if (ih) { + strcpy(buf, + (ih_version(ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*"); + sprintf_le_key(buf + strlen(buf), &(ih->ih_key)); + sprintf(buf + strlen(buf), ", item_len %d, item_location %d, " + "free_space(entry_count) %d", + ih_item_len(ih), ih_location(ih), ih_free_space(ih)); + } else + sprintf(buf, "[NULL]"); } - -static void sprintf_direntry (char * buf, struct reiserfs_dir_entry * de) +static void sprintf_direntry(char *buf, struct reiserfs_dir_entry *de) { - char name[20]; + char name[20]; - memcpy (name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); - name [de->de_namelen > 19 ? 19 : de->de_namelen] = 0; - sprintf (buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid); + memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); + name[de->de_namelen > 19 ? 
19 : de->de_namelen] = 0; + sprintf(buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid); } - -static void sprintf_block_head (char * buf, struct buffer_head * bh) +static void sprintf_block_head(char *buf, struct buffer_head *bh) { - sprintf (buf, "level=%d, nr_items=%d, free_space=%d rdkey ", - B_LEVEL (bh), B_NR_ITEMS (bh), B_FREE_SPACE (bh)); + sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ", + B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh)); } - -static void sprintf_buffer_head (char * buf, struct buffer_head * bh) +static void sprintf_buffer_head(char *buf, struct buffer_head *bh) { - char b[BDEVNAME_SIZE]; + char b[BDEVNAME_SIZE]; - sprintf (buf, "dev %s, size %d, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", - bdevname (bh->b_bdev, b), bh->b_size, - (unsigned long long)bh->b_blocknr, - atomic_read (&(bh->b_count)), - bh->b_state, bh->b_page, - buffer_uptodate (bh) ? "UPTODATE" : "!UPTODATE", - buffer_dirty (bh) ? "DIRTY" : "CLEAN", - buffer_locked (bh) ? "LOCKED" : "UNLOCKED"); + sprintf(buf, + "dev %s, size %d, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", + bdevname(bh->b_bdev, b), bh->b_size, + (unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)), + bh->b_state, bh->b_page, + buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE", + buffer_dirty(bh) ? "DIRTY" : "CLEAN", + buffer_locked(bh) ? 
"LOCKED" : "UNLOCKED"); } - -static void sprintf_disk_child (char * buf, struct disk_child * dc) +static void sprintf_disk_child(char *buf, struct disk_child *dc) { - sprintf (buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc), dc_size(dc)); + sprintf(buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc), + dc_size(dc)); } - -static char * is_there_reiserfs_struct (char * fmt, int * what, int * skip) +static char *is_there_reiserfs_struct(char *fmt, int *what, int *skip) { - char * k = fmt; + char *k = fmt; - *skip = 0; - - while ((k = strchr (k, '%')) != NULL) - { - if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' || - k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a' ) { - *what = k[1]; - break; - } - (*skip) ++; - k ++; - } - return k; -} + *skip = 0; + while ((k = strchr(k, '%')) != NULL) { + if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' || + k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a') { + *what = k[1]; + break; + } + (*skip)++; + k++; + } + return k; +} /* debugging reiserfs we used to print out a lot of different variables, like keys, item headers, buffer heads etc. 
Values of @@ -191,61 +189,64 @@ static char * is_there_reiserfs_struct (char * fmt, int * what, int * skip) key->k_offset, key->k_uniqueness); */ - -static void -prepare_error_buf( const char *fmt, va_list args ) -{ - char * fmt1 = fmt_buf; - char * k; - char * p = error_buf; - int i, j, what, skip; - - strcpy (fmt1, fmt); - - while( (k = is_there_reiserfs_struct( fmt1, &what, &skip )) != NULL ) - { - *k = 0; - - p += vsprintf (p, fmt1, args); - - for (i = 0; i < skip; i ++) - j = va_arg (args, int); - - switch (what) { - case 'k': - sprintf_le_key (p, va_arg(args, struct reiserfs_key *)); - break; - case 'K': - sprintf_cpu_key (p, va_arg(args, struct cpu_key *)); - break; - case 'h': - sprintf_item_head (p, va_arg(args, struct item_head *)); - break; - case 't': - sprintf_direntry (p, va_arg(args, struct reiserfs_dir_entry *)); - break; - case 'y': - sprintf_disk_child (p, va_arg(args, struct disk_child *)); - break; - case 'z': - sprintf_block_head (p, va_arg(args, struct buffer_head *)); - break; - case 'b': - sprintf_buffer_head (p, va_arg(args, struct buffer_head *)); - break; - case 'a': - sprintf_de_head (p, va_arg(args, struct reiserfs_de_head *)); - break; - } - - p += strlen (p); - fmt1 = k + 2; - } - vsprintf (p, fmt1, args); +static void prepare_error_buf(const char *fmt, va_list args) +{ + char *fmt1 = fmt_buf; + char *k; + char *p = error_buf; + int i, j, what, skip; + + strcpy(fmt1, fmt); + + while ((k = is_there_reiserfs_struct(fmt1, &what, &skip)) != NULL) { + *k = 0; + + p += vsprintf(p, fmt1, args); + + for (i = 0; i < skip; i++) + j = va_arg(args, int); + + switch (what) { + case 'k': + sprintf_le_key(p, va_arg(args, struct reiserfs_key *)); + break; + case 'K': + sprintf_cpu_key(p, va_arg(args, struct cpu_key *)); + break; + case 'h': + sprintf_item_head(p, va_arg(args, struct item_head *)); + break; + case 't': + sprintf_direntry(p, + va_arg(args, + struct reiserfs_dir_entry *)); + break; + case 'y': + sprintf_disk_child(p, + va_arg(args, 
struct disk_child *)); + break; + case 'z': + sprintf_block_head(p, + va_arg(args, struct buffer_head *)); + break; + case 'b': + sprintf_buffer_head(p, + va_arg(args, struct buffer_head *)); + break; + case 'a': + sprintf_de_head(p, + va_arg(args, + struct reiserfs_de_head *)); + break; + } + + p += strlen(p); + fmt1 = k + 2; + } + vsprintf(p, fmt1, args); } - /* in addition to usual conversion specifiers this accepts reiserfs specific conversion specifiers: %k to print little endian key, @@ -264,43 +265,43 @@ prepare_error_buf( const char *fmt, va_list args ) va_end( args );\ } -void reiserfs_warning (struct super_block *sb, const char * fmt, ...) +void reiserfs_warning(struct super_block *sb, const char *fmt, ...) { - do_reiserfs_warning(fmt); - if (sb) - printk (KERN_WARNING "ReiserFS: %s: warning: %s\n", - reiserfs_bdevname (sb), error_buf); - else - printk (KERN_WARNING "ReiserFS: warning: %s\n", error_buf); + do_reiserfs_warning(fmt); + if (sb) + printk(KERN_WARNING "ReiserFS: %s: warning: %s\n", + reiserfs_bdevname(sb), error_buf); + else + printk(KERN_WARNING "ReiserFS: warning: %s\n", error_buf); } /* No newline.. reiserfs_info calls can be followed by printk's */ -void reiserfs_info (struct super_block *sb, const char * fmt, ...) +void reiserfs_info(struct super_block *sb, const char *fmt, ...) { - do_reiserfs_warning(fmt); - if (sb) - printk (KERN_NOTICE "ReiserFS: %s: %s", - reiserfs_bdevname (sb), error_buf); - else - printk (KERN_NOTICE "ReiserFS: %s", error_buf); + do_reiserfs_warning(fmt); + if (sb) + printk(KERN_NOTICE "ReiserFS: %s: %s", + reiserfs_bdevname(sb), error_buf); + else + printk(KERN_NOTICE "ReiserFS: %s", error_buf); } /* No newline.. reiserfs_printk calls can be followed by printk's */ -static void reiserfs_printk (const char * fmt, ...) +static void reiserfs_printk(const char *fmt, ...) 
{ - do_reiserfs_warning(fmt); - printk (error_buf); + do_reiserfs_warning(fmt); + printk(error_buf); } -void reiserfs_debug (struct super_block *s, int level, const char * fmt, ...) +void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...) { #ifdef CONFIG_REISERFS_CHECK - do_reiserfs_warning(fmt); - if (s) - printk (KERN_DEBUG "ReiserFS: %s: %s\n", - reiserfs_bdevname (s), error_buf); - else - printk (KERN_DEBUG "ReiserFS: %s\n", error_buf); + do_reiserfs_warning(fmt); + if (s) + printk(KERN_DEBUG "ReiserFS: %s: %s\n", + reiserfs_bdevname(s), error_buf); + else + printk(KERN_DEBUG "ReiserFS: %s\n", error_buf); #endif } @@ -349,379 +350,403 @@ void reiserfs_debug (struct super_block *s, int level, const char * fmt, ...) . */ - #ifdef CONFIG_REISERFS_CHECK -extern struct tree_balance * cur_tb; +extern struct tree_balance *cur_tb; #endif -void reiserfs_panic (struct super_block * sb, const char * fmt, ...) +void reiserfs_panic(struct super_block *sb, const char *fmt, ...) { - do_reiserfs_warning(fmt); - printk (KERN_EMERG "REISERFS: panic (device %s): %s\n", - reiserfs_bdevname (sb), error_buf); - BUG (); + do_reiserfs_warning(fmt); + printk(KERN_EMERG "REISERFS: panic (device %s): %s\n", + reiserfs_bdevname(sb), error_buf); + BUG(); - /* this is not actually called, but makes reiserfs_panic() "noreturn" */ - panic ("REISERFS: panic (device %s): %s\n", - reiserfs_bdevname (sb), error_buf); + /* this is not actually called, but makes reiserfs_panic() "noreturn" */ + panic("REISERFS: panic (device %s): %s\n", + reiserfs_bdevname(sb), error_buf); } -void -reiserfs_abort (struct super_block *sb, int errno, const char *fmt, ...) +void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...) 
{ - do_reiserfs_warning (fmt); + do_reiserfs_warning(fmt); - if (reiserfs_error_panic (sb)) { - panic (KERN_CRIT "REISERFS: panic (device %s): %s\n", - reiserfs_bdevname (sb), error_buf); - } + if (reiserfs_error_panic(sb)) { + panic(KERN_CRIT "REISERFS: panic (device %s): %s\n", + reiserfs_bdevname(sb), error_buf); + } - if (sb->s_flags & MS_RDONLY) - return; + if (sb->s_flags & MS_RDONLY) + return; - printk (KERN_CRIT "REISERFS: abort (device %s): %s\n", - reiserfs_bdevname (sb), error_buf); + printk(KERN_CRIT "REISERFS: abort (device %s): %s\n", + reiserfs_bdevname(sb), error_buf); - sb->s_flags |= MS_RDONLY; - reiserfs_journal_abort (sb, errno); + sb->s_flags |= MS_RDONLY; + reiserfs_journal_abort(sb, errno); } /* this prints internal nodes (4 keys/items in line) (dc_number, dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number, dc_size)...*/ -static int print_internal (struct buffer_head * bh, int first, int last) +static int print_internal(struct buffer_head *bh, int first, int last) { - struct reiserfs_key * key; - struct disk_child * dc; - int i; - int from, to; - - if (!B_IS_KEYS_LEVEL (bh)) - return 1; - - check_internal (bh); - - if (first == -1) { - from = 0; - to = B_NR_ITEMS (bh); - } else { - from = first; - to = last < B_NR_ITEMS (bh) ? last : B_NR_ITEMS (bh); - } - - reiserfs_printk ("INTERNAL NODE (%ld) contains %z\n", bh->b_blocknr, bh); - - dc = B_N_CHILD (bh, from); - reiserfs_printk ("PTR %d: %y ", from, dc); - - for (i = from, key = B_N_PDELIM_KEY (bh, from), dc ++; i < to; i ++, key ++, dc ++) { - reiserfs_printk ("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc); - if (i && i % 4 == 0) - printk ("\n"); - } - printk ("\n"); - return 0; -} + struct reiserfs_key *key; + struct disk_child *dc; + int i; + int from, to; + if (!B_IS_KEYS_LEVEL(bh)) + return 1; + check_internal(bh); + if (first == -1) { + from = 0; + to = B_NR_ITEMS(bh); + } else { + from = first; + to = last < B_NR_ITEMS(bh) ? 
last : B_NR_ITEMS(bh); + } + reiserfs_printk("INTERNAL NODE (%ld) contains %z\n", bh->b_blocknr, bh); -static int print_leaf (struct buffer_head * bh, int print_mode, int first, int last) -{ - struct block_head * blkh; - struct item_head * ih; - int i, nr; - int from, to; + dc = B_N_CHILD(bh, from); + reiserfs_printk("PTR %d: %y ", from, dc); - if (!B_IS_ITEMS_LEVEL (bh)) - return 1; + for (i = from, key = B_N_PDELIM_KEY(bh, from), dc++; i < to; + i++, key++, dc++) { + reiserfs_printk("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc); + if (i && i % 4 == 0) + printk("\n"); + } + printk("\n"); + return 0; +} - check_leaf (bh); +static int print_leaf(struct buffer_head *bh, int print_mode, int first, + int last) +{ + struct block_head *blkh; + struct item_head *ih; + int i, nr; + int from, to; - blkh = B_BLK_HEAD (bh); - ih = B_N_PITEM_HEAD (bh,0); - nr = blkh_nr_item(blkh); + if (!B_IS_ITEMS_LEVEL(bh)) + return 1; - printk ("\n===================================================================\n"); - reiserfs_printk ("LEAF NODE (%ld) contains %z\n", bh->b_blocknr, bh); + check_leaf(bh); - if (!(print_mode & PRINT_LEAF_ITEMS)) { - reiserfs_printk ("FIRST ITEM_KEY: %k, LAST ITEM KEY: %k\n", - &(ih->ih_key), &((ih + nr - 1)->ih_key)); - return 0; - } + blkh = B_BLK_HEAD(bh); + ih = B_N_PITEM_HEAD(bh, 0); + nr = blkh_nr_item(blkh); - if (first < 0 || first > nr - 1) - from = 0; - else - from = first; + printk + ("\n===================================================================\n"); + reiserfs_printk("LEAF NODE (%ld) contains %z\n", bh->b_blocknr, bh); - if (last < 0 || last > nr ) - to = nr; - else - to = last; + if (!(print_mode & PRINT_LEAF_ITEMS)) { + reiserfs_printk("FIRST ITEM_KEY: %k, LAST ITEM KEY: %k\n", + &(ih->ih_key), &((ih + nr - 1)->ih_key)); + return 0; + } - ih += from; - printk ("-------------------------------------------------------------------------------\n"); - printk ("|##| type | key | ilen | free_space | version | loc |\n"); - for (i = from; i < 
to; i++, ih ++) { - printk ("-------------------------------------------------------------------------------\n"); - reiserfs_printk ("|%2d| %h |\n", i, ih); - if (print_mode & PRINT_LEAF_ITEMS) - op_print_item (ih, B_I_PITEM (bh, ih)); - } + if (first < 0 || first > nr - 1) + from = 0; + else + from = first; + + if (last < 0 || last > nr) + to = nr; + else + to = last; + + ih += from; + printk + ("-------------------------------------------------------------------------------\n"); + printk + ("|##| type | key | ilen | free_space | version | loc |\n"); + for (i = from; i < to; i++, ih++) { + printk + ("-------------------------------------------------------------------------------\n"); + reiserfs_printk("|%2d| %h |\n", i, ih); + if (print_mode & PRINT_LEAF_ITEMS) + op_print_item(ih, B_I_PITEM(bh, ih)); + } - printk ("===================================================================\n"); + printk + ("===================================================================\n"); - return 0; + return 0; } -char * reiserfs_hashname(int code) +char *reiserfs_hashname(int code) { - if ( code == YURA_HASH) - return "rupasov"; - if ( code == TEA_HASH) - return "tea"; - if ( code == R5_HASH) - return "r5"; + if (code == YURA_HASH) + return "rupasov"; + if (code == TEA_HASH) + return "tea"; + if (code == R5_HASH) + return "r5"; - return "unknown"; + return "unknown"; } /* return 1 if this is not super block */ -static int print_super_block (struct buffer_head * bh) -{ - struct reiserfs_super_block * rs = (struct reiserfs_super_block *)(bh->b_data); - int skipped, data_blocks; - char *version; - char b[BDEVNAME_SIZE]; - - if (is_reiserfs_3_5(rs)) { - version = "3.5"; - } else if (is_reiserfs_3_6(rs)) { - version = "3.6"; - } else if (is_reiserfs_jr(rs)) { - version = ((sb_version(rs) == REISERFS_VERSION_2) ? 
- "3.6" : "3.5"); - } else { - return 1; - } - - printk ("%s\'s super block is in block %llu\n", bdevname (bh->b_bdev, b), - (unsigned long long)bh->b_blocknr); - printk ("Reiserfs version %s\n", version ); - printk ("Block count %u\n", sb_block_count(rs)); - printk ("Blocksize %d\n", sb_blocksize(rs)); - printk ("Free blocks %u\n", sb_free_blocks(rs)); - // FIXME: this would be confusing if - // someone stores reiserfs super block in some data block ;) +static int print_super_block(struct buffer_head *bh) +{ + struct reiserfs_super_block *rs = + (struct reiserfs_super_block *)(bh->b_data); + int skipped, data_blocks; + char *version; + char b[BDEVNAME_SIZE]; + + if (is_reiserfs_3_5(rs)) { + version = "3.5"; + } else if (is_reiserfs_3_6(rs)) { + version = "3.6"; + } else if (is_reiserfs_jr(rs)) { + version = ((sb_version(rs) == REISERFS_VERSION_2) ? + "3.6" : "3.5"); + } else { + return 1; + } + + printk("%s\'s super block is in block %llu\n", bdevname(bh->b_bdev, b), + (unsigned long long)bh->b_blocknr); + printk("Reiserfs version %s\n", version); + printk("Block count %u\n", sb_block_count(rs)); + printk("Blocksize %d\n", sb_blocksize(rs)); + printk("Free blocks %u\n", sb_free_blocks(rs)); + // FIXME: this would be confusing if + // someone stores reiserfs super block in some data block ;) // skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs); - skipped = bh->b_blocknr; - data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) - - (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) + 1 : sb_reserved_for_journal(rs)) - - sb_free_blocks(rs); - printk ("Busy blocks (skipped %d, bitmaps - %d, journal (or reserved) blocks - %d\n" - "1 super block, %d data blocks\n", - skipped, sb_bmap_nr(rs), (!is_reiserfs_jr(rs) ? 
(sb_jp_journal_size(rs) + 1) : - sb_reserved_for_journal(rs)) , data_blocks); - printk ("Root block %u\n", sb_root_block(rs)); - printk ("Journal block (first) %d\n", sb_jp_journal_1st_block(rs)); - printk ("Journal dev %d\n", sb_jp_journal_dev(rs)); - printk ("Journal orig size %d\n", sb_jp_journal_size(rs)); - printk ("FS state %d\n", sb_fs_state(rs)); - printk ("Hash function \"%s\"\n", - reiserfs_hashname(sb_hash_function_code(rs))); - - printk ("Tree height %d\n", sb_tree_height(rs)); - return 0; + skipped = bh->b_blocknr; + data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) - + (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) + + 1 : sb_reserved_for_journal(rs)) - sb_free_blocks(rs); + printk + ("Busy blocks (skipped %d, bitmaps - %d, journal (or reserved) blocks - %d\n" + "1 super block, %d data blocks\n", skipped, sb_bmap_nr(rs), + (!is_reiserfs_jr(rs) ? (sb_jp_journal_size(rs) + 1) : + sb_reserved_for_journal(rs)), data_blocks); + printk("Root block %u\n", sb_root_block(rs)); + printk("Journal block (first) %d\n", sb_jp_journal_1st_block(rs)); + printk("Journal dev %d\n", sb_jp_journal_dev(rs)); + printk("Journal orig size %d\n", sb_jp_journal_size(rs)); + printk("FS state %d\n", sb_fs_state(rs)); + printk("Hash function \"%s\"\n", + reiserfs_hashname(sb_hash_function_code(rs))); + + printk("Tree height %d\n", sb_tree_height(rs)); + return 0; } -static int print_desc_block (struct buffer_head * bh) +static int print_desc_block(struct buffer_head *bh) { - struct reiserfs_journal_desc * desc; + struct reiserfs_journal_desc *desc; - if (memcmp(get_journal_desc_magic (bh), JOURNAL_DESC_MAGIC, 8)) - return 1; + if (memcmp(get_journal_desc_magic(bh), JOURNAL_DESC_MAGIC, 8)) + return 1; - desc = (struct reiserfs_journal_desc *)(bh->b_data); - printk ("Desc block %llu (j_trans_id %d, j_mount_id %d, j_len %d)", - (unsigned long long)bh->b_blocknr, get_desc_trans_id (desc), get_desc_mount_id (desc), - get_desc_trans_len (desc)); + desc = (struct 
reiserfs_journal_desc *)(bh->b_data); + printk("Desc block %llu (j_trans_id %d, j_mount_id %d, j_len %d)", + (unsigned long long)bh->b_blocknr, get_desc_trans_id(desc), + get_desc_mount_id(desc), get_desc_trans_len(desc)); - return 0; + return 0; } - -void print_block (struct buffer_head * bh, ...)//int print_mode, int first, int last) +void print_block(struct buffer_head *bh, ...) //int print_mode, int first, int last) { - va_list args; - int mode, first, last; + va_list args; + int mode, first, last; - va_start (args, bh); + va_start(args, bh); - if ( ! bh ) { - printk("print_block: buffer is NULL\n"); - return; - } + if (!bh) { + printk("print_block: buffer is NULL\n"); + return; + } - mode = va_arg (args, int); - first = va_arg (args, int); - last = va_arg (args, int); - if (print_leaf (bh, mode, first, last)) - if (print_internal (bh, first, last)) - if (print_super_block (bh)) - if (print_desc_block (bh)) - printk ("Block %llu contains unformatted data\n", (unsigned long long)bh->b_blocknr); + mode = va_arg(args, int); + first = va_arg(args, int); + last = va_arg(args, int); + if (print_leaf(bh, mode, first, last)) + if (print_internal(bh, first, last)) + if (print_super_block(bh)) + if (print_desc_block(bh)) + printk + ("Block %llu contains unformatted data\n", + (unsigned long long)bh->b_blocknr); } - - static char print_tb_buf[2048]; /* this stores initial state of tree balance in the print_tb_buf */ -void store_print_tb (struct tree_balance * tb) -{ - int h = 0; - int i; - struct buffer_head * tbSh, * tbFh; - - if (!tb) - return; - - sprintf (print_tb_buf, "\n" - "BALANCING %d\n" - "MODE=%c, ITEM_POS=%d POS_IN_ITEM=%d\n" - "=====================================================================\n" - "* h * S * L * R * F * FL * FR * CFL * CFR *\n", - REISERFS_SB(tb->tb_sb)->s_do_balance, - tb->tb_mode, PATH_LAST_POSITION (tb->tb_path), tb->tb_path->pos_in_item); - - for (h = 0; h < sizeof(tb->insert_size) / sizeof (tb->insert_size[0]); h ++) { - if 
(PATH_H_PATH_OFFSET (tb->tb_path, h) <= tb->tb_path->path_length && - PATH_H_PATH_OFFSET (tb->tb_path, h) > ILLEGAL_PATH_ELEMENT_OFFSET) { - tbSh = PATH_H_PBUFFER (tb->tb_path, h); - tbFh = PATH_H_PPARENT (tb->tb_path, h); - } else { - tbSh = NULL; - tbFh = NULL; +void store_print_tb(struct tree_balance *tb) +{ + int h = 0; + int i; + struct buffer_head *tbSh, *tbFh; + + if (!tb) + return; + + sprintf(print_tb_buf, "\n" + "BALANCING %d\n" + "MODE=%c, ITEM_POS=%d POS_IN_ITEM=%d\n" + "=====================================================================\n" + "* h * S * L * R * F * FL * FR * CFL * CFR *\n", + REISERFS_SB(tb->tb_sb)->s_do_balance, + tb->tb_mode, PATH_LAST_POSITION(tb->tb_path), + tb->tb_path->pos_in_item); + + for (h = 0; h < sizeof(tb->insert_size) / sizeof(tb->insert_size[0]); + h++) { + if (PATH_H_PATH_OFFSET(tb->tb_path, h) <= + tb->tb_path->path_length + && PATH_H_PATH_OFFSET(tb->tb_path, + h) > ILLEGAL_PATH_ELEMENT_OFFSET) { + tbSh = PATH_H_PBUFFER(tb->tb_path, h); + tbFh = PATH_H_PPARENT(tb->tb_path, h); + } else { + tbSh = NULL; + tbFh = NULL; + } + sprintf(print_tb_buf + strlen(print_tb_buf), + "* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n", + h, + (tbSh) ? (long long)(tbSh->b_blocknr) : (-1LL), + (tbSh) ? atomic_read(&(tbSh->b_count)) : -1, + (tb->L[h]) ? (long long)(tb->L[h]->b_blocknr) : (-1LL), + (tb->L[h]) ? atomic_read(&(tb->L[h]->b_count)) : -1, + (tb->R[h]) ? (long long)(tb->R[h]->b_blocknr) : (-1LL), + (tb->R[h]) ? atomic_read(&(tb->R[h]->b_count)) : -1, + (tbFh) ? (long long)(tbFh->b_blocknr) : (-1LL), + (tb->FL[h]) ? (long long)(tb->FL[h]-> + b_blocknr) : (-1LL), + (tb->FR[h]) ? (long long)(tb->FR[h]-> + b_blocknr) : (-1LL), + (tb->CFL[h]) ? (long long)(tb->CFL[h]-> + b_blocknr) : (-1LL), + (tb->CFR[h]) ? 
(long long)(tb->CFR[h]-> + b_blocknr) : (-1LL)); } - sprintf (print_tb_buf + strlen (print_tb_buf), - "* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n", - h, - (tbSh) ? (long long)(tbSh->b_blocknr):(-1LL), - (tbSh) ? atomic_read (&(tbSh->b_count)) : -1, - (tb->L[h]) ? (long long)(tb->L[h]->b_blocknr):(-1LL), - (tb->L[h]) ? atomic_read (&(tb->L[h]->b_count)) : -1, - (tb->R[h]) ? (long long)(tb->R[h]->b_blocknr):(-1LL), - (tb->R[h]) ? atomic_read (&(tb->R[h]->b_count)) : -1, - (tbFh) ? (long long)(tbFh->b_blocknr):(-1LL), - (tb->FL[h]) ? (long long)(tb->FL[h]->b_blocknr):(-1LL), - (tb->FR[h]) ? (long long)(tb->FR[h]->b_blocknr):(-1LL), - (tb->CFL[h]) ? (long long)(tb->CFL[h]->b_blocknr):(-1LL), - (tb->CFR[h]) ? (long long)(tb->CFR[h]->b_blocknr):(-1LL)); - } - - sprintf (print_tb_buf + strlen (print_tb_buf), - "=====================================================================\n" - "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n" - "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n", - tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0],tb->rbytes, tb->blknum[0], - tb->s0num, tb->s1num,tb->s1bytes, tb->s2num, tb->s2bytes, tb->cur_blknum, tb->lkey[0], tb->rkey[0]); - - /* this prints balance parameters for non-leaf levels */ - h = 0; - do { - h++; - sprintf (print_tb_buf + strlen (print_tb_buf), - "* %d * %4d * %2d * * %2d * * %2d *\n", - h, tb->insert_size[h], tb->lnum[h], tb->rnum[h], tb->blknum[h]); - } while (tb->insert_size[h]); - - sprintf (print_tb_buf + strlen (print_tb_buf), - "=====================================================================\n" - "FEB list: "); - - /* print FEB list (list of buffers in form (bh (b_blocknr, b_count), that will be used for new nodes) */ - h = 0; - for (i = 0; i < sizeof (tb->FEB) / sizeof (tb->FEB[0]); i ++) - sprintf (print_tb_buf + strlen (print_tb_buf), - "%p (%llu %d)%s", tb->FEB[i], 
tb->FEB[i] ? (unsigned long long)tb->FEB[i]->b_blocknr : 0ULL, - tb->FEB[i] ? atomic_read (&(tb->FEB[i]->b_count)) : 0, - (i == sizeof (tb->FEB) / sizeof (tb->FEB[0]) - 1) ? "\n" : ", "); - - sprintf (print_tb_buf + strlen (print_tb_buf), - "======================== the end ====================================\n"); -} - -void print_cur_tb (char * mes) -{ - printk ("%s\n%s", mes, print_tb_buf); -} - -static void check_leaf_block_head (struct buffer_head * bh) -{ - struct block_head * blkh; - int nr; - - blkh = B_BLK_HEAD (bh); - nr = blkh_nr_item(blkh); - if ( nr > (bh->b_size - BLKH_SIZE) / IH_SIZE) - reiserfs_panic (NULL, "vs-6010: check_leaf_block_head: invalid item number %z", bh); - if ( blkh_free_space(blkh) > - bh->b_size - BLKH_SIZE - IH_SIZE * nr ) - reiserfs_panic (NULL, "vs-6020: check_leaf_block_head: invalid free space %z", bh); - -} -static void check_internal_block_head (struct buffer_head * bh) -{ - struct block_head * blkh; - - blkh = B_BLK_HEAD (bh); - if (!(B_LEVEL (bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL (bh) <= MAX_HEIGHT)) - reiserfs_panic (NULL, "vs-6025: check_internal_block_head: invalid level %z", bh); + sprintf(print_tb_buf + strlen(print_tb_buf), + "=====================================================================\n" + "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n" + "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n", + tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0], + tb->rbytes, tb->blknum[0], tb->s0num, tb->s1num, tb->s1bytes, + tb->s2num, tb->s2bytes, tb->cur_blknum, tb->lkey[0], + tb->rkey[0]); + + /* this prints balance parameters for non-leaf levels */ + h = 0; + do { + h++; + sprintf(print_tb_buf + strlen(print_tb_buf), + "* %d * %4d * %2d * * %2d * * %2d *\n", + h, tb->insert_size[h], tb->lnum[h], tb->rnum[h], + tb->blknum[h]); + } while (tb->insert_size[h]); - if (B_NR_ITEMS (bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE) - reiserfs_panic 
(NULL, "vs-6030: check_internal_block_head: invalid item number %z", bh); + sprintf(print_tb_buf + strlen(print_tb_buf), + "=====================================================================\n" + "FEB list: "); - if (B_FREE_SPACE (bh) != - bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS (bh) - DC_SIZE * (B_NR_ITEMS (bh) + 1)) - reiserfs_panic (NULL, "vs-6040: check_internal_block_head: invalid free space %z", bh); + /* print FEB list (list of buffers in form (bh (b_blocknr, b_count), that will be used for new nodes) */ + h = 0; + for (i = 0; i < sizeof(tb->FEB) / sizeof(tb->FEB[0]); i++) + sprintf(print_tb_buf + strlen(print_tb_buf), + "%p (%llu %d)%s", tb->FEB[i], + tb->FEB[i] ? (unsigned long long)tb->FEB[i]-> + b_blocknr : 0ULL, + tb->FEB[i] ? atomic_read(&(tb->FEB[i]->b_count)) : 0, + (i == + sizeof(tb->FEB) / sizeof(tb->FEB[0]) - + 1) ? "\n" : ", "); + sprintf(print_tb_buf + strlen(print_tb_buf), + "======================== the end ====================================\n"); } +void print_cur_tb(char *mes) +{ + printk("%s\n%s", mes, print_tb_buf); +} -void check_leaf (struct buffer_head * bh) +static void check_leaf_block_head(struct buffer_head *bh) { - int i; - struct item_head * ih; + struct block_head *blkh; + int nr; + + blkh = B_BLK_HEAD(bh); + nr = blkh_nr_item(blkh); + if (nr > (bh->b_size - BLKH_SIZE) / IH_SIZE) + reiserfs_panic(NULL, + "vs-6010: check_leaf_block_head: invalid item number %z", + bh); + if (blkh_free_space(blkh) > bh->b_size - BLKH_SIZE - IH_SIZE * nr) + reiserfs_panic(NULL, + "vs-6020: check_leaf_block_head: invalid free space %z", + bh); - if (!bh) - return; - check_leaf_block_head (bh); - for (i = 0, ih = B_N_PITEM_HEAD (bh, 0); i < B_NR_ITEMS (bh); i ++, ih ++) - op_check_item (ih, B_I_PITEM (bh, ih)); } +static void check_internal_block_head(struct buffer_head *bh) +{ + struct block_head *blkh; + + blkh = B_BLK_HEAD(bh); + if (!(B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL(bh) <= MAX_HEIGHT)) + reiserfs_panic(NULL, + "vs-6025: 
check_internal_block_head: invalid level %z", + bh); + + if (B_NR_ITEMS(bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE) + reiserfs_panic(NULL, + "vs-6030: check_internal_block_head: invalid item number %z", + bh); + + if (B_FREE_SPACE(bh) != + bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS(bh) - + DC_SIZE * (B_NR_ITEMS(bh) + 1)) + reiserfs_panic(NULL, + "vs-6040: check_internal_block_head: invalid free space %z", + bh); + +} -void check_internal (struct buffer_head * bh) +void check_leaf(struct buffer_head *bh) { - if (!bh) - return; - check_internal_block_head (bh); + int i; + struct item_head *ih; + + if (!bh) + return; + check_leaf_block_head(bh); + for (i = 0, ih = B_N_PITEM_HEAD(bh, 0); i < B_NR_ITEMS(bh); i++, ih++) + op_check_item(ih, B_I_PITEM(bh, ih)); } +void check_internal(struct buffer_head *bh) +{ + if (!bh) + return; + check_internal_block_head(bh); +} -void print_statistics (struct super_block * s) +void print_statistics(struct super_block *s) { - /* - printk ("reiserfs_put_super: session statistics: balances %d, fix_nodes %d, \ -bmap with search %d, without %d, dir2ind %d, ind2dir %d\n", - REISERFS_SB(s)->s_do_balance, REISERFS_SB(s)->s_fix_nodes, - REISERFS_SB(s)->s_bmaps, REISERFS_SB(s)->s_bmaps_without_search, - REISERFS_SB(s)->s_direct2indirect, REISERFS_SB(s)->s_indirect2direct); - */ + /* + printk ("reiserfs_put_super: session statistics: balances %d, fix_nodes %d, \ + bmap with search %d, without %d, dir2ind %d, ind2dir %d\n", + REISERFS_SB(s)->s_do_balance, REISERFS_SB(s)->s_fix_nodes, + REISERFS_SB(s)->s_bmaps, REISERFS_SB(s)->s_bmaps_without_search, + REISERFS_SB(s)->s_direct2indirect, REISERFS_SB(s)->s_indirect2direct); + */ } diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index e242ebc7f6f..fc2f43c75df 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -33,28 +33,27 @@ static int show_version(struct seq_file *m, struct super_block *sb) { char *format; - - if ( REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6) ) { + + 
if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) { format = "3.6"; - } else if ( REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_5) ) { + } else if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_5)) { format = "3.5"; } else { format = "unknown"; } - seq_printf(m, "%s format\twith checks %s\n", - format, + seq_printf(m, "%s format\twith checks %s\n", format, #if defined( CONFIG_REISERFS_CHECK ) - "on" + "on" #else - "off" + "off" #endif - ); + ); return 0; } -int reiserfs_global_version_in_proc( char *buffer, char **start, off_t offset, - int count, int *eof, void *data ) +int reiserfs_global_version_in_proc(char *buffer, char **start, off_t offset, + int count, int *eof, void *data) { *start = buffer; *eof = 1; @@ -79,87 +78,68 @@ int reiserfs_global_version_in_proc( char *buffer, char **start, off_t offset, #define DJF( x ) le32_to_cpu( rs -> x ) #define DJV( x ) le32_to_cpu( s_v1 -> x ) -#define DJP( x ) le32_to_cpu( jp -> x ) +#define DJP( x ) le32_to_cpu( jp -> x ) #define JF( x ) ( r -> s_journal -> x ) static int show_super(struct seq_file *m, struct super_block *sb) { struct reiserfs_sb_info *r = REISERFS_SB(sb); - - seq_printf(m, "state: \t%s\n" - "mount options: \t%s%s%s%s%s%s%s%s%s%s%s\n" - "gen. 
counter: \t%i\n" - "s_kmallocs: \t%i\n" - "s_disk_reads: \t%i\n" - "s_disk_writes: \t%i\n" - "s_fix_nodes: \t%i\n" - "s_do_balance: \t%i\n" - "s_unneeded_left_neighbor: \t%i\n" - "s_good_search_by_key_reada: \t%i\n" - "s_bmaps: \t%i\n" - "s_bmaps_without_search: \t%i\n" - "s_direct2indirect: \t%i\n" - "s_indirect2direct: \t%i\n" - "\n" - "max_hash_collisions: \t%i\n" - - "breads: \t%lu\n" - "bread_misses: \t%lu\n" - - "search_by_key: \t%lu\n" - "search_by_key_fs_changed: \t%lu\n" - "search_by_key_restarted: \t%lu\n" - - "insert_item_restarted: \t%lu\n" - "paste_into_item_restarted: \t%lu\n" - "cut_from_item_restarted: \t%lu\n" - "delete_solid_item_restarted: \t%lu\n" - "delete_item_restarted: \t%lu\n" - - "leaked_oid: \t%lu\n" - "leaves_removable: \t%lu\n", - - SF( s_mount_state ) == REISERFS_VALID_FS ? - "REISERFS_VALID_FS" : "REISERFS_ERROR_FS", - reiserfs_r5_hash( sb ) ? "FORCE_R5 " : "", - reiserfs_rupasov_hash( sb ) ? "FORCE_RUPASOV " : "", - reiserfs_tea_hash( sb ) ? "FORCE_TEA " : "", - reiserfs_hash_detect( sb ) ? "DETECT_HASH " : "", - reiserfs_no_border( sb ) ? "NO_BORDER " : "BORDER ", - reiserfs_no_unhashed_relocation( sb ) ? "NO_UNHASHED_RELOCATION " : "", - reiserfs_hashed_relocation( sb ) ? "UNHASHED_RELOCATION " : "", - reiserfs_test4( sb ) ? "TEST4 " : "", - have_large_tails( sb ) ? "TAILS " : have_small_tails(sb)?"SMALL_TAILS ":"NO_TAILS ", - replay_only( sb ) ? "REPLAY_ONLY " : "", - convert_reiserfs( sb ) ? 
"CONV " : "", - - atomic_read( &r -> s_generation_counter ), - SF( s_kmallocs ), - SF( s_disk_reads ), - SF( s_disk_writes ), - SF( s_fix_nodes ), - SF( s_do_balance ), - SF( s_unneeded_left_neighbor ), - SF( s_good_search_by_key_reada ), - SF( s_bmaps ), - SF( s_bmaps_without_search ), - SF( s_direct2indirect ), - SF( s_indirect2direct ), - SFP( max_hash_collisions ), - SFP( breads ), - SFP( bread_miss ), - SFP( search_by_key ), - SFP( search_by_key_fs_changed ), - SFP( search_by_key_restarted ), - - SFP( insert_item_restarted ), - SFP( paste_into_item_restarted ), - SFP( cut_from_item_restarted ), - SFP( delete_solid_item_restarted ), - SFP( delete_item_restarted ), - - SFP( leaked_oid ), - SFP( leaves_removable ) ); + + seq_printf(m, "state: \t%s\n" + "mount options: \t%s%s%s%s%s%s%s%s%s%s%s\n" + "gen. counter: \t%i\n" + "s_kmallocs: \t%i\n" + "s_disk_reads: \t%i\n" + "s_disk_writes: \t%i\n" + "s_fix_nodes: \t%i\n" + "s_do_balance: \t%i\n" + "s_unneeded_left_neighbor: \t%i\n" + "s_good_search_by_key_reada: \t%i\n" + "s_bmaps: \t%i\n" + "s_bmaps_without_search: \t%i\n" + "s_direct2indirect: \t%i\n" + "s_indirect2direct: \t%i\n" + "\n" + "max_hash_collisions: \t%i\n" + "breads: \t%lu\n" + "bread_misses: \t%lu\n" + "search_by_key: \t%lu\n" + "search_by_key_fs_changed: \t%lu\n" + "search_by_key_restarted: \t%lu\n" + "insert_item_restarted: \t%lu\n" + "paste_into_item_restarted: \t%lu\n" + "cut_from_item_restarted: \t%lu\n" + "delete_solid_item_restarted: \t%lu\n" + "delete_item_restarted: \t%lu\n" + "leaked_oid: \t%lu\n" + "leaves_removable: \t%lu\n", + SF(s_mount_state) == REISERFS_VALID_FS ? + "REISERFS_VALID_FS" : "REISERFS_ERROR_FS", + reiserfs_r5_hash(sb) ? "FORCE_R5 " : "", + reiserfs_rupasov_hash(sb) ? "FORCE_RUPASOV " : "", + reiserfs_tea_hash(sb) ? "FORCE_TEA " : "", + reiserfs_hash_detect(sb) ? "DETECT_HASH " : "", + reiserfs_no_border(sb) ? "NO_BORDER " : "BORDER ", + reiserfs_no_unhashed_relocation(sb) ? 
+ "NO_UNHASHED_RELOCATION " : "", + reiserfs_hashed_relocation(sb) ? "UNHASHED_RELOCATION " : "", + reiserfs_test4(sb) ? "TEST4 " : "", + have_large_tails(sb) ? "TAILS " : have_small_tails(sb) ? + "SMALL_TAILS " : "NO_TAILS ", + replay_only(sb) ? "REPLAY_ONLY " : "", + convert_reiserfs(sb) ? "CONV " : "", + atomic_read(&r->s_generation_counter), SF(s_kmallocs), + SF(s_disk_reads), SF(s_disk_writes), SF(s_fix_nodes), + SF(s_do_balance), SF(s_unneeded_left_neighbor), + SF(s_good_search_by_key_reada), SF(s_bmaps), + SF(s_bmaps_without_search), SF(s_direct2indirect), + SF(s_indirect2direct), SFP(max_hash_collisions), SFP(breads), + SFP(bread_miss), SFP(search_by_key), + SFP(search_by_key_fs_changed), SFP(search_by_key_restarted), + SFP(insert_item_restarted), SFP(paste_into_item_restarted), + SFP(cut_from_item_restarted), + SFP(delete_solid_item_restarted), SFP(delete_item_restarted), + SFP(leaked_oid), SFP(leaves_removable)); return 0; } @@ -169,61 +149,55 @@ static int show_per_level(struct seq_file *m, struct super_block *sb) struct reiserfs_sb_info *r = REISERFS_SB(sb); int level; - seq_printf(m, "level\t" - " balances" - " [sbk: reads" - " fs_changed" - " restarted]" - " free space" - " items" - " can_remove" - " lnum" - " rnum" - " lbytes" - " rbytes" - " get_neig" - " get_neig_res" - " need_l_neig" - " need_r_neig" - "\n" - - ); - - for( level = 0 ; level < MAX_HEIGHT ; ++ level ) { - seq_printf(m, "%i\t" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - " %12li" - " %12li" - " %12li" - " %12li" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - "\n", - level, - SFPL( balance_at ), - SFPL( sbk_read_at ), - SFPL( sbk_fs_changed ), - SFPL( sbk_restarted ), - SFPL( free_at ), - SFPL( items_at ), - SFPL( can_node_be_removed ), - SFPL( lnum ), - SFPL( rnum ), - SFPL( lbytes ), - SFPL( rbytes ), - SFPL( get_neighbors ), - SFPL( get_neighbors_restart ), - SFPL( need_l_neighbor ), - SFPL( need_r_neighbor ) - ); + seq_printf(m, "level\t" + " 
balances" + " [sbk: reads" + " fs_changed" + " restarted]" + " free space" + " items" + " can_remove" + " lnum" + " rnum" + " lbytes" + " rbytes" + " get_neig" + " get_neig_res" " need_l_neig" " need_r_neig" "\n"); + + for (level = 0; level < MAX_HEIGHT; ++level) { + seq_printf(m, "%i\t" + " %12lu" + " %12lu" + " %12lu" + " %12lu" + " %12lu" + " %12lu" + " %12lu" + " %12li" + " %12li" + " %12li" + " %12li" + " %12lu" + " %12lu" + " %12lu" + " %12lu" + "\n", + level, + SFPL(balance_at), + SFPL(sbk_read_at), + SFPL(sbk_fs_changed), + SFPL(sbk_restarted), + SFPL(free_at), + SFPL(items_at), + SFPL(can_node_be_removed), + SFPL(lnum), + SFPL(rnum), + SFPL(lbytes), + SFPL(rbytes), + SFPL(get_neighbors), + SFPL(get_neighbors_restart), + SFPL(need_l_neighbor), SFPL(need_r_neighbor) + ); } return 0; } @@ -232,31 +206,30 @@ static int show_bitmap(struct seq_file *m, struct super_block *sb) { struct reiserfs_sb_info *r = REISERFS_SB(sb); - seq_printf(m, "free_block: %lu\n" - " scan_bitmap:" - " wait" - " bmap" - " retry" - " stolen" - " journal_hint" - "journal_nohint" - "\n" - " %14lu" - " %14lu" - " %14lu" - " %14lu" - " %14lu" - " %14lu" - " %14lu" - "\n", - SFP( free_block ), - SFPF( call ), - SFPF( wait ), - SFPF( bmap ), - SFPF( retry ), - SFPF( stolen ), - SFPF( in_journal_hint ), - SFPF( in_journal_nohint ) ); + seq_printf(m, "free_block: %lu\n" + " scan_bitmap:" + " wait" + " bmap" + " retry" + " stolen" + " journal_hint" + "journal_nohint" + "\n" + " %14lu" + " %14lu" + " %14lu" + " %14lu" + " %14lu" + " %14lu" + " %14lu" + "\n", + SFP(free_block), + SFPF(call), + SFPF(wait), + SFPF(bmap), + SFPF(retry), + SFPF(stolen), + SFPF(in_journal_hint), SFPF(in_journal_nohint)); return 0; } @@ -264,46 +237,42 @@ static int show_bitmap(struct seq_file *m, struct super_block *sb) static int show_on_disk_super(struct seq_file *m, struct super_block *sb) { struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); - struct reiserfs_super_block *rs = sb_info -> s_rs; - int hash_code = 
DFL( s_hash_function_code ); - __u32 flags = DJF( s_flags ); - - seq_printf(m, "block_count: \t%i\n" - "free_blocks: \t%i\n" - "root_block: \t%i\n" - "blocksize: \t%i\n" - "oid_maxsize: \t%i\n" - "oid_cursize: \t%i\n" - "umount_state: \t%i\n" - "magic: \t%10.10s\n" - "fs_state: \t%i\n" - "hash: \t%s\n" - "tree_height: \t%i\n" - "bmap_nr: \t%i\n" - "version: \t%i\n" - "flags: \t%x[%s]\n" - "reserved_for_journal: \t%i\n", - - DFL( s_block_count ), - DFL( s_free_blocks ), - DFL( s_root_block ), - DF( s_blocksize ), - DF( s_oid_maxsize ), - DF( s_oid_cursize ), - DF( s_umount_state ), - rs -> s_v1.s_magic, - DF( s_fs_state ), - hash_code == TEA_HASH ? "tea" : - ( hash_code == YURA_HASH ) ? "rupasov" : - ( hash_code == R5_HASH ) ? "r5" : - ( hash_code == UNSET_HASH ) ? "unset" : "unknown", - DF( s_tree_height ), - DF( s_bmap_nr ), - DF( s_version ), - flags, - ( flags & reiserfs_attrs_cleared ) - ? "attrs_cleared" : "", - DF (s_reserved_for_journal)); + struct reiserfs_super_block *rs = sb_info->s_rs; + int hash_code = DFL(s_hash_function_code); + __u32 flags = DJF(s_flags); + + seq_printf(m, "block_count: \t%i\n" + "free_blocks: \t%i\n" + "root_block: \t%i\n" + "blocksize: \t%i\n" + "oid_maxsize: \t%i\n" + "oid_cursize: \t%i\n" + "umount_state: \t%i\n" + "magic: \t%10.10s\n" + "fs_state: \t%i\n" + "hash: \t%s\n" + "tree_height: \t%i\n" + "bmap_nr: \t%i\n" + "version: \t%i\n" + "flags: \t%x[%s]\n" + "reserved_for_journal: \t%i\n", + DFL(s_block_count), + DFL(s_free_blocks), + DFL(s_root_block), + DF(s_blocksize), + DF(s_oid_maxsize), + DF(s_oid_cursize), + DF(s_umount_state), + rs->s_v1.s_magic, + DF(s_fs_state), + hash_code == TEA_HASH ? "tea" : + (hash_code == YURA_HASH) ? "rupasov" : + (hash_code == R5_HASH) ? "r5" : + (hash_code == UNSET_HASH) ? "unset" : "unknown", + DF(s_tree_height), + DF(s_bmap_nr), + DF(s_version), flags, (flags & reiserfs_attrs_cleared) + ? 
"attrs_cleared" : "", DF(s_reserved_for_journal)); return 0; } @@ -311,131 +280,122 @@ static int show_on_disk_super(struct seq_file *m, struct super_block *sb) static int show_oidmap(struct seq_file *m, struct super_block *sb) { struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); - struct reiserfs_super_block *rs = sb_info -> s_rs; - unsigned int mapsize = le16_to_cpu( rs -> s_v1.s_oid_cursize ); + struct reiserfs_super_block *rs = sb_info->s_rs; + unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize); unsigned long total_used = 0; int i; - for( i = 0 ; i < mapsize ; ++i ) { + for (i = 0; i < mapsize; ++i) { __u32 right; - right = ( i == mapsize - 1 ) ? MAX_KEY_OBJECTID : MAP( i + 1 ); + right = (i == mapsize - 1) ? MAX_KEY_OBJECTID : MAP(i + 1); seq_printf(m, "%s: [ %x .. %x )\n", - ( i & 1 ) ? "free" : "used", MAP( i ), right ); - if( ! ( i & 1 ) ) { - total_used += right - MAP( i ); + (i & 1) ? "free" : "used", MAP(i), right); + if (!(i & 1)) { + total_used += right - MAP(i); } } #if defined( REISERFS_USE_OIDMAPF ) - if( sb_info -> oidmap.use_file && ( sb_info -> oidmap.mapf != NULL ) ) { + if (sb_info->oidmap.use_file && (sb_info->oidmap.mapf != NULL)) { loff_t size = sb_info->oidmap.mapf->f_dentry->d_inode->i_size; - total_used += size / sizeof( reiserfs_oidinterval_d_t ); + total_used += size / sizeof(reiserfs_oidinterval_d_t); } #endif - seq_printf(m, "total: \t%i [%i/%i] used: %lu [exact]\n", - mapsize, - mapsize, le16_to_cpu( rs -> s_v1.s_oid_maxsize ), - total_used); + seq_printf(m, "total: \t%i [%i/%i] used: %lu [exact]\n", + mapsize, + mapsize, le16_to_cpu(rs->s_v1.s_oid_maxsize), total_used); return 0; } static int show_journal(struct seq_file *m, struct super_block *sb) { struct reiserfs_sb_info *r = REISERFS_SB(sb); - struct reiserfs_super_block *rs = r -> s_rs; + struct reiserfs_super_block *rs = r->s_rs; struct journal_params *jp = &rs->s_v1.s_journal; char b[BDEVNAME_SIZE]; - - - seq_printf(m, /* on-disk fields */ - "jp_journal_1st_block: 
\t%i\n" - "jp_journal_dev: \t%s[%x]\n" - "jp_journal_size: \t%i\n" - "jp_journal_trans_max: \t%i\n" - "jp_journal_magic: \t%i\n" - "jp_journal_max_batch: \t%i\n" - "jp_journal_max_commit_age: \t%i\n" - "jp_journal_max_trans_age: \t%i\n" - /* incore fields */ - "j_1st_reserved_block: \t%i\n" - "j_state: \t%li\n" - "j_trans_id: \t%lu\n" - "j_mount_id: \t%lu\n" - "j_start: \t%lu\n" - "j_len: \t%lu\n" - "j_len_alloc: \t%lu\n" - "j_wcount: \t%i\n" - "j_bcount: \t%lu\n" - "j_first_unflushed_offset: \t%lu\n" - "j_last_flush_trans_id: \t%lu\n" - "j_trans_start_time: \t%li\n" - "j_list_bitmap_index: \t%i\n" - "j_must_wait: \t%i\n" - "j_next_full_flush: \t%i\n" - "j_next_async_flush: \t%i\n" - "j_cnode_used: \t%i\n" - "j_cnode_free: \t%i\n" - "\n" - /* reiserfs_proc_info_data_t.journal fields */ - "in_journal: \t%12lu\n" - "in_journal_bitmap: \t%12lu\n" - "in_journal_reusable: \t%12lu\n" - "lock_journal: \t%12lu\n" - "lock_journal_wait: \t%12lu\n" - "journal_begin: \t%12lu\n" - "journal_relock_writers: \t%12lu\n" - "journal_relock_wcount: \t%12lu\n" - "mark_dirty: \t%12lu\n" - "mark_dirty_already: \t%12lu\n" - "mark_dirty_notjournal: \t%12lu\n" - "restore_prepared: \t%12lu\n" - "prepare: \t%12lu\n" - "prepare_retry: \t%12lu\n", - - DJP( jp_journal_1st_block ), - bdevname(SB_JOURNAL(sb)->j_dev_bd, b), - DJP( jp_journal_dev ), - DJP( jp_journal_size ), - DJP( jp_journal_trans_max ), - DJP( jp_journal_magic ), - DJP( jp_journal_max_batch ), - SB_JOURNAL(sb)->j_max_commit_age, - DJP( jp_journal_max_trans_age ), - - JF( j_1st_reserved_block ), - JF( j_state ), - JF( j_trans_id ), - JF( j_mount_id ), - JF( j_start ), - JF( j_len ), - JF( j_len_alloc ), - atomic_read( & r -> s_journal -> j_wcount ), - JF( j_bcount ), - JF( j_first_unflushed_offset ), - JF( j_last_flush_trans_id ), - JF( j_trans_start_time ), - JF( j_list_bitmap_index ), - JF( j_must_wait ), - JF( j_next_full_flush ), - JF( j_next_async_flush ), - JF( j_cnode_used ), - JF( j_cnode_free ), - - SFPJ( in_journal ), - 
SFPJ( in_journal_bitmap ), - SFPJ( in_journal_reusable ), - SFPJ( lock_journal ), - SFPJ( lock_journal_wait ), - SFPJ( journal_being ), - SFPJ( journal_relock_writers ), - SFPJ( journal_relock_wcount ), - SFPJ( mark_dirty ), - SFPJ( mark_dirty_already ), - SFPJ( mark_dirty_notjournal ), - SFPJ( restore_prepared ), - SFPJ( prepare ), - SFPJ( prepare_retry ) - ); + + seq_printf(m, /* on-disk fields */ + "jp_journal_1st_block: \t%i\n" + "jp_journal_dev: \t%s[%x]\n" + "jp_journal_size: \t%i\n" + "jp_journal_trans_max: \t%i\n" + "jp_journal_magic: \t%i\n" + "jp_journal_max_batch: \t%i\n" + "jp_journal_max_commit_age: \t%i\n" + "jp_journal_max_trans_age: \t%i\n" + /* incore fields */ + "j_1st_reserved_block: \t%i\n" + "j_state: \t%li\n" + "j_trans_id: \t%lu\n" + "j_mount_id: \t%lu\n" + "j_start: \t%lu\n" + "j_len: \t%lu\n" + "j_len_alloc: \t%lu\n" + "j_wcount: \t%i\n" + "j_bcount: \t%lu\n" + "j_first_unflushed_offset: \t%lu\n" + "j_last_flush_trans_id: \t%lu\n" + "j_trans_start_time: \t%li\n" + "j_list_bitmap_index: \t%i\n" + "j_must_wait: \t%i\n" + "j_next_full_flush: \t%i\n" + "j_next_async_flush: \t%i\n" + "j_cnode_used: \t%i\n" "j_cnode_free: \t%i\n" "\n" + /* reiserfs_proc_info_data_t.journal fields */ + "in_journal: \t%12lu\n" + "in_journal_bitmap: \t%12lu\n" + "in_journal_reusable: \t%12lu\n" + "lock_journal: \t%12lu\n" + "lock_journal_wait: \t%12lu\n" + "journal_begin: \t%12lu\n" + "journal_relock_writers: \t%12lu\n" + "journal_relock_wcount: \t%12lu\n" + "mark_dirty: \t%12lu\n" + "mark_dirty_already: \t%12lu\n" + "mark_dirty_notjournal: \t%12lu\n" + "restore_prepared: \t%12lu\n" + "prepare: \t%12lu\n" + "prepare_retry: \t%12lu\n", + DJP(jp_journal_1st_block), + bdevname(SB_JOURNAL(sb)->j_dev_bd, b), + DJP(jp_journal_dev), + DJP(jp_journal_size), + DJP(jp_journal_trans_max), + DJP(jp_journal_magic), + DJP(jp_journal_max_batch), + SB_JOURNAL(sb)->j_max_commit_age, + DJP(jp_journal_max_trans_age), + JF(j_1st_reserved_block), + JF(j_state), + JF(j_trans_id), + 
JF(j_mount_id), + JF(j_start), + JF(j_len), + JF(j_len_alloc), + atomic_read(&r->s_journal->j_wcount), + JF(j_bcount), + JF(j_first_unflushed_offset), + JF(j_last_flush_trans_id), + JF(j_trans_start_time), + JF(j_list_bitmap_index), + JF(j_must_wait), + JF(j_next_full_flush), + JF(j_next_async_flush), + JF(j_cnode_used), + JF(j_cnode_free), + SFPJ(in_journal), + SFPJ(in_journal_bitmap), + SFPJ(in_journal_reusable), + SFPJ(lock_journal), + SFPJ(lock_journal_wait), + SFPJ(journal_being), + SFPJ(journal_relock_writers), + SFPJ(journal_relock_wcount), + SFPJ(mark_dirty), + SFPJ(mark_dirty_already), + SFPJ(mark_dirty_notjournal), + SFPJ(restore_prepared), SFPJ(prepare), SFPJ(prepare_retry) + ); return 0; } @@ -450,7 +410,7 @@ static int set_sb(struct super_block *sb, void *data) return -ENOENT; } -static void *r_start(struct seq_file *m, loff_t *pos) +static void *r_start(struct seq_file *m, loff_t * pos) { struct proc_dir_entry *de = m->private; struct super_block *s = de->parent->data; @@ -472,7 +432,7 @@ static void *r_start(struct seq_file *m, loff_t *pos) return s; } -static void *r_next(struct seq_file *m, void *v, loff_t *pos) +static void *r_next(struct seq_file *m, void *v, loff_t * pos) { ++*pos; if (v) @@ -489,7 +449,7 @@ static void r_stop(struct seq_file *m, void *v) static int r_show(struct seq_file *m, void *v) { struct proc_dir_entry *de = m->private; - int (*show)(struct seq_file *, struct super_block *) = de->data; + int (*show) (struct seq_file *, struct super_block *) = de->data; return show(m, v); } @@ -512,17 +472,17 @@ static int r_open(struct inode *inode, struct file *file) } static struct file_operations r_file_operations = { - .open = r_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, + .open = r_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, }; static struct proc_dir_entry *proc_info_root = NULL; static const char proc_info_root_name[] = "fs/reiserfs"; static void add_file(struct 
super_block *sb, char *name, - int (*func)(struct seq_file *, struct super_block *)) + int (*func) (struct seq_file *, struct super_block *)) { struct proc_dir_entry *de; de = create_proc_entry(name, 0, REISERFS_SB(sb)->procdir); @@ -532,11 +492,12 @@ static void add_file(struct super_block *sb, char *name, } } -int reiserfs_proc_info_init( struct super_block *sb ) +int reiserfs_proc_info_init(struct super_block *sb) { - spin_lock_init( & __PINFO( sb ).lock ); - REISERFS_SB(sb)->procdir = proc_mkdir(reiserfs_bdevname (sb), proc_info_root); - if( REISERFS_SB(sb)->procdir ) { + spin_lock_init(&__PINFO(sb).lock); + REISERFS_SB(sb)->procdir = + proc_mkdir(reiserfs_bdevname(sb), proc_info_root); + if (REISERFS_SB(sb)->procdir) { REISERFS_SB(sb)->procdir->owner = THIS_MODULE; REISERFS_SB(sb)->procdir->data = sb; add_file(sb, "version", show_version); @@ -549,11 +510,11 @@ int reiserfs_proc_info_init( struct super_block *sb ) return 0; } reiserfs_warning(sb, "reiserfs: cannot create /proc/%s/%s", - proc_info_root_name, reiserfs_bdevname (sb) ); + proc_info_root_name, reiserfs_bdevname(sb)); return 1; } -int reiserfs_proc_info_done( struct super_block *sb ) +int reiserfs_proc_info_done(struct super_block *sb) { struct proc_dir_entry *de = REISERFS_SB(sb)->procdir; if (de) { @@ -565,48 +526,48 @@ int reiserfs_proc_info_done( struct super_block *sb ) remove_proc_entry("super", de); remove_proc_entry("version", de); } - spin_lock( & __PINFO( sb ).lock ); - __PINFO( sb ).exiting = 1; - spin_unlock( & __PINFO( sb ).lock ); - if ( proc_info_root ) { - remove_proc_entry( reiserfs_bdevname (sb), proc_info_root ); + spin_lock(&__PINFO(sb).lock); + __PINFO(sb).exiting = 1; + spin_unlock(&__PINFO(sb).lock); + if (proc_info_root) { + remove_proc_entry(reiserfs_bdevname(sb), proc_info_root); REISERFS_SB(sb)->procdir = NULL; } return 0; } -struct proc_dir_entry *reiserfs_proc_register_global( char *name, - read_proc_t *func ) +struct proc_dir_entry *reiserfs_proc_register_global(char 
*name, + read_proc_t * func) { - return ( proc_info_root ) ? create_proc_read_entry( name, 0, - proc_info_root, - func, NULL ) : NULL; + return (proc_info_root) ? create_proc_read_entry(name, 0, + proc_info_root, + func, NULL) : NULL; } -void reiserfs_proc_unregister_global( const char *name ) +void reiserfs_proc_unregister_global(const char *name) { - remove_proc_entry( name, proc_info_root ); + remove_proc_entry(name, proc_info_root); } -int reiserfs_proc_info_global_init( void ) +int reiserfs_proc_info_global_init(void) { - if( proc_info_root == NULL ) { + if (proc_info_root == NULL) { proc_info_root = proc_mkdir(proc_info_root_name, NULL); - if( proc_info_root ) { - proc_info_root -> owner = THIS_MODULE; + if (proc_info_root) { + proc_info_root->owner = THIS_MODULE; } else { - reiserfs_warning (NULL, - "reiserfs: cannot create /proc/%s", - proc_info_root_name ); + reiserfs_warning(NULL, + "reiserfs: cannot create /proc/%s", + proc_info_root_name); return 1; } } return 0; } -int reiserfs_proc_info_global_done( void ) +int reiserfs_proc_info_global_done(void) { - if ( proc_info_root != NULL ) { + if (proc_info_root != NULL) { proc_info_root = NULL; remove_proc_entry(proc_info_root_name, NULL); } @@ -616,22 +577,40 @@ int reiserfs_proc_info_global_done( void ) /* REISERFS_PROC_INFO */ #else -int reiserfs_proc_info_init( struct super_block *sb ) { return 0; } -int reiserfs_proc_info_done( struct super_block *sb ) { return 0; } +int reiserfs_proc_info_init(struct super_block *sb) +{ + return 0; +} +int reiserfs_proc_info_done(struct super_block *sb) +{ + return 0; +} -struct proc_dir_entry *reiserfs_proc_register_global( char *name, - read_proc_t *func ) -{ return NULL; } +struct proc_dir_entry *reiserfs_proc_register_global(char *name, + read_proc_t * func) +{ + return NULL; +} -void reiserfs_proc_unregister_global( const char *name ) {;} +void reiserfs_proc_unregister_global(const char *name) +{; +} -int reiserfs_proc_info_global_init( void ) { return 0; } -int 
reiserfs_proc_info_global_done( void ) { return 0; } +int reiserfs_proc_info_global_init(void) +{ + return 0; +} +int reiserfs_proc_info_global_done(void) +{ + return 0; +} -int reiserfs_global_version_in_proc( char *buffer, char **start, - off_t offset, - int count, int *eof, void *data ) -{ return 0; } +int reiserfs_global_version_in_proc(char *buffer, char **start, + off_t offset, + int count, int *eof, void *data) +{ + return 0; +} /* REISERFS_PROC_INFO */ #endif diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 170012078b7..39cc7f47f5d 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -1,7 +1,7 @@ /* * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ - + /* * Written by Alexander Zarochentcev. * @@ -17,23 +17,23 @@ #include #include -int reiserfs_resize (struct super_block * s, unsigned long block_count_new) +int reiserfs_resize(struct super_block *s, unsigned long block_count_new) { - int err = 0; - struct reiserfs_super_block * sb; - struct reiserfs_bitmap_info *bitmap; + int err = 0; + struct reiserfs_super_block *sb; + struct reiserfs_bitmap_info *bitmap; struct reiserfs_bitmap_info *old_bitmap = SB_AP_BITMAP(s); - struct buffer_head * bh; + struct buffer_head *bh; struct reiserfs_transaction_handle th; unsigned int bmap_nr_new, bmap_nr; unsigned int block_r_new, block_r; - - struct reiserfs_list_bitmap * jb; + + struct reiserfs_list_bitmap *jb; struct reiserfs_list_bitmap jbitmap[JOURNAL_NUM_BITMAPS]; - + unsigned long int block_count, free_blocks; int i; - int copy_size ; + int copy_size; sb = SB_DISK_SUPER_BLOCK(s); @@ -47,136 +47,145 @@ int reiserfs_resize (struct super_block * s, unsigned long block_count_new) if (!bh) { printk("reiserfs_resize: can\'t read last block\n"); return -EINVAL; - } + } bforget(bh); /* old disk layout detection; those partitions can be mounted, but * cannot be resized */ - if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size - != REISERFS_DISK_OFFSET_IN_BYTES ) 
{ - printk("reiserfs_resize: unable to resize a reiserfs without distributed bitmap (fs version < 3.5.12)\n"); + if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size + != REISERFS_DISK_OFFSET_IN_BYTES) { + printk + ("reiserfs_resize: unable to resize a reiserfs without distributed bitmap (fs version < 3.5.12)\n"); return -ENOTSUPP; } - + /* count used bits in last bitmap block */ - block_r = SB_BLOCK_COUNT(s) - - (SB_BMAP_NR(s) - 1) * s->s_blocksize * 8; - + block_r = SB_BLOCK_COUNT(s) - (SB_BMAP_NR(s) - 1) * s->s_blocksize * 8; + /* count bitmap blocks in new fs */ - bmap_nr_new = block_count_new / ( s->s_blocksize * 8 ); + bmap_nr_new = block_count_new / (s->s_blocksize * 8); block_r_new = block_count_new - bmap_nr_new * s->s_blocksize * 8; - if (block_r_new) + if (block_r_new) bmap_nr_new++; else block_r_new = s->s_blocksize * 8; /* save old values */ block_count = SB_BLOCK_COUNT(s); - bmap_nr = SB_BMAP_NR(s); + bmap_nr = SB_BMAP_NR(s); /* resizing of reiserfs bitmaps (journal and real), if needed */ - if (bmap_nr_new > bmap_nr) { - /* reallocate journal bitmaps */ - if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) { - printk("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); - unlock_super(s) ; - return -ENOMEM ; - } - /* the new journal bitmaps are zero filled, now we copy in the bitmap - ** node pointers from the old journal bitmap structs, and then - ** transfer the new data structures into the journal struct. - ** - ** using the copy_size var below allows this code to work for - ** both shrinking and expanding the FS. - */ - copy_size = bmap_nr_new < bmap_nr ? 
bmap_nr_new : bmap_nr ; - copy_size = copy_size * sizeof(struct reiserfs_list_bitmap_node *) ; - for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { - struct reiserfs_bitmap_node **node_tmp ; - jb = SB_JOURNAL(s)->j_list_bitmap + i ; - memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size) ; - - /* just in case vfree schedules on us, copy the new - ** pointer into the journal struct before freeing the - ** old one - */ - node_tmp = jb->bitmaps ; - jb->bitmaps = jbitmap[i].bitmaps ; - vfree(node_tmp) ; - } - - /* allocate additional bitmap blocks, reallocate array of bitmap - * block pointers */ - bitmap = vmalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); - if (!bitmap) { - /* Journal bitmaps are still supersized, but the memory isn't - * leaked, so I guess it's ok */ - printk("reiserfs_resize: unable to allocate memory.\n"); - return -ENOMEM; - } - memset (bitmap, 0, sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); - for (i = 0; i < bmap_nr; i++) - bitmap[i] = old_bitmap[i]; - - /* This doesn't go through the journal, but it doesn't have to. - * The changes are still atomic: We're synced up when the journal - * transaction begins, and the new bitmaps don't matter if the - * transaction fails. 
*/ - for (i = bmap_nr; i < bmap_nr_new; i++) { - bitmap[i].bh = sb_getblk(s, i * s->s_blocksize * 8); - memset(bitmap[i].bh->b_data, 0, sb_blocksize(sb)); - reiserfs_test_and_set_le_bit(0, bitmap[i].bh->b_data); - - set_buffer_uptodate(bitmap[i].bh); - mark_buffer_dirty(bitmap[i].bh) ; - sync_dirty_buffer(bitmap[i].bh); - // update bitmap_info stuff - bitmap[i].first_zero_hint=1; - bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; - } - /* free old bitmap blocks array */ - SB_AP_BITMAP(s) = bitmap; - vfree (old_bitmap); + if (bmap_nr_new > bmap_nr) { + /* reallocate journal bitmaps */ + if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) { + printk + ("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); + unlock_super(s); + return -ENOMEM; + } + /* the new journal bitmaps are zero filled, now we copy in the bitmap + ** node pointers from the old journal bitmap structs, and then + ** transfer the new data structures into the journal struct. + ** + ** using the copy_size var below allows this code to work for + ** both shrinking and expanding the FS. + */ + copy_size = bmap_nr_new < bmap_nr ? 
bmap_nr_new : bmap_nr; + copy_size = + copy_size * sizeof(struct reiserfs_list_bitmap_node *); + for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { + struct reiserfs_bitmap_node **node_tmp; + jb = SB_JOURNAL(s)->j_list_bitmap + i; + memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size); + + /* just in case vfree schedules on us, copy the new + ** pointer into the journal struct before freeing the + ** old one + */ + node_tmp = jb->bitmaps; + jb->bitmaps = jbitmap[i].bitmaps; + vfree(node_tmp); + } + + /* allocate additional bitmap blocks, reallocate array of bitmap + * block pointers */ + bitmap = + vmalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); + if (!bitmap) { + /* Journal bitmaps are still supersized, but the memory isn't + * leaked, so I guess it's ok */ + printk("reiserfs_resize: unable to allocate memory.\n"); + return -ENOMEM; + } + memset(bitmap, 0, + sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); + for (i = 0; i < bmap_nr; i++) + bitmap[i] = old_bitmap[i]; + + /* This doesn't go through the journal, but it doesn't have to. + * The changes are still atomic: We're synced up when the journal + * transaction begins, and the new bitmaps don't matter if the + * transaction fails. */ + for (i = bmap_nr; i < bmap_nr_new; i++) { + bitmap[i].bh = sb_getblk(s, i * s->s_blocksize * 8); + memset(bitmap[i].bh->b_data, 0, sb_blocksize(sb)); + reiserfs_test_and_set_le_bit(0, bitmap[i].bh->b_data); + + set_buffer_uptodate(bitmap[i].bh); + mark_buffer_dirty(bitmap[i].bh); + sync_dirty_buffer(bitmap[i].bh); + // update bitmap_info stuff + bitmap[i].first_zero_hint = 1; + bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; + } + /* free old bitmap blocks array */ + SB_AP_BITMAP(s) = bitmap; + vfree(old_bitmap); } - + /* begin transaction, if there was an error, it's fine. Yes, we have * incorrect bitmaps now, but none of it is ever going to touch the * disk anyway. 
*/ err = journal_begin(&th, s, 10); if (err) - return err; + return err; /* correct last bitmap blocks in old and new disk layout */ reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr - 1].bh, 1); for (i = block_r; i < s->s_blocksize * 8; i++) - reiserfs_test_and_clear_le_bit(i, - SB_AP_BITMAP(s)[bmap_nr - 1].bh->b_data); + reiserfs_test_and_clear_le_bit(i, + SB_AP_BITMAP(s)[bmap_nr - + 1].bh->b_data); SB_AP_BITMAP(s)[bmap_nr - 1].free_count += s->s_blocksize * 8 - block_r; - if ( !SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint) - SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint = block_r; + if (!SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint) + SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint = block_r; journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr - 1].bh); reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh, 1); for (i = block_r_new; i < s->s_blocksize * 8; i++) - reiserfs_test_and_set_le_bit(i, - SB_AP_BITMAP(s)[bmap_nr_new - 1].bh->b_data); + reiserfs_test_and_set_le_bit(i, + SB_AP_BITMAP(s)[bmap_nr_new - + 1].bh->b_data); journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh); - - SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count -= s->s_blocksize * 8 - block_r_new; + + SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count -= + s->s_blocksize * 8 - block_r_new; /* Extreme case where last bitmap is the only valid block in itself. 
*/ - if ( !SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count ) - SB_AP_BITMAP(s)[bmap_nr_new - 1].first_zero_hint = 0; - /* update super */ - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + if (!SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count) + SB_AP_BITMAP(s)[bmap_nr_new - 1].first_zero_hint = 0; + /* update super */ + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); free_blocks = SB_FREE_BLOCKS(s); - PUT_SB_FREE_BLOCKS(s, free_blocks + (block_count_new - block_count - (bmap_nr_new - bmap_nr))); + PUT_SB_FREE_BLOCKS(s, + free_blocks + (block_count_new - block_count - + (bmap_nr_new - bmap_nr))); PUT_SB_BLOCK_COUNT(s, block_count_new); PUT_SB_BMAP_NR(s, bmap_nr_new); s->s_dirt = 1; journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); - + SB_JOURNAL(s)->j_must_wait = 1; return journal_end(&th, s, 10); } diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 63158491e15..e2d08d7bcff 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -59,46 +59,45 @@ #include /* Does the buffer contain a disk block which is in the tree. */ -inline int B_IS_IN_TREE (const struct buffer_head * p_s_bh) +inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh) { - RFALSE( B_LEVEL (p_s_bh) > MAX_HEIGHT, - "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh); + RFALSE(B_LEVEL(p_s_bh) > MAX_HEIGHT, + "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh); - return ( B_LEVEL (p_s_bh) != FREE_LEVEL ); + return (B_LEVEL(p_s_bh) != FREE_LEVEL); } // // to gets item head in le form // -inline void copy_item_head(struct item_head * p_v_to, - const struct item_head * p_v_from) +inline void copy_item_head(struct item_head *p_v_to, + const struct item_head *p_v_from) { - memcpy (p_v_to, p_v_from, IH_SIZE); + memcpy(p_v_to, p_v_from, IH_SIZE); } - /* k1 is pointer to on-disk structure which is stored in little-endian form. k2 is pointer to cpu variable. For key of items of the same object this returns 0. 
Returns: -1 if key1 < key2 0 if key1 == key2 1 if key1 > key2 */ -inline int comp_short_keys (const struct reiserfs_key * le_key, - const struct cpu_key * cpu_key) +inline int comp_short_keys(const struct reiserfs_key *le_key, + const struct cpu_key *cpu_key) { - __u32 n; - n = le32_to_cpu(le_key->k_dir_id); - if (n < cpu_key->on_disk_key.k_dir_id) - return -1; - if (n > cpu_key->on_disk_key.k_dir_id) - return 1; - n = le32_to_cpu(le_key->k_objectid); - if (n < cpu_key->on_disk_key.k_objectid) - return -1; - if (n > cpu_key->on_disk_key.k_objectid) - return 1; - return 0; + __u32 n; + n = le32_to_cpu(le_key->k_dir_id); + if (n < cpu_key->on_disk_key.k_dir_id) + return -1; + if (n > cpu_key->on_disk_key.k_dir_id) + return 1; + n = le32_to_cpu(le_key->k_objectid); + if (n < cpu_key->on_disk_key.k_objectid) + return -1; + if (n > cpu_key->on_disk_key.k_objectid) + return 1; + return 0; } /* k1 is pointer to on-disk structure which is stored in little-endian @@ -106,68 +105,72 @@ inline int comp_short_keys (const struct reiserfs_key * le_key, Compare keys using all 4 key fields. 
Returns: -1 if key1 < key2 0 if key1 = key2 1 if key1 > key2 */ -static inline int comp_keys (const struct reiserfs_key * le_key, const struct cpu_key * cpu_key) +static inline int comp_keys(const struct reiserfs_key *le_key, + const struct cpu_key *cpu_key) { - int retval; - - retval = comp_short_keys (le_key, cpu_key); - if (retval) - return retval; - if (le_key_k_offset (le_key_version(le_key), le_key) < cpu_key_k_offset (cpu_key)) - return -1; - if (le_key_k_offset (le_key_version(le_key), le_key) > cpu_key_k_offset (cpu_key)) - return 1; - - if (cpu_key->key_length == 3) - return 0; - - /* this part is needed only when tail conversion is in progress */ - if (le_key_k_type (le_key_version(le_key), le_key) < cpu_key_k_type (cpu_key)) - return -1; + int retval; + + retval = comp_short_keys(le_key, cpu_key); + if (retval) + return retval; + if (le_key_k_offset(le_key_version(le_key), le_key) < + cpu_key_k_offset(cpu_key)) + return -1; + if (le_key_k_offset(le_key_version(le_key), le_key) > + cpu_key_k_offset(cpu_key)) + return 1; + + if (cpu_key->key_length == 3) + return 0; + + /* this part is needed only when tail conversion is in progress */ + if (le_key_k_type(le_key_version(le_key), le_key) < + cpu_key_k_type(cpu_key)) + return -1; + + if (le_key_k_type(le_key_version(le_key), le_key) > + cpu_key_k_type(cpu_key)) + return 1; - if (le_key_k_type (le_key_version(le_key), le_key) > cpu_key_k_type (cpu_key)) - return 1; - - return 0; + return 0; } - -inline int comp_short_le_keys (const struct reiserfs_key * key1, const struct reiserfs_key * key2) +inline int comp_short_le_keys(const struct reiserfs_key *key1, + const struct reiserfs_key *key2) { - __u32 * p_s_1_u32, * p_s_2_u32; - int n_key_length = REISERFS_SHORT_KEY_LEN; - - p_s_1_u32 = (__u32 *)key1; - p_s_2_u32 = (__u32 *)key2; - for( ; n_key_length--; ++p_s_1_u32, ++p_s_2_u32 ) { - if ( le32_to_cpu (*p_s_1_u32) < le32_to_cpu (*p_s_2_u32) ) - return -1; - if ( le32_to_cpu (*p_s_1_u32) > le32_to_cpu 
(*p_s_2_u32) ) - return 1; - } - return 0; + __u32 *p_s_1_u32, *p_s_2_u32; + int n_key_length = REISERFS_SHORT_KEY_LEN; + + p_s_1_u32 = (__u32 *) key1; + p_s_2_u32 = (__u32 *) key2; + for (; n_key_length--; ++p_s_1_u32, ++p_s_2_u32) { + if (le32_to_cpu(*p_s_1_u32) < le32_to_cpu(*p_s_2_u32)) + return -1; + if (le32_to_cpu(*p_s_1_u32) > le32_to_cpu(*p_s_2_u32)) + return 1; + } + return 0; } -inline void le_key2cpu_key (struct cpu_key * to, const struct reiserfs_key * from) +inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from) { - int version; - to->on_disk_key.k_dir_id = le32_to_cpu (from->k_dir_id); - to->on_disk_key.k_objectid = le32_to_cpu (from->k_objectid); - - // find out version of the key - version = le_key_version (from); - to->version = version; - to->on_disk_key.k_offset = le_key_k_offset(version, from); - to->on_disk_key.k_type = le_key_k_type(version, from); + int version; + to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id); + to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid); + + // find out version of the key + version = le_key_version(from); + to->version = version; + to->on_disk_key.k_offset = le_key_k_offset(version, from); + to->on_disk_key.k_type = le_key_k_type(version, from); } - - // this does not say which one is bigger, it only returns 1 if keys // are not equal, 0 otherwise -inline int comp_le_keys (const struct reiserfs_key * k1, const struct reiserfs_key * k2) +inline int comp_le_keys(const struct reiserfs_key *k1, + const struct reiserfs_key *k2) { - return memcmp (k1, k2, sizeof (struct reiserfs_key)); + return memcmp(k1, k2, sizeof(struct reiserfs_key)); } /************************************************************************** @@ -184,373 +187,396 @@ inline int comp_le_keys (const struct reiserfs_key * k1, const struct reiserfs_k there are no possible items, and we have not found it. 
With each examination we cut the number of possible items it could be by one more than half rounded down, or we find it. */ -static inline int bin_search ( - const void * p_v_key, /* Key to search for. */ - const void * p_v_base,/* First item in the array. */ - int p_n_num, /* Number of items in the array. */ - int p_n_width, /* Item size in the array. - searched. Lest the reader be - confused, note that this is crafted - as a general function, and when it - is applied specifically to the array - of item headers in a node, p_n_width - is actually the item header size not - the item size. */ - int * p_n_pos /* Number of the searched for element. */ - ) { - int n_rbound, n_lbound, n_j; - - for ( n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0))/2; n_lbound <= n_rbound; n_j = (n_rbound + n_lbound)/2 ) - switch( comp_keys((struct reiserfs_key *)((char * )p_v_base + n_j * p_n_width), (struct cpu_key *)p_v_key) ) { - case -1: n_lbound = n_j + 1; continue; - case 1: n_rbound = n_j - 1; continue; - case 0: *p_n_pos = n_j; return ITEM_FOUND; /* Key found in the array. */ - } - - /* bin_search did not find given key, it returns position of key, - that is minimal and greater than the given one. */ - *p_n_pos = n_lbound; - return ITEM_NOT_FOUND; +static inline int bin_search(const void *p_v_key, /* Key to search for. */ + const void *p_v_base, /* First item in the array. */ + int p_n_num, /* Number of items in the array. */ + int p_n_width, /* Item size in the array. + searched. Lest the reader be + confused, note that this is crafted + as a general function, and when it + is applied specifically to the array + of item headers in a node, p_n_width + is actually the item header size not + the item size. */ + int *p_n_pos /* Number of the searched for element. 
*/ + ) +{ + int n_rbound, n_lbound, n_j; + + for (n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0)) / 2; + n_lbound <= n_rbound; n_j = (n_rbound + n_lbound) / 2) + switch (comp_keys + ((struct reiserfs_key *)((char *)p_v_base + + n_j * p_n_width), + (struct cpu_key *)p_v_key)) { + case -1: + n_lbound = n_j + 1; + continue; + case 1: + n_rbound = n_j - 1; + continue; + case 0: + *p_n_pos = n_j; + return ITEM_FOUND; /* Key found in the array. */ + } + + /* bin_search did not find given key, it returns position of key, + that is minimal and greater than the given one. */ + *p_n_pos = n_lbound; + return ITEM_NOT_FOUND; } #ifdef CONFIG_REISERFS_CHECK -extern struct tree_balance * cur_tb; +extern struct tree_balance *cur_tb; #endif - - /* Minimal possible key. It is never in the tree. */ -const struct reiserfs_key MIN_KEY = {0, 0, {{0, 0},}}; +const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} }; /* Maximal possible key. It is never in the tree. */ -static const struct reiserfs_key MAX_KEY = { +static const struct reiserfs_key MAX_KEY = { __constant_cpu_to_le32(0xffffffff), __constant_cpu_to_le32(0xffffffff), {{__constant_cpu_to_le32(0xffffffff), - __constant_cpu_to_le32(0xffffffff)},} + __constant_cpu_to_le32(0xffffffff)},} }; - /* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom of the path, and going upwards. We must check the path's validity at each step. If the key is not in the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this case we return a special key, either MIN_KEY or MAX_KEY. */ -static inline const struct reiserfs_key * get_lkey ( - const struct path * p_s_chk_path, - const struct super_block * p_s_sb - ) { - int n_position, n_path_offset = p_s_chk_path->path_length; - struct buffer_head * p_s_parent; - - RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET, - "PAP-5010: invalid offset in the path"); - - /* While not higher in path than first element. 
*/ - while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) { - - RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)), - "PAP-5020: parent is not uptodate"); - - /* Parent at the path is not in the tree now. */ - if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) ) - return &MAX_KEY; - /* Check whether position in the parent is correct. */ - if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) ) - return &MAX_KEY; - /* Check whether parent at the path really points to the child. */ - if ( B_N_CHILD_NUM(p_s_parent, n_position) != - PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr ) - return &MAX_KEY; - /* Return delimiting key if position in the parent is not equal to zero. */ - if ( n_position ) - return B_N_PDELIM_KEY(p_s_parent, n_position - 1); - } - /* Return MIN_KEY if we are in the root of the buffer tree. */ - if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == - SB_ROOT_BLOCK (p_s_sb) ) - return &MIN_KEY; - return &MAX_KEY; +static inline const struct reiserfs_key *get_lkey(const struct path + *p_s_chk_path, + const struct super_block + *p_s_sb) +{ + int n_position, n_path_offset = p_s_chk_path->path_length; + struct buffer_head *p_s_parent; + + RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET, + "PAP-5010: invalid offset in the path"); + + /* While not higher in path than first element. */ + while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { + + RFALSE(!buffer_uptodate + (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)), + "PAP-5020: parent is not uptodate"); + + /* Parent at the path is not in the tree now. */ + if (!B_IS_IN_TREE + (p_s_parent = + PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset))) + return &MAX_KEY; + /* Check whether position in the parent is correct. 
*/ + if ((n_position = + PATH_OFFSET_POSITION(p_s_chk_path, + n_path_offset)) > + B_NR_ITEMS(p_s_parent)) + return &MAX_KEY; + /* Check whether parent at the path really points to the child. */ + if (B_N_CHILD_NUM(p_s_parent, n_position) != + PATH_OFFSET_PBUFFER(p_s_chk_path, + n_path_offset + 1)->b_blocknr) + return &MAX_KEY; + /* Return delimiting key if position in the parent is not equal to zero. */ + if (n_position) + return B_N_PDELIM_KEY(p_s_parent, n_position - 1); + } + /* Return MIN_KEY if we are in the root of the buffer tree. */ + if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)-> + b_blocknr == SB_ROOT_BLOCK(p_s_sb)) + return &MIN_KEY; + return &MAX_KEY; } - /* Get delimiting key of the buffer at the path and its right neighbor. */ -inline const struct reiserfs_key * get_rkey ( - const struct path * p_s_chk_path, - const struct super_block * p_s_sb - ) { - int n_position, - n_path_offset = p_s_chk_path->path_length; - struct buffer_head * p_s_parent; - - RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET, - "PAP-5030: invalid offset in the path"); - - while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) { - - RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)), - "PAP-5040: parent is not uptodate"); - - /* Parent at the path is not in the tree now. */ - if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) ) - return &MIN_KEY; - /* Check whether position in the parent is correct. */ - if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) ) - return &MIN_KEY; - /* Check whether parent at the path really points to the child. */ - if ( B_N_CHILD_NUM(p_s_parent, n_position) != - PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr ) - return &MIN_KEY; - /* Return delimiting key if position in the parent is not the last one. 
*/ - if ( n_position != B_NR_ITEMS(p_s_parent) ) - return B_N_PDELIM_KEY(p_s_parent, n_position); - } - /* Return MAX_KEY if we are in the root of the buffer tree. */ - if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == - SB_ROOT_BLOCK (p_s_sb) ) - return &MAX_KEY; - return &MIN_KEY; +inline const struct reiserfs_key *get_rkey(const struct path *p_s_chk_path, + const struct super_block *p_s_sb) +{ + int n_position, n_path_offset = p_s_chk_path->path_length; + struct buffer_head *p_s_parent; + + RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET, + "PAP-5030: invalid offset in the path"); + + while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { + + RFALSE(!buffer_uptodate + (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)), + "PAP-5040: parent is not uptodate"); + + /* Parent at the path is not in the tree now. */ + if (!B_IS_IN_TREE + (p_s_parent = + PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset))) + return &MIN_KEY; + /* Check whether position in the parent is correct. */ + if ((n_position = + PATH_OFFSET_POSITION(p_s_chk_path, + n_path_offset)) > + B_NR_ITEMS(p_s_parent)) + return &MIN_KEY; + /* Check whether parent at the path really points to the child. */ + if (B_N_CHILD_NUM(p_s_parent, n_position) != + PATH_OFFSET_PBUFFER(p_s_chk_path, + n_path_offset + 1)->b_blocknr) + return &MIN_KEY; + /* Return delimiting key if position in the parent is not the last one. */ + if (n_position != B_NR_ITEMS(p_s_parent)) + return B_N_PDELIM_KEY(p_s_parent, n_position); + } + /* Return MAX_KEY if we are in the root of the buffer tree. */ + if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)-> + b_blocknr == SB_ROOT_BLOCK(p_s_sb)) + return &MAX_KEY; + return &MIN_KEY; } - /* Check whether a key is contained in the tree rooted from a buffer at a path. */ /* This works by looking at the left and right delimiting keys for the buffer in the last path_element in the path. 
These delimiting keys are stored at least one level above that buffer in the tree. If the buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */ -static inline int key_in_buffer ( - struct path * p_s_chk_path, /* Path which should be checked. */ - const struct cpu_key * p_s_key, /* Key which should be checked. */ - struct super_block * p_s_sb /* Super block pointer. */ - ) { - - RFALSE( ! p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET || - p_s_chk_path->path_length > MAX_HEIGHT, - "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)", - p_s_key, p_s_chk_path->path_length); - RFALSE( !PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev, - "PAP-5060: device must not be NODEV"); - - if ( comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1 ) - /* left delimiting key is bigger, that the key we look for */ - return 0; - // if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 ) - if ( comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1 ) - /* p_s_key must be less than right delimitiing key */ - return 0; - return 1; -} - +static inline int key_in_buffer(struct path *p_s_chk_path, /* Path which should be checked. */ + const struct cpu_key *p_s_key, /* Key which should be checked. */ + struct super_block *p_s_sb /* Super block pointer. 
*/ + ) +{ -inline void decrement_bcount( - struct buffer_head * p_s_bh - ) { - if ( p_s_bh ) { - if ( atomic_read (&(p_s_bh->b_count)) ) { - put_bh(p_s_bh) ; - return; - } - reiserfs_panic(NULL, "PAP-5070: decrement_bcount: trying to free free buffer %b", p_s_bh); - } + RFALSE(!p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET + || p_s_chk_path->path_length > MAX_HEIGHT, + "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)", + p_s_key, p_s_chk_path->path_length); + RFALSE(!PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev, + "PAP-5060: device must not be NODEV"); + + if (comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1) + /* left delimiting key is bigger, that the key we look for */ + return 0; + // if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 ) + if (comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1) + /* p_s_key must be less than right delimitiing key */ + return 0; + return 1; } +inline void decrement_bcount(struct buffer_head *p_s_bh) +{ + if (p_s_bh) { + if (atomic_read(&(p_s_bh->b_count))) { + put_bh(p_s_bh); + return; + } + reiserfs_panic(NULL, + "PAP-5070: decrement_bcount: trying to free free buffer %b", + p_s_bh); + } +} /* Decrement b_count field of the all buffers in the path. 
*/ -void decrement_counters_in_path ( - struct path * p_s_search_path - ) { - int n_path_offset = p_s_search_path->path_length; - - RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET || - n_path_offset > EXTENDED_MAX_HEIGHT - 1, - "PAP-5080: invalid path offset of %d", n_path_offset); - - while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) { - struct buffer_head * bh; - - bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--); - decrement_bcount (bh); - } - p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; -} +void decrement_counters_in_path(struct path *p_s_search_path) +{ + int n_path_offset = p_s_search_path->path_length; + + RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET || + n_path_offset > EXTENDED_MAX_HEIGHT - 1, + "PAP-5080: invalid path offset of %d", n_path_offset); + while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) { + struct buffer_head *bh; -int reiserfs_check_path(struct path *p) { - RFALSE( p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET, - "path not properly relsed") ; - return 0 ; + bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--); + decrement_bcount(bh); + } + p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; } +int reiserfs_check_path(struct path *p) +{ + RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET, + "path not properly relsed"); + return 0; +} /* Release all buffers in the path. 
Restore dirty bits clean ** when preparing the buffer for the log ** ** only called from fix_nodes() */ -void pathrelse_and_restore ( - struct super_block *s, - struct path * p_s_search_path - ) { - int n_path_offset = p_s_search_path->path_length; - - RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, - "clm-4000: invalid path offset"); - - while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) { - reiserfs_restore_prepared_buffer(s, PATH_OFFSET_PBUFFER(p_s_search_path, - n_path_offset)); - brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); - } - p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; +void pathrelse_and_restore(struct super_block *s, struct path *p_s_search_path) +{ + int n_path_offset = p_s_search_path->path_length; + + RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, + "clm-4000: invalid path offset"); + + while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) { + reiserfs_restore_prepared_buffer(s, + PATH_OFFSET_PBUFFER + (p_s_search_path, + n_path_offset)); + brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); + } + p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; } /* Release all buffers in the path. 
*/ -void pathrelse ( - struct path * p_s_search_path - ) { - int n_path_offset = p_s_search_path->path_length; - - RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, - "PAP-5090: invalid path offset"); - - while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) - brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); - - p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; -} +void pathrelse(struct path *p_s_search_path) +{ + int n_path_offset = p_s_search_path->path_length; + RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, + "PAP-5090: invalid path offset"); + while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) + brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); -static int is_leaf (char * buf, int blocksize, struct buffer_head * bh) -{ - struct block_head * blkh; - struct item_head * ih; - int used_space; - int prev_location; - int i; - int nr; - - blkh = (struct block_head *)buf; - if ( blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) { - reiserfs_warning (NULL, "is_leaf: this should be caught earlier"); - return 0; - } + p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; +} - nr = blkh_nr_item(blkh); - if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) { - /* item number is too big or too small */ - reiserfs_warning (NULL, "is_leaf: nr_item seems wrong: %z", bh); - return 0; - } - ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; - used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location (ih)); - if (used_space != blocksize - blkh_free_space(blkh)) { - /* free space does not match to calculated amount of use space */ - reiserfs_warning (NULL, "is_leaf: free space seems wrong: %z", bh); - return 0; - } - - // FIXME: it is_leaf will hit performance too much - we may have - // return 1 here - - /* check tables of item heads */ - ih = (struct item_head *)(buf + BLKH_SIZE); - prev_location = blocksize; - for (i = 0; i < nr; i ++, ih ++) { - if ( le_ih_k_type(ih) == TYPE_ANY) { - reiserfs_warning 
(NULL, "is_leaf: wrong item type for item %h",ih); - return 0; +static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) +{ + struct block_head *blkh; + struct item_head *ih; + int used_space; + int prev_location; + int i; + int nr; + + blkh = (struct block_head *)buf; + if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) { + reiserfs_warning(NULL, + "is_leaf: this should be caught earlier"); + return 0; } - if (ih_location (ih) >= blocksize || ih_location (ih) < IH_SIZE * nr) { - reiserfs_warning (NULL, "is_leaf: item location seems wrong: %h", ih); - return 0; + + nr = blkh_nr_item(blkh); + if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) { + /* item number is too big or too small */ + reiserfs_warning(NULL, "is_leaf: nr_item seems wrong: %z", bh); + return 0; } - if (ih_item_len (ih) < 1 || ih_item_len (ih) > MAX_ITEM_LEN (blocksize)) { - reiserfs_warning (NULL, "is_leaf: item length seems wrong: %h", ih); - return 0; + ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; + used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih)); + if (used_space != blocksize - blkh_free_space(blkh)) { + /* free space does not match to calculated amount of use space */ + reiserfs_warning(NULL, "is_leaf: free space seems wrong: %z", + bh); + return 0; } - if (prev_location - ih_location (ih) != ih_item_len (ih)) { - reiserfs_warning (NULL, "is_leaf: item location seems wrong (second one): %h", ih); - return 0; + // FIXME: it is_leaf will hit performance too much - we may have + // return 1 here + + /* check tables of item heads */ + ih = (struct item_head *)(buf + BLKH_SIZE); + prev_location = blocksize; + for (i = 0; i < nr; i++, ih++) { + if (le_ih_k_type(ih) == TYPE_ANY) { + reiserfs_warning(NULL, + "is_leaf: wrong item type for item %h", + ih); + return 0; + } + if (ih_location(ih) >= blocksize + || ih_location(ih) < IH_SIZE * nr) { + reiserfs_warning(NULL, + "is_leaf: item location seems wrong: %h", + ih); + return 0; + } + if 
(ih_item_len(ih) < 1 + || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) { + reiserfs_warning(NULL, + "is_leaf: item length seems wrong: %h", + ih); + return 0; + } + if (prev_location - ih_location(ih) != ih_item_len(ih)) { + reiserfs_warning(NULL, + "is_leaf: item location seems wrong (second one): %h", + ih); + return 0; + } + prev_location = ih_location(ih); } - prev_location = ih_location (ih); - } - // one may imagine much more checks - return 1; + // one may imagine much more checks + return 1; } - /* returns 1 if buf looks like an internal node, 0 otherwise */ -static int is_internal (char * buf, int blocksize, struct buffer_head * bh) +static int is_internal(char *buf, int blocksize, struct buffer_head *bh) { - struct block_head * blkh; - int nr; - int used_space; - - blkh = (struct block_head *)buf; - nr = blkh_level(blkh); - if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) { - /* this level is not possible for internal nodes */ - reiserfs_warning (NULL, "is_internal: this should be caught earlier"); - return 0; - } - - nr = blkh_nr_item(blkh); - if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { - /* for internal which is not root we might check min number of keys */ - reiserfs_warning (NULL, "is_internal: number of key seems wrong: %z", bh); - return 0; - } + struct block_head *blkh; + int nr; + int used_space; + + blkh = (struct block_head *)buf; + nr = blkh_level(blkh); + if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) { + /* this level is not possible for internal nodes */ + reiserfs_warning(NULL, + "is_internal: this should be caught earlier"); + return 0; + } - used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); - if (used_space != blocksize - blkh_free_space(blkh)) { - reiserfs_warning (NULL, "is_internal: free space seems wrong: %z", bh); - return 0; - } + nr = blkh_nr_item(blkh); + if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { + /* for internal which is not root we might check min number of keys 
*/ + reiserfs_warning(NULL, + "is_internal: number of key seems wrong: %z", + bh); + return 0; + } - // one may imagine much more checks - return 1; + used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); + if (used_space != blocksize - blkh_free_space(blkh)) { + reiserfs_warning(NULL, + "is_internal: free space seems wrong: %z", bh); + return 0; + } + // one may imagine much more checks + return 1; } - // make sure that bh contains formatted node of reiserfs tree of // 'level'-th level -static int is_tree_node (struct buffer_head * bh, int level) +static int is_tree_node(struct buffer_head *bh, int level) { - if (B_LEVEL (bh) != level) { - reiserfs_warning (NULL, "is_tree_node: node level %d does not match to the expected one %d", - B_LEVEL (bh), level); - return 0; - } - if (level == DISK_LEAF_NODE_LEVEL) - return is_leaf (bh->b_data, bh->b_size, bh); + if (B_LEVEL(bh) != level) { + reiserfs_warning(NULL, + "is_tree_node: node level %d does not match to the expected one %d", + B_LEVEL(bh), level); + return 0; + } + if (level == DISK_LEAF_NODE_LEVEL) + return is_leaf(bh->b_data, bh->b_size, bh); - return is_internal (bh->b_data, bh->b_size, bh); + return is_internal(bh->b_data, bh->b_size, bh); } - - #define SEARCH_BY_KEY_READA 16 /* The function is NOT SCHEDULE-SAFE! 
*/ -static void search_by_key_reada (struct super_block * s, - struct buffer_head **bh, - unsigned long *b, int num) +static void search_by_key_reada(struct super_block *s, + struct buffer_head **bh, + unsigned long *b, int num) { - int i,j; - - for (i = 0 ; i < num ; i++) { - bh[i] = sb_getblk (s, b[i]); - } - for (j = 0 ; j < i ; j++) { - /* - * note, this needs attention if we are getting rid of the BKL - * you have to make sure the prepared bit isn't set on this buffer - */ - if (!buffer_uptodate(bh[j])) - ll_rw_block(READA, 1, bh + j); - brelse(bh[j]); - } + int i, j; + + for (i = 0; i < num; i++) { + bh[i] = sb_getblk(s, b[i]); + } + for (j = 0; j < i; j++) { + /* + * note, this needs attention if we are getting rid of the BKL + * you have to make sure the prepared bit isn't set on this buffer + */ + if (!buffer_uptodate(bh[j])) + ll_rw_block(READA, 1, bh + j); + brelse(bh[j]); + } } /************************************************************************** @@ -576,194 +602,200 @@ static void search_by_key_reada (struct super_block * s, correctness of the top of the path but need not be checked for the correctness of the bottom of the path */ /* The function is NOT SCHEDULE-SAFE! */ -int search_by_key (struct super_block * p_s_sb, - const struct cpu_key * p_s_key, /* Key to search. */ - struct path * p_s_search_path, /* This structure was - allocated and initialized - by the calling - function. It is filled up - by this function. */ - int n_stop_level /* How far down the tree to search. 
To - stop at leaf level - set to - DISK_LEAF_NODE_LEVEL */ - ) { - int n_block_number; - int expected_level; - struct buffer_head * p_s_bh; - struct path_element * p_s_last_element; - int n_node_level, n_retval; - int right_neighbor_of_leaf_node; - int fs_gen; - struct buffer_head *reada_bh[SEARCH_BY_KEY_READA]; - unsigned long reada_blocks[SEARCH_BY_KEY_READA]; - int reada_count = 0; +int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /* Key to search. */ + struct path *p_s_search_path, /* This structure was + allocated and initialized + by the calling + function. It is filled up + by this function. */ + int n_stop_level /* How far down the tree to search. To + stop at leaf level - set to + DISK_LEAF_NODE_LEVEL */ + ) +{ + int n_block_number; + int expected_level; + struct buffer_head *p_s_bh; + struct path_element *p_s_last_element; + int n_node_level, n_retval; + int right_neighbor_of_leaf_node; + int fs_gen; + struct buffer_head *reada_bh[SEARCH_BY_KEY_READA]; + unsigned long reada_blocks[SEARCH_BY_KEY_READA]; + int reada_count = 0; #ifdef CONFIG_REISERFS_CHECK - int n_repeat_counter = 0; + int n_repeat_counter = 0; #endif - - PROC_INFO_INC( p_s_sb, search_by_key ); - - /* As we add each node to a path we increase its count. This means that - we must be careful to release all nodes in a path before we either - discard the path struct or re-use the path struct, as we do here. */ - decrement_counters_in_path(p_s_search_path); + PROC_INFO_INC(p_s_sb, search_by_key); + + /* As we add each node to a path we increase its count. This means that + we must be careful to release all nodes in a path before we either + discard the path struct or re-use the path struct, as we do here. */ - right_neighbor_of_leaf_node = 0; + decrement_counters_in_path(p_s_search_path); - /* With each iteration of this loop we search through the items in the - current node, and calculate the next current node(next path element) - for the next iteration of this loop.. 
*/ - n_block_number = SB_ROOT_BLOCK (p_s_sb); - expected_level = -1; - while ( 1 ) { + right_neighbor_of_leaf_node = 0; + + /* With each iteration of this loop we search through the items in the + current node, and calculate the next current node(next path element) + for the next iteration of this loop.. */ + n_block_number = SB_ROOT_BLOCK(p_s_sb); + expected_level = -1; + while (1) { #ifdef CONFIG_REISERFS_CHECK - if ( !(++n_repeat_counter % 50000) ) - reiserfs_warning (p_s_sb, "PAP-5100: search_by_key: %s:" - "there were %d iterations of while loop " - "looking for key %K", - current->comm, n_repeat_counter, p_s_key); + if (!(++n_repeat_counter % 50000)) + reiserfs_warning(p_s_sb, "PAP-5100: search_by_key: %s:" + "there were %d iterations of while loop " + "looking for key %K", + current->comm, n_repeat_counter, + p_s_key); #endif - /* prep path to have another element added to it. */ - p_s_last_element = PATH_OFFSET_PELEMENT(p_s_search_path, ++p_s_search_path->path_length); - fs_gen = get_generation (p_s_sb); - - /* Read the next tree node, and set the last element in the path to - have a pointer to it. */ - if ((p_s_bh = p_s_last_element->pe_buffer = - sb_getblk(p_s_sb, n_block_number)) ) { - if (!buffer_uptodate(p_s_bh) && reada_count > 1) { - search_by_key_reada (p_s_sb, reada_bh, - reada_blocks, reada_count); - } - ll_rw_block(READ, 1, &p_s_bh); - wait_on_buffer(p_s_bh); - if (!buffer_uptodate(p_s_bh)) - goto io_error; - } else { -io_error: - p_s_search_path->path_length --; - pathrelse(p_s_search_path); - return IO_ERROR; - } - reada_count = 0; - if (expected_level == -1) - expected_level = SB_TREE_HEIGHT (p_s_sb); - expected_level --; - - /* It is possible that schedule occurred. We must check whether the key - to search is still in the tree rooted from the current buffer. If - not then repeat search from the root. 
*/ - if ( fs_changed (fs_gen, p_s_sb) && - (!B_IS_IN_TREE (p_s_bh) || - B_LEVEL(p_s_bh) != expected_level || - !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) { - PROC_INFO_INC( p_s_sb, search_by_key_fs_changed ); - PROC_INFO_INC( p_s_sb, search_by_key_restarted ); - PROC_INFO_INC( p_s_sb, sbk_restarted[ expected_level - 1 ] ); - decrement_counters_in_path(p_s_search_path); - - /* Get the root block number so that we can repeat the search - starting from the root. */ - n_block_number = SB_ROOT_BLOCK (p_s_sb); - expected_level = -1; - right_neighbor_of_leaf_node = 0; - - /* repeat search from the root */ - continue; - } + /* prep path to have another element added to it. */ + p_s_last_element = + PATH_OFFSET_PELEMENT(p_s_search_path, + ++p_s_search_path->path_length); + fs_gen = get_generation(p_s_sb); + + /* Read the next tree node, and set the last element in the path to + have a pointer to it. */ + if ((p_s_bh = p_s_last_element->pe_buffer = + sb_getblk(p_s_sb, n_block_number))) { + if (!buffer_uptodate(p_s_bh) && reada_count > 1) { + search_by_key_reada(p_s_sb, reada_bh, + reada_blocks, reada_count); + } + ll_rw_block(READ, 1, &p_s_bh); + wait_on_buffer(p_s_bh); + if (!buffer_uptodate(p_s_bh)) + goto io_error; + } else { + io_error: + p_s_search_path->path_length--; + pathrelse(p_s_search_path); + return IO_ERROR; + } + reada_count = 0; + if (expected_level == -1) + expected_level = SB_TREE_HEIGHT(p_s_sb); + expected_level--; + + /* It is possible that schedule occurred. We must check whether the key + to search is still in the tree rooted from the current buffer. If + not then repeat search from the root. 
*/ + if (fs_changed(fs_gen, p_s_sb) && + (!B_IS_IN_TREE(p_s_bh) || + B_LEVEL(p_s_bh) != expected_level || + !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) { + PROC_INFO_INC(p_s_sb, search_by_key_fs_changed); + PROC_INFO_INC(p_s_sb, search_by_key_restarted); + PROC_INFO_INC(p_s_sb, + sbk_restarted[expected_level - 1]); + decrement_counters_in_path(p_s_search_path); + + /* Get the root block number so that we can repeat the search + starting from the root. */ + n_block_number = SB_ROOT_BLOCK(p_s_sb); + expected_level = -1; + right_neighbor_of_leaf_node = 0; + + /* repeat search from the root */ + continue; + } - /* only check that the key is in the buffer if p_s_key is not - equal to the MAX_KEY. Latter case is only possible in - "finish_unfinished()" processing during mount. */ - RFALSE( comp_keys( &MAX_KEY, p_s_key ) && - ! key_in_buffer(p_s_search_path, p_s_key, p_s_sb), - "PAP-5130: key is not in the buffer"); + /* only check that the key is in the buffer if p_s_key is not + equal to the MAX_KEY. Latter case is only possible in + "finish_unfinished()" processing during mount. */ + RFALSE(comp_keys(&MAX_KEY, p_s_key) && + !key_in_buffer(p_s_search_path, p_s_key, p_s_sb), + "PAP-5130: key is not in the buffer"); #ifdef CONFIG_REISERFS_CHECK - if ( cur_tb ) { - print_cur_tb ("5140"); - reiserfs_panic(p_s_sb, "PAP-5140: search_by_key: schedule occurred in do_balance!"); - } + if (cur_tb) { + print_cur_tb("5140"); + reiserfs_panic(p_s_sb, + "PAP-5140: search_by_key: schedule occurred in do_balance!"); + } #endif - // make sure, that the node contents look like a node of - // certain level - if (!is_tree_node (p_s_bh, expected_level)) { - reiserfs_warning (p_s_sb, "vs-5150: search_by_key: " - "invalid format found in block %ld. 
Fsck?", - p_s_bh->b_blocknr); - pathrelse (p_s_search_path); - return IO_ERROR; - } - - /* ok, we have acquired next formatted node in the tree */ - n_node_level = B_LEVEL (p_s_bh); - - PROC_INFO_BH_STAT( p_s_sb, p_s_bh, n_node_level - 1 ); - - RFALSE( n_node_level < n_stop_level, - "vs-5152: tree level (%d) is less than stop level (%d)", - n_node_level, n_stop_level); - - n_retval = bin_search( p_s_key, B_N_PITEM_HEAD(p_s_bh, 0), - B_NR_ITEMS(p_s_bh), - ( n_node_level == DISK_LEAF_NODE_LEVEL ) ? IH_SIZE : KEY_SIZE, - &(p_s_last_element->pe_position)); - if (n_node_level == n_stop_level) { - return n_retval; - } + // make sure, that the node contents look like a node of + // certain level + if (!is_tree_node(p_s_bh, expected_level)) { + reiserfs_warning(p_s_sb, "vs-5150: search_by_key: " + "invalid format found in block %ld. Fsck?", + p_s_bh->b_blocknr); + pathrelse(p_s_search_path); + return IO_ERROR; + } - /* we are not in the stop level */ - if (n_retval == ITEM_FOUND) - /* item has been found, so we choose the pointer which is to the right of the found one */ - p_s_last_element->pe_position++; + /* ok, we have acquired next formatted node in the tree */ + n_node_level = B_LEVEL(p_s_bh); - /* if item was not found we choose the position which is to - the left of the found item. This requires no code, - bin_search did it already.*/ + PROC_INFO_BH_STAT(p_s_sb, p_s_bh, n_node_level - 1); - /* So we have chosen a position in the current node which is - an internal node. Now we calculate child block number by - position in the node. 
*/ - n_block_number = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position); + RFALSE(n_node_level < n_stop_level, + "vs-5152: tree level (%d) is less than stop level (%d)", + n_node_level, n_stop_level); - /* if we are going to read leaf nodes, try for read ahead as well */ - if ((p_s_search_path->reada & PATH_READA) && - n_node_level == DISK_LEAF_NODE_LEVEL + 1) - { - int pos = p_s_last_element->pe_position; - int limit = B_NR_ITEMS(p_s_bh); - struct reiserfs_key *le_key; - - if (p_s_search_path->reada & PATH_READA_BACK) - limit = 0; - while(reada_count < SEARCH_BY_KEY_READA) { - if (pos == limit) - break; - reada_blocks[reada_count++] = B_N_CHILD_NUM(p_s_bh, pos); - if (p_s_search_path->reada & PATH_READA_BACK) - pos--; - else - pos++; + n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(p_s_bh, 0), + B_NR_ITEMS(p_s_bh), + (n_node_level == + DISK_LEAF_NODE_LEVEL) ? IH_SIZE : + KEY_SIZE, + &(p_s_last_element->pe_position)); + if (n_node_level == n_stop_level) { + return n_retval; + } - /* - * check to make sure we're in the same object - */ - le_key = B_N_PDELIM_KEY(p_s_bh, pos); - if (le32_to_cpu(le_key->k_objectid) != - p_s_key->on_disk_key.k_objectid) - { - break; + /* we are not in the stop level */ + if (n_retval == ITEM_FOUND) + /* item has been found, so we choose the pointer which is to the right of the found one */ + p_s_last_element->pe_position++; + + /* if item was not found we choose the position which is to + the left of the found item. This requires no code, + bin_search did it already. */ + + /* So we have chosen a position in the current node which is + an internal node. Now we calculate child block number by + position in the node. 
*/ + n_block_number = + B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position); + + /* if we are going to read leaf nodes, try for read ahead as well */ + if ((p_s_search_path->reada & PATH_READA) && + n_node_level == DISK_LEAF_NODE_LEVEL + 1) { + int pos = p_s_last_element->pe_position; + int limit = B_NR_ITEMS(p_s_bh); + struct reiserfs_key *le_key; + + if (p_s_search_path->reada & PATH_READA_BACK) + limit = 0; + while (reada_count < SEARCH_BY_KEY_READA) { + if (pos == limit) + break; + reada_blocks[reada_count++] = + B_N_CHILD_NUM(p_s_bh, pos); + if (p_s_search_path->reada & PATH_READA_BACK) + pos--; + else + pos++; + + /* + * check to make sure we're in the same object + */ + le_key = B_N_PDELIM_KEY(p_s_bh, pos); + if (le32_to_cpu(le_key->k_objectid) != + p_s_key->on_disk_key.k_objectid) { + break; + } + } } - } - } - } + } } - /* Form the path to an item and position in this item which contains file byte defined by p_s_key. If there is no such item corresponding to the key, we point the path to the item with @@ -780,94 +812,97 @@ io_error: units of directory entries. */ /* The function is NOT SCHEDULE-SAFE! */ -int search_for_position_by_key (struct super_block * p_s_sb, /* Pointer to the super block. */ - const struct cpu_key * p_cpu_key, /* Key to search (cpu variable) */ - struct path * p_s_search_path /* Filled up by this function. */ - ) { - struct item_head * p_le_ih; /* pointer to on-disk structure */ - int n_blk_size; - loff_t item_offset, offset; - struct reiserfs_dir_entry de; - int retval; - - /* If searching for directory entry. */ - if ( is_direntry_cpu_key (p_cpu_key) ) - return search_by_entry_key (p_s_sb, p_cpu_key, p_s_search_path, &de); - - /* If not searching for directory entry. */ - - /* If item is found. */ - retval = search_item (p_s_sb, p_cpu_key, p_s_search_path); - if (retval == IO_ERROR) - return retval; - if ( retval == ITEM_FOUND ) { - - RFALSE( ! 
ih_item_len( - B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), - PATH_LAST_POSITION(p_s_search_path))), - "PAP-5165: item length equals zero"); +int search_for_position_by_key(struct super_block *p_s_sb, /* Pointer to the super block. */ + const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */ + struct path *p_s_search_path /* Filled up by this function. */ + ) +{ + struct item_head *p_le_ih; /* pointer to on-disk structure */ + int n_blk_size; + loff_t item_offset, offset; + struct reiserfs_dir_entry de; + int retval; + + /* If searching for directory entry. */ + if (is_direntry_cpu_key(p_cpu_key)) + return search_by_entry_key(p_s_sb, p_cpu_key, p_s_search_path, + &de); + + /* If not searching for directory entry. */ + + /* If item is found. */ + retval = search_item(p_s_sb, p_cpu_key, p_s_search_path); + if (retval == IO_ERROR) + return retval; + if (retval == ITEM_FOUND) { - pos_in_item(p_s_search_path) = 0; - return POSITION_FOUND; - } + RFALSE(!ih_item_len + (B_N_PITEM_HEAD + (PATH_PLAST_BUFFER(p_s_search_path), + PATH_LAST_POSITION(p_s_search_path))), + "PAP-5165: item length equals zero"); - RFALSE( ! PATH_LAST_POSITION(p_s_search_path), - "PAP-5170: position equals zero"); + pos_in_item(p_s_search_path) = 0; + return POSITION_FOUND; + } - /* Item is not found. Set path to the previous item. */ - p_le_ih = B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), --PATH_LAST_POSITION(p_s_search_path)); - n_blk_size = p_s_sb->s_blocksize; + RFALSE(!PATH_LAST_POSITION(p_s_search_path), + "PAP-5170: position equals zero"); - if (comp_short_keys (&(p_le_ih->ih_key), p_cpu_key)) { - return FILE_NOT_FOUND; - } + /* Item is not found. Set path to the previous item. 
*/ + p_le_ih = + B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), + --PATH_LAST_POSITION(p_s_search_path)); + n_blk_size = p_s_sb->s_blocksize; - // FIXME: quite ugly this far + if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) { + return FILE_NOT_FOUND; + } + // FIXME: quite ugly this far - item_offset = le_ih_k_offset (p_le_ih); - offset = cpu_key_k_offset (p_cpu_key); + item_offset = le_ih_k_offset(p_le_ih); + offset = cpu_key_k_offset(p_cpu_key); - /* Needed byte is contained in the item pointed to by the path.*/ - if (item_offset <= offset && - item_offset + op_bytes_number (p_le_ih, n_blk_size) > offset) { - pos_in_item (p_s_search_path) = offset - item_offset; - if ( is_indirect_le_ih(p_le_ih) ) { - pos_in_item (p_s_search_path) /= n_blk_size; + /* Needed byte is contained in the item pointed to by the path. */ + if (item_offset <= offset && + item_offset + op_bytes_number(p_le_ih, n_blk_size) > offset) { + pos_in_item(p_s_search_path) = offset - item_offset; + if (is_indirect_le_ih(p_le_ih)) { + pos_in_item(p_s_search_path) /= n_blk_size; + } + return POSITION_FOUND; } - return POSITION_FOUND; - } - - /* Needed byte is not contained in the item pointed to by the - path. Set pos_in_item out of the item. */ - if ( is_indirect_le_ih (p_le_ih) ) - pos_in_item (p_s_search_path) = ih_item_len(p_le_ih) / UNFM_P_SIZE; - else - pos_in_item (p_s_search_path) = ih_item_len( p_le_ih ); - - return POSITION_NOT_FOUND; -} + /* Needed byte is not contained in the item pointed to by the + path. Set pos_in_item out of the item. */ + if (is_indirect_le_ih(p_le_ih)) + pos_in_item(p_s_search_path) = + ih_item_len(p_le_ih) / UNFM_P_SIZE; + else + pos_in_item(p_s_search_path) = ih_item_len(p_le_ih); + + return POSITION_NOT_FOUND; +} /* Compare given item and item pointed to by the path. 
*/ -int comp_items (const struct item_head * stored_ih, const struct path * p_s_path) +int comp_items(const struct item_head *stored_ih, const struct path *p_s_path) { - struct buffer_head * p_s_bh; - struct item_head * ih; + struct buffer_head *p_s_bh; + struct item_head *ih; - /* Last buffer at the path is not in the tree. */ - if ( ! B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)) ) - return 1; + /* Last buffer at the path is not in the tree. */ + if (!B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path))) + return 1; - /* Last path position is invalid. */ - if ( PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh) ) - return 1; + /* Last path position is invalid. */ + if (PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh)) + return 1; - /* we need only to know, whether it is the same item */ - ih = get_ih (p_s_path); - return memcmp (stored_ih, ih, IH_SIZE); + /* we need only to know, whether it is the same item */ + ih = get_ih(p_s_path); + return memcmp(stored_ih, ih, IH_SIZE); } - /* unformatted nodes are not logged anymore, ever. 
This is safe ** now */ @@ -876,461 +911,466 @@ int comp_items (const struct item_head * stored_ih, const struct path * p_s_path // block can not be forgotten as it is in I/O or held by someone #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) - - // prepare for delete or cut of direct item -static inline int prepare_for_direct_item (struct path * path, - struct item_head * le_ih, - struct inode * inode, - loff_t new_file_length, - int * cut_size) +static inline int prepare_for_direct_item(struct path *path, + struct item_head *le_ih, + struct inode *inode, + loff_t new_file_length, int *cut_size) { - loff_t round_len; - - - if ( new_file_length == max_reiserfs_offset (inode) ) { - /* item has to be deleted */ - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - return M_DELETE; - } - - // new file gets truncated - if (get_inode_item_key_version (inode) == KEY_FORMAT_3_6) { - // - round_len = ROUND_UP (new_file_length); - /* this was n_new_file_length < le_ih ... */ - if ( round_len < le_ih_k_offset (le_ih) ) { - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - return M_DELETE; /* Delete this item. */ + loff_t round_len; + + if (new_file_length == max_reiserfs_offset(inode)) { + /* item has to be deleted */ + *cut_size = -(IH_SIZE + ih_item_len(le_ih)); + return M_DELETE; + } + // new file gets truncated + if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { + // + round_len = ROUND_UP(new_file_length); + /* this was n_new_file_length < le_ih ... */ + if (round_len < le_ih_k_offset(le_ih)) { + *cut_size = -(IH_SIZE + ih_item_len(le_ih)); + return M_DELETE; /* Delete this item. */ + } + /* Calculate first position and size for cutting from item. */ + pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1); + *cut_size = -(ih_item_len(le_ih) - pos_in_item(path)); + + return M_CUT; /* Cut from this item. 
*/ + } + + // old file: items may have any length + + if (new_file_length < le_ih_k_offset(le_ih)) { + *cut_size = -(IH_SIZE + ih_item_len(le_ih)); + return M_DELETE; /* Delete this item. */ } /* Calculate first position and size for cutting from item. */ - pos_in_item (path) = round_len - (le_ih_k_offset (le_ih) - 1); - *cut_size = -(ih_item_len(le_ih) - pos_in_item(path)); - - return M_CUT; /* Cut from this item. */ - } - - - // old file: items may have any length - - if ( new_file_length < le_ih_k_offset (le_ih) ) { - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - return M_DELETE; /* Delete this item. */ - } - /* Calculate first position and size for cutting from item. */ - *cut_size = -(ih_item_len(le_ih) - - (pos_in_item (path) = new_file_length + 1 - le_ih_k_offset (le_ih))); - return M_CUT; /* Cut from this item. */ + *cut_size = -(ih_item_len(le_ih) - + (pos_in_item(path) = + new_file_length + 1 - le_ih_k_offset(le_ih))); + return M_CUT; /* Cut from this item. */ } - -static inline int prepare_for_direntry_item (struct path * path, - struct item_head * le_ih, - struct inode * inode, - loff_t new_file_length, - int * cut_size) +static inline int prepare_for_direntry_item(struct path *path, + struct item_head *le_ih, + struct inode *inode, + loff_t new_file_length, + int *cut_size) { - if (le_ih_k_offset (le_ih) == DOT_OFFSET && - new_file_length == max_reiserfs_offset (inode)) { - RFALSE( ih_entry_count (le_ih) != 2, - "PAP-5220: incorrect empty directory item (%h)", le_ih); - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - return M_DELETE; /* Delete the directory item containing "." and ".." entry. */ - } - - if ( ih_entry_count (le_ih) == 1 ) { - /* Delete the directory item such as there is one record only - in this item*/ - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - return M_DELETE; - } - - /* Cut one record from the directory item. 
*/ - *cut_size = -(DEH_SIZE + entry_length (get_last_bh (path), le_ih, pos_in_item (path))); - return M_CUT; -} + if (le_ih_k_offset(le_ih) == DOT_OFFSET && + new_file_length == max_reiserfs_offset(inode)) { + RFALSE(ih_entry_count(le_ih) != 2, + "PAP-5220: incorrect empty directory item (%h)", le_ih); + *cut_size = -(IH_SIZE + ih_item_len(le_ih)); + return M_DELETE; /* Delete the directory item containing "." and ".." entry. */ + } + if (ih_entry_count(le_ih) == 1) { + /* Delete the directory item such as there is one record only + in this item */ + *cut_size = -(IH_SIZE + ih_item_len(le_ih)); + return M_DELETE; + } + + /* Cut one record from the directory item. */ + *cut_size = + -(DEH_SIZE + + entry_length(get_last_bh(path), le_ih, pos_in_item(path))); + return M_CUT; +} /* If the path points to a directory or direct item, calculate mode and the size cut, for balance. If the path points to an indirect item, remove some number of its unformatted nodes. In case of file truncate calculate whether this item must be deleted/truncated or last unformatted node of this item will be converted to a direct item. This function returns a determination of what balance mode the calling function should employ. */ -static char prepare_for_delete_or_cut( - struct reiserfs_transaction_handle *th, - struct inode * inode, - struct path * p_s_path, - const struct cpu_key * p_s_item_key, - int * p_n_removed, /* Number of unformatted nodes which were removed - from end of the file. */ - int * p_n_cut_size, - unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */ - ) { - struct super_block * p_s_sb = inode->i_sb; - struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_path); - struct buffer_head * p_s_bh = PATH_PLAST_BUFFER(p_s_path); - - BUG_ON (!th->t_trans_id); - - /* Stat_data item. 
*/ - if ( is_statdata_le_ih (p_le_ih) ) { - - RFALSE( n_new_file_length != max_reiserfs_offset (inode), - "PAP-5210: mode must be M_DELETE"); - - *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih)); - return M_DELETE; - } - - - /* Directory item. */ - if ( is_direntry_le_ih (p_le_ih) ) - return prepare_for_direntry_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size); - - /* Direct item. */ - if ( is_direct_le_ih (p_le_ih) ) - return prepare_for_direct_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size); - - - /* Case of an indirect item. */ - { - int n_unfm_number, /* Number of the item unformatted nodes. */ - n_counter, - n_blk_size; - __le32 * p_n_unfm_pointer; /* Pointer to the unformatted node number. */ - __u32 tmp; - struct item_head s_ih; /* Item header. */ - char c_mode; /* Returned mode of the balance. */ - int need_research; +static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct path *p_s_path, const struct cpu_key *p_s_item_key, int *p_n_removed, /* Number of unformatted nodes which were removed + from end of the file. */ + int *p_n_cut_size, unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */ + ) +{ + struct super_block *p_s_sb = inode->i_sb; + struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_path); + struct buffer_head *p_s_bh = PATH_PLAST_BUFFER(p_s_path); + BUG_ON(!th->t_trans_id); - n_blk_size = p_s_sb->s_blocksize; + /* Stat_data item. */ + if (is_statdata_le_ih(p_le_ih)) { - /* Search for the needed object indirect item until there are no unformatted nodes to be removed. */ - do { - need_research = 0; - p_s_bh = PATH_PLAST_BUFFER(p_s_path); - /* Copy indirect item header to a temp variable. */ - copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); - /* Calculate number of unformatted nodes in this item. */ - n_unfm_number = I_UNFM_NUM(&s_ih); - - RFALSE( ! is_indirect_le_ih(&s_ih) || ! 
n_unfm_number || - pos_in_item (p_s_path) + 1 != n_unfm_number, - "PAP-5240: invalid item %h " - "n_unfm_number = %d *p_n_pos_in_item = %d", - &s_ih, n_unfm_number, pos_in_item (p_s_path)); - - /* Calculate balance mode and position in the item to remove unformatted nodes. */ - if ( n_new_file_length == max_reiserfs_offset (inode) ) {/* Case of delete. */ - pos_in_item (p_s_path) = 0; - *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih)); - c_mode = M_DELETE; - } - else { /* Case of truncate. */ - if ( n_new_file_length < le_ih_k_offset (&s_ih) ) { - pos_in_item (p_s_path) = 0; - *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih)); - c_mode = M_DELETE; /* Delete this item. */ - } - else { - /* indirect item must be truncated starting from *p_n_pos_in_item-th position */ - pos_in_item (p_s_path) = (n_new_file_length + n_blk_size - le_ih_k_offset (&s_ih) ) >> p_s_sb->s_blocksize_bits; - - RFALSE( pos_in_item (p_s_path) > n_unfm_number, - "PAP-5250: invalid position in the item"); - - /* Either convert last unformatted node of indirect item to direct item or increase - its free space. */ - if ( pos_in_item (p_s_path) == n_unfm_number ) { - *p_n_cut_size = 0; /* Nothing to cut. */ - return M_CONVERT; /* Maybe convert last unformatted node to the direct item. */ - } - /* Calculate size to cut. */ - *p_n_cut_size = -(ih_item_len(&s_ih) - pos_in_item(p_s_path) * UNFM_P_SIZE); - - c_mode = M_CUT; /* Cut from this indirect item. */ - } - } - - RFALSE( n_unfm_number <= pos_in_item (p_s_path), - "PAP-5260: invalid position in the indirect item"); - - /* pointers to be cut */ - n_unfm_number -= pos_in_item (p_s_path); - /* Set pointer to the last unformatted node pointer that is to be cut. */ - p_n_unfm_pointer = (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed; - - - /* We go through the unformatted nodes pointers of the indirect - item and look for the unformatted nodes in the cache. 
If we - found some of them we free it, zero corresponding indirect item - entry and log buffer containing that indirect item. For this we - need to prepare last path element for logging. If some - unformatted node has b_count > 1 we must not free this - unformatted node since it is in use. */ - reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1); - // note: path could be changed, first line in for loop takes care - // of it - - for (n_counter = *p_n_removed; - n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) { - - cond_resched(); - if (item_moved (&s_ih, p_s_path)) { - need_research = 1 ; - break; - } - RFALSE( p_n_unfm_pointer < (__le32 *)B_I_PITEM(p_s_bh, &s_ih) || - p_n_unfm_pointer > (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1, - "vs-5265: pointer out of range"); + RFALSE(n_new_file_length != max_reiserfs_offset(inode), + "PAP-5210: mode must be M_DELETE"); - /* Hole, nothing to remove. */ - if ( ! get_block_num(p_n_unfm_pointer,0) ) { - (*p_n_removed)++; - continue; - } + *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih)); + return M_DELETE; + } - (*p_n_removed)++; + /* Directory item. */ + if (is_direntry_le_ih(p_le_ih)) + return prepare_for_direntry_item(p_s_path, p_le_ih, inode, + n_new_file_length, + p_n_cut_size); - tmp = get_block_num(p_n_unfm_pointer,0); - put_block_num(p_n_unfm_pointer, 0, 0); - journal_mark_dirty (th, p_s_sb, p_s_bh); - reiserfs_free_block(th, inode, tmp, 1); - if ( item_moved (&s_ih, p_s_path) ) { - need_research = 1; - break ; - } - } - - /* a trick. If the buffer has been logged, this - ** will do nothing. If we've broken the loop without - ** logging it, it will restore the buffer - ** - */ - reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh); - - /* This loop can be optimized. 
*/ - } while ( (*p_n_removed < n_unfm_number || need_research) && - search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND ); - - RFALSE( *p_n_removed < n_unfm_number, - "PAP-5310: indirect item is not found"); - RFALSE( item_moved (&s_ih, p_s_path), - "after while, comp failed, retry") ; - - if (c_mode == M_CUT) - pos_in_item (p_s_path) *= UNFM_P_SIZE; - return c_mode; - } + /* Direct item. */ + if (is_direct_le_ih(p_le_ih)) + return prepare_for_direct_item(p_s_path, p_le_ih, inode, + n_new_file_length, p_n_cut_size); + + /* Case of an indirect item. */ + { + int n_unfm_number, /* Number of the item unformatted nodes. */ + n_counter, n_blk_size; + __le32 *p_n_unfm_pointer; /* Pointer to the unformatted node number. */ + __u32 tmp; + struct item_head s_ih; /* Item header. */ + char c_mode; /* Returned mode of the balance. */ + int need_research; + + n_blk_size = p_s_sb->s_blocksize; + + /* Search for the needed object indirect item until there are no unformatted nodes to be removed. */ + do { + need_research = 0; + p_s_bh = PATH_PLAST_BUFFER(p_s_path); + /* Copy indirect item header to a temp variable. */ + copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); + /* Calculate number of unformatted nodes in this item. */ + n_unfm_number = I_UNFM_NUM(&s_ih); + + RFALSE(!is_indirect_le_ih(&s_ih) || !n_unfm_number || + pos_in_item(p_s_path) + 1 != n_unfm_number, + "PAP-5240: invalid item %h " + "n_unfm_number = %d *p_n_pos_in_item = %d", + &s_ih, n_unfm_number, pos_in_item(p_s_path)); + + /* Calculate balance mode and position in the item to remove unformatted nodes. */ + if (n_new_file_length == max_reiserfs_offset(inode)) { /* Case of delete. */ + pos_in_item(p_s_path) = 0; + *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih)); + c_mode = M_DELETE; + } else { /* Case of truncate. */ + if (n_new_file_length < le_ih_k_offset(&s_ih)) { + pos_in_item(p_s_path) = 0; + *p_n_cut_size = + -(IH_SIZE + ih_item_len(&s_ih)); + c_mode = M_DELETE; /* Delete this item. 
*/ + } else { + /* indirect item must be truncated starting from *p_n_pos_in_item-th position */ + pos_in_item(p_s_path) = + (n_new_file_length + n_blk_size - + le_ih_k_offset(&s_ih)) >> p_s_sb-> + s_blocksize_bits; + + RFALSE(pos_in_item(p_s_path) > + n_unfm_number, + "PAP-5250: invalid position in the item"); + + /* Either convert last unformatted node of indirect item to direct item or increase + its free space. */ + if (pos_in_item(p_s_path) == + n_unfm_number) { + *p_n_cut_size = 0; /* Nothing to cut. */ + return M_CONVERT; /* Maybe convert last unformatted node to the direct item. */ + } + /* Calculate size to cut. */ + *p_n_cut_size = + -(ih_item_len(&s_ih) - + pos_in_item(p_s_path) * + UNFM_P_SIZE); + + c_mode = M_CUT; /* Cut from this indirect item. */ + } + } + + RFALSE(n_unfm_number <= pos_in_item(p_s_path), + "PAP-5260: invalid position in the indirect item"); + + /* pointers to be cut */ + n_unfm_number -= pos_in_item(p_s_path); + /* Set pointer to the last unformatted node pointer that is to be cut. */ + p_n_unfm_pointer = + (__le32 *) B_I_PITEM(p_s_bh, + &s_ih) + I_UNFM_NUM(&s_ih) - + 1 - *p_n_removed; + + /* We go through the unformatted nodes pointers of the indirect + item and look for the unformatted nodes in the cache. If we + found some of them we free it, zero corresponding indirect item + entry and log buffer containing that indirect item. For this we + need to prepare last path element for logging. If some + unformatted node has b_count > 1 we must not free this + unformatted node since it is in use. 
*/ + reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1); + // note: path could be changed, first line in for loop takes care + // of it + + for (n_counter = *p_n_removed; + n_counter < n_unfm_number; + n_counter++, p_n_unfm_pointer--) { + + cond_resched(); + if (item_moved(&s_ih, p_s_path)) { + need_research = 1; + break; + } + RFALSE(p_n_unfm_pointer < + (__le32 *) B_I_PITEM(p_s_bh, &s_ih) + || p_n_unfm_pointer > + (__le32 *) B_I_PITEM(p_s_bh, + &s_ih) + + I_UNFM_NUM(&s_ih) - 1, + "vs-5265: pointer out of range"); + + /* Hole, nothing to remove. */ + if (!get_block_num(p_n_unfm_pointer, 0)) { + (*p_n_removed)++; + continue; + } + + (*p_n_removed)++; + + tmp = get_block_num(p_n_unfm_pointer, 0); + put_block_num(p_n_unfm_pointer, 0, 0); + journal_mark_dirty(th, p_s_sb, p_s_bh); + reiserfs_free_block(th, inode, tmp, 1); + if (item_moved(&s_ih, p_s_path)) { + need_research = 1; + break; + } + } + + /* a trick. If the buffer has been logged, this + ** will do nothing. If we've broken the loop without + ** logging it, it will restore the buffer + ** + */ + reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh); + + /* This loop can be optimized. 
*/ + } while ((*p_n_removed < n_unfm_number || need_research) && + search_for_position_by_key(p_s_sb, p_s_item_key, + p_s_path) == + POSITION_FOUND); + + RFALSE(*p_n_removed < n_unfm_number, + "PAP-5310: indirect item is not found"); + RFALSE(item_moved(&s_ih, p_s_path), + "after while, comp failed, retry"); + + if (c_mode == M_CUT) + pos_in_item(p_s_path) *= UNFM_P_SIZE; + return c_mode; + } } /* Calculate number of bytes which will be deleted or cut during balance */ -static int calc_deleted_bytes_number( - struct tree_balance * p_s_tb, - char c_mode - ) { - int n_del_size; - struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path); - - if ( is_statdata_le_ih (p_le_ih) ) - return 0; +static int calc_deleted_bytes_number(struct tree_balance *p_s_tb, char c_mode) +{ + int n_del_size; + struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path); + + if (is_statdata_le_ih(p_le_ih)) + return 0; + + n_del_size = + (c_mode == + M_DELETE) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0]; + if (is_direntry_le_ih(p_le_ih)) { + // return EMPTY_DIR_SIZE; /* We delete emty directoris only. */ + // we can't use EMPTY_DIR_SIZE, as old format dirs have a different + // empty size. ick. FIXME, is this right? + // + return n_del_size; + } - n_del_size = ( c_mode == M_DELETE ) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0]; - if ( is_direntry_le_ih (p_le_ih) ) { - // return EMPTY_DIR_SIZE; /* We delete emty directoris only. */ - // we can't use EMPTY_DIR_SIZE, as old format dirs have a different - // empty size. ick. FIXME, is this right? 
- // - return n_del_size ; - } - - if ( is_indirect_le_ih (p_le_ih) ) - n_del_size = (n_del_size/UNFM_P_SIZE)* - (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size);// - get_ih_free_space (p_le_ih); - return n_del_size; + if (is_indirect_le_ih(p_le_ih)) + n_del_size = (n_del_size / UNFM_P_SIZE) * (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size); // - get_ih_free_space (p_le_ih); + return n_del_size; } -static void init_tb_struct( - struct reiserfs_transaction_handle *th, - struct tree_balance * p_s_tb, - struct super_block * p_s_sb, - struct path * p_s_path, - int n_size - ) { - - BUG_ON (!th->t_trans_id); - - memset (p_s_tb,'\0',sizeof(struct tree_balance)); - p_s_tb->transaction_handle = th ; - p_s_tb->tb_sb = p_s_sb; - p_s_tb->tb_path = p_s_path; - PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; - PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; - p_s_tb->insert_size[0] = n_size; -} +static void init_tb_struct(struct reiserfs_transaction_handle *th, + struct tree_balance *p_s_tb, + struct super_block *p_s_sb, + struct path *p_s_path, int n_size) +{ + BUG_ON(!th->t_trans_id); + memset(p_s_tb, '\0', sizeof(struct tree_balance)); + p_s_tb->transaction_handle = th; + p_s_tb->tb_sb = p_s_sb; + p_s_tb->tb_path = p_s_path; + PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; + PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; + p_s_tb->insert_size[0] = n_size; +} -void padd_item (char * item, int total_length, int length) +void padd_item(char *item, int total_length, int length) { - int i; + int i; - for (i = total_length; i > length; ) - item [--i] = 0; + for (i = total_length; i > length;) + item[--i] = 0; } #ifdef REISERQUOTA_DEBUG char key2type(struct reiserfs_key *ih) { - if (is_direntry_le_key(2, ih)) - return 'd'; - if (is_direct_le_key(2, ih)) - return 'D'; - if (is_indirect_le_key(2, ih)) - return 'i'; - if (is_statdata_le_key(2, ih)) - return 's'; - return 'u'; + if (is_direntry_le_key(2, ih)) + return 
'd'; + if (is_direct_le_key(2, ih)) + return 'D'; + if (is_indirect_le_key(2, ih)) + return 'i'; + if (is_statdata_le_key(2, ih)) + return 's'; + return 'u'; } char head2type(struct item_head *ih) { - if (is_direntry_le_ih(ih)) - return 'd'; - if (is_direct_le_ih(ih)) - return 'D'; - if (is_indirect_le_ih(ih)) - return 'i'; - if (is_statdata_le_ih(ih)) - return 's'; - return 'u'; + if (is_direntry_le_ih(ih)) + return 'd'; + if (is_direct_le_ih(ih)) + return 'D'; + if (is_indirect_le_ih(ih)) + return 'i'; + if (is_statdata_le_ih(ih)) + return 's'; + return 'u'; } #endif /* Delete object item. */ -int reiserfs_delete_item (struct reiserfs_transaction_handle *th, - struct path * p_s_path, /* Path to the deleted item. */ - const struct cpu_key * p_s_item_key, /* Key to search for the deleted item. */ - struct inode * p_s_inode,/* inode is here just to update i_blocks and quotas */ - struct buffer_head * p_s_un_bh) /* NULL or unformatted node pointer. */ -{ - struct super_block * p_s_sb = p_s_inode->i_sb; - struct tree_balance s_del_balance; - struct item_head s_ih; - struct item_head *q_ih; - int quota_cut_bytes; - int n_ret_value, - n_del_size, - n_removed; +int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct path *p_s_path, /* Path to the deleted item. */ + const struct cpu_key *p_s_item_key, /* Key to search for the deleted item. */ + struct inode *p_s_inode, /* inode is here just to update i_blocks and quotas */ + struct buffer_head *p_s_un_bh) +{ /* NULL or unformatted node pointer. 
*/ + struct super_block *p_s_sb = p_s_inode->i_sb; + struct tree_balance s_del_balance; + struct item_head s_ih; + struct item_head *q_ih; + int quota_cut_bytes; + int n_ret_value, n_del_size, n_removed; #ifdef CONFIG_REISERFS_CHECK - char c_mode; - int n_iter = 0; + char c_mode; + int n_iter = 0; #endif - BUG_ON (!th->t_trans_id); + BUG_ON(!th->t_trans_id); - init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path, 0/*size is unknown*/); + init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path, + 0 /*size is unknown */ ); - while ( 1 ) { - n_removed = 0; + while (1) { + n_removed = 0; #ifdef CONFIG_REISERFS_CHECK - n_iter++; - c_mode = + n_iter++; + c_mode = #endif - prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, &n_del_size, max_reiserfs_offset (p_s_inode)); - - RFALSE( c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); - - copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); - s_del_balance.insert_size[0] = n_del_size; - - n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); - if ( n_ret_value != REPEAT_SEARCH ) - break; - - PROC_INFO_INC( p_s_sb, delete_item_restarted ); + prepare_for_delete_or_cut(th, p_s_inode, p_s_path, + p_s_item_key, &n_removed, + &n_del_size, + max_reiserfs_offset(p_s_inode)); + + RFALSE(c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); + + copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); + s_del_balance.insert_size[0] = n_del_size; + + n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); + if (n_ret_value != REPEAT_SEARCH) + break; + + PROC_INFO_INC(p_s_sb, delete_item_restarted); + + // file system changed, repeat search + n_ret_value = + search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); + if (n_ret_value == IO_ERROR) + break; + if (n_ret_value == FILE_NOT_FOUND) { + reiserfs_warning(p_s_sb, + "vs-5340: reiserfs_delete_item: " + "no items of the file %K found", + p_s_item_key); + break; + } + } /* while (1) */ - // file system changed, repeat search - n_ret_value = 
search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); - if (n_ret_value == IO_ERROR) - break; - if (n_ret_value == FILE_NOT_FOUND) { - reiserfs_warning (p_s_sb, "vs-5340: reiserfs_delete_item: " - "no items of the file %K found", p_s_item_key); - break; + if (n_ret_value != CARRY_ON) { + unfix_nodes(&s_del_balance); + return 0; + } + // reiserfs_delete_item returns item length when success + n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); + q_ih = get_ih(p_s_path); + quota_cut_bytes = ih_item_len(q_ih); + + /* hack so the quota code doesn't have to guess if the file + ** has a tail. On tail insert, we allocate quota for 1 unformatted node. + ** We test the offset because the tail might have been + ** split into multiple items, and we only want to decrement for + ** the unfm node once + */ + if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(q_ih)) { + if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) { + quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE; + } else { + quota_cut_bytes = 0; + } } - } /* while (1) */ - if ( n_ret_value != CARRY_ON ) { - unfix_nodes(&s_del_balance); - return 0; - } - - // reiserfs_delete_item returns item length when success - n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); - q_ih = get_ih(p_s_path) ; - quota_cut_bytes = ih_item_len(q_ih) ; - - /* hack so the quota code doesn't have to guess if the file - ** has a tail. On tail insert, we allocate quota for 1 unformatted node. 
- ** We test the offset because the tail might have been - ** split into multiple items, and we only want to decrement for - ** the unfm node once - */ - if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(q_ih)) { - if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) { - quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE; - } else { - quota_cut_bytes = 0 ; + if (p_s_un_bh) { + int off; + char *data; + + /* We are in direct2indirect conversion, so move tail contents + to the unformatted node */ + /* note, we do the copy before preparing the buffer because we + ** don't care about the contents of the unformatted node yet. + ** the only thing we really care about is the direct item's data + ** is in the unformatted node. + ** + ** Otherwise, we would have to call reiserfs_prepare_for_journal on + ** the unformatted node, which might schedule, meaning we'd have to + ** loop all the way back up to the start of the while loop. + ** + ** The unformatted node must be dirtied later on. We can't be + ** sure here if the entire tail has been deleted yet. + ** + ** p_s_un_bh is from the page cache (all unformatted nodes are + ** from the page cache) and might be a highmem page. So, we + ** can't use p_s_un_bh->b_data. + ** -clm + */ + + data = kmap_atomic(p_s_un_bh->b_page, KM_USER0); + off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); + memcpy(data + off, + B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), + n_ret_value); + kunmap_atomic(data, KM_USER0); } - } - - if ( p_s_un_bh ) { - int off; - char *data ; - - /* We are in direct2indirect conversion, so move tail contents - to the unformatted node */ - /* note, we do the copy before preparing the buffer because we - ** don't care about the contents of the unformatted node yet. - ** the only thing we really care about is the direct item's data - ** is in the unformatted node. 
- ** - ** Otherwise, we would have to call reiserfs_prepare_for_journal on - ** the unformatted node, which might schedule, meaning we'd have to - ** loop all the way back up to the start of the while loop. - ** - ** The unformatted node must be dirtied later on. We can't be - ** sure here if the entire tail has been deleted yet. - ** - ** p_s_un_bh is from the page cache (all unformatted nodes are - ** from the page cache) and might be a highmem page. So, we - ** can't use p_s_un_bh->b_data. - ** -clm - */ - - data = kmap_atomic(p_s_un_bh->b_page, KM_USER0); - off = ((le_ih_k_offset (&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); - memcpy(data + off, - B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), n_ret_value); - kunmap_atomic(data, KM_USER0); - } - /* Perform balancing after all resources have been collected at once. */ - do_balance(&s_del_balance, NULL, NULL, M_DELETE); + /* Perform balancing after all resources have been collected at once. */ + do_balance(&s_del_balance, NULL, NULL, M_DELETE); #ifdef REISERQUOTA_DEBUG - reiserfs_debug (p_s_sb, REISERFS_DEBUG_CODE, "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih)); + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "reiserquota delete_item(): freeing %u, id=%u type=%c", + quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih)); #endif - DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); + DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); - /* Return deleted body length */ - return n_ret_value; + /* Return deleted body length */ + return n_ret_value; } - /* Summary Of Mechanisms For Handling Collisions Between Processes: deletion of the body of the object is performed by iput(), with the @@ -1347,727 +1387,804 @@ int reiserfs_delete_item (struct reiserfs_transaction_handle *th, - Hans */ - /* this deletes item which never gets split */ -void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th, - struct inode *inode, - struct reiserfs_key * key) 
+void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, + struct inode *inode, struct reiserfs_key *key) { - struct tree_balance tb; - INITIALIZE_PATH (path); - int item_len = 0; - int tb_init = 0 ; - struct cpu_key cpu_key; - int retval; - int quota_cut_bytes = 0; - - BUG_ON (!th->t_trans_id); - - le_key2cpu_key (&cpu_key, key); - - while (1) { - retval = search_item (th->t_super, &cpu_key, &path); - if (retval == IO_ERROR) { - reiserfs_warning (th->t_super, - "vs-5350: reiserfs_delete_solid_item: " - "i/o failure occurred trying to delete %K", - &cpu_key); - break; - } - if (retval != ITEM_FOUND) { - pathrelse (&path); - // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir - if ( !( (unsigned long long) GET_HASH_VALUE (le_key_k_offset (le_key_version (key), key)) == 0 && \ - (unsigned long long) GET_GENERATION_NUMBER (le_key_k_offset (le_key_version (key), key)) == 1 ) ) - reiserfs_warning (th->t_super, "vs-5355: reiserfs_delete_solid_item: %k not found", key); - break; - } - if (!tb_init) { - tb_init = 1 ; - item_len = ih_item_len( PATH_PITEM_HEAD(&path) ); - init_tb_struct (th, &tb, th->t_super, &path, - (IH_SIZE + item_len)); - } - quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path)) ; + struct tree_balance tb; + INITIALIZE_PATH(path); + int item_len = 0; + int tb_init = 0; + struct cpu_key cpu_key; + int retval; + int quota_cut_bytes = 0; + + BUG_ON(!th->t_trans_id); + + le_key2cpu_key(&cpu_key, key); + + while (1) { + retval = search_item(th->t_super, &cpu_key, &path); + if (retval == IO_ERROR) { + reiserfs_warning(th->t_super, + "vs-5350: reiserfs_delete_solid_item: " + "i/o failure occurred trying to delete %K", + &cpu_key); + break; + } + if (retval != ITEM_FOUND) { + pathrelse(&path); + // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir + if (! 
+ ((unsigned long long) + GET_HASH_VALUE(le_key_k_offset + (le_key_version(key), key)) == 0 + && (unsigned long long) + GET_GENERATION_NUMBER(le_key_k_offset + (le_key_version(key), + key)) == 1)) + reiserfs_warning(th->t_super, + "vs-5355: reiserfs_delete_solid_item: %k not found", + key); + break; + } + if (!tb_init) { + tb_init = 1; + item_len = ih_item_len(PATH_PITEM_HEAD(&path)); + init_tb_struct(th, &tb, th->t_super, &path, + -(IH_SIZE + item_len)); + } + quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path)); - retval = fix_nodes (M_DELETE, &tb, NULL, NULL); - if (retval == REPEAT_SEARCH) { - PROC_INFO_INC( th -> t_super, delete_solid_item_restarted ); - continue; - } + retval = fix_nodes(M_DELETE, &tb, NULL, NULL); + if (retval == REPEAT_SEARCH) { + PROC_INFO_INC(th->t_super, delete_solid_item_restarted); + continue; + } - if (retval == CARRY_ON) { - do_balance (&tb, NULL, NULL, M_DELETE); - if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */ + if (retval == CARRY_ON) { + do_balance(&tb, NULL, NULL, M_DELETE); + if (inode) { /* Should we count quota for item? 
(we don't count quotas for save-links) */ #ifdef REISERQUOTA_DEBUG - reiserfs_debug (th->t_super, REISERFS_DEBUG_CODE, "reiserquota delete_solid_item(): freeing %u id=%u type=%c", quota_cut_bytes, inode->i_uid, key2type(key)); + reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, + "reiserquota delete_solid_item(): freeing %u id=%u type=%c", + quota_cut_bytes, inode->i_uid, + key2type(key)); #endif - DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes); - } - break; + DQUOT_FREE_SPACE_NODIRTY(inode, + quota_cut_bytes); + } + break; + } + // IO_ERROR, NO_DISK_SPACE, etc + reiserfs_warning(th->t_super, + "vs-5360: reiserfs_delete_solid_item: " + "could not delete %K due to fix_nodes failure", + &cpu_key); + unfix_nodes(&tb); + break; } - // IO_ERROR, NO_DISK_SPACE, etc - reiserfs_warning (th->t_super, "vs-5360: reiserfs_delete_solid_item: " - "could not delete %K due to fix_nodes failure", &cpu_key); - unfix_nodes (&tb); - break; - } - - reiserfs_check_path(&path) ; + reiserfs_check_path(&path); } - -int reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * inode) +int reiserfs_delete_object(struct reiserfs_transaction_handle *th, + struct inode *inode) { - int err; - inode->i_size = 0; - BUG_ON (!th->t_trans_id); - - /* for directory this deletes item containing "." and ".." */ - err = reiserfs_do_truncate (th, inode, NULL, 0/*no timestamp updates*/); - if (err) - return err; - + int err; + inode->i_size = 0; + BUG_ON(!th->t_trans_id); + + /* for directory this deletes item containing "." and ".." 
*/ + err = + reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ ); + if (err) + return err; + #if defined( USE_INODE_GENERATION_COUNTER ) - if( !old_format_only ( th -> t_super ) ) - { - __le32 *inode_generation; - - inode_generation = - &REISERFS_SB(th -> t_super) -> s_rs -> s_inode_generation; - *inode_generation = cpu_to_le32( le32_to_cpu( *inode_generation ) + 1 ); - } + if (!old_format_only(th->t_super)) { + __le32 *inode_generation; + + inode_generation = + &REISERFS_SB(th->t_super)->s_rs->s_inode_generation; + *inode_generation = + cpu_to_le32(le32_to_cpu(*inode_generation) + 1); + } /* USE_INODE_GENERATION_COUNTER */ #endif - reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode)); + reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode)); - return err; + return err; } -static void -unmap_buffers(struct page *page, loff_t pos) { - struct buffer_head *bh ; - struct buffer_head *head ; - struct buffer_head *next ; - unsigned long tail_index ; - unsigned long cur_index ; - - if (page) { - if (page_has_buffers(page)) { - tail_index = pos & (PAGE_CACHE_SIZE - 1) ; - cur_index = 0 ; - head = page_buffers(page) ; - bh = head ; - do { - next = bh->b_this_page ; - - /* we want to unmap the buffers that contain the tail, and - ** all the buffers after it (since the tail must be at the - ** end of the file). 
We don't want to unmap file data - ** before the tail, since it might be dirty and waiting to - ** reach disk - */ - cur_index += bh->b_size ; - if (cur_index > tail_index) { - reiserfs_unmap_buffer(bh) ; +static void unmap_buffers(struct page *page, loff_t pos) +{ + struct buffer_head *bh; + struct buffer_head *head; + struct buffer_head *next; + unsigned long tail_index; + unsigned long cur_index; + + if (page) { + if (page_has_buffers(page)) { + tail_index = pos & (PAGE_CACHE_SIZE - 1); + cur_index = 0; + head = page_buffers(page); + bh = head; + do { + next = bh->b_this_page; + + /* we want to unmap the buffers that contain the tail, and + ** all the buffers after it (since the tail must be at the + ** end of the file). We don't want to unmap file data + ** before the tail, since it might be dirty and waiting to + ** reach disk + */ + cur_index += bh->b_size; + if (cur_index > tail_index) { + reiserfs_unmap_buffer(bh); + } + bh = next; + } while (bh != head); + if (PAGE_SIZE == bh->b_size) { + clear_page_dirty(page); + } } - bh = next ; - } while (bh != head) ; - if ( PAGE_SIZE == bh->b_size ) { - clear_page_dirty(page); - } } - } } -static int maybe_indirect_to_direct (struct reiserfs_transaction_handle *th, - struct inode * p_s_inode, - struct page *page, - struct path * p_s_path, - const struct cpu_key * p_s_item_key, - loff_t n_new_file_size, - char * p_c_mode - ) { - struct super_block * p_s_sb = p_s_inode->i_sb; - int n_block_size = p_s_sb->s_blocksize; - int cut_bytes; - BUG_ON (!th->t_trans_id); - - if (n_new_file_size != p_s_inode->i_size) - BUG (); - - /* the page being sent in could be NULL if there was an i/o error - ** reading in the last block. 
The user will hit problems trying to - ** read the file, but for now we just skip the indirect2direct - */ - if (atomic_read(&p_s_inode->i_count) > 1 || - !tail_has_to_be_packed (p_s_inode) || - !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) { - // leave tail in an unformatted node - *p_c_mode = M_SKIP_BALANCING; - cut_bytes = n_block_size - (n_new_file_size & (n_block_size - 1)); - pathrelse(p_s_path); - return cut_bytes; - } - /* Permorm the conversion to a direct_item. */ - /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);*/ - return indirect2direct (th, p_s_inode, page, p_s_path, p_s_item_key, n_new_file_size, p_c_mode); -} +static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, + struct inode *p_s_inode, + struct page *page, + struct path *p_s_path, + const struct cpu_key *p_s_item_key, + loff_t n_new_file_size, char *p_c_mode) +{ + struct super_block *p_s_sb = p_s_inode->i_sb; + int n_block_size = p_s_sb->s_blocksize; + int cut_bytes; + BUG_ON(!th->t_trans_id); + + if (n_new_file_size != p_s_inode->i_size) + BUG(); + /* the page being sent in could be NULL if there was an i/o error + ** reading in the last block. The user will hit problems trying to + ** read the file, but for now we just skip the indirect2direct + */ + if (atomic_read(&p_s_inode->i_count) > 1 || + !tail_has_to_be_packed(p_s_inode) || + !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) { + // leave tail in an unformatted node + *p_c_mode = M_SKIP_BALANCING; + cut_bytes = + n_block_size - (n_new_file_size & (n_block_size - 1)); + pathrelse(p_s_path); + return cut_bytes; + } + /* Permorm the conversion to a direct_item. */ + /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode); */ + return indirect2direct(th, p_s_inode, page, p_s_path, p_s_item_key, + n_new_file_size, p_c_mode); +} /* we did indirect_to_direct conversion. 
And we have inserted direct item successesfully, but there were no disk space to cut unfm pointer being converted. Therefore we have to delete inserted direct item(s) */ -static void indirect_to_direct_roll_back (struct reiserfs_transaction_handle *th, struct inode * inode, struct path * path) +static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, + struct inode *inode, struct path *path) { - struct cpu_key tail_key; - int tail_len; - int removed; - BUG_ON (!th->t_trans_id); - - make_cpu_key (&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);// !!!! - tail_key.key_length = 4; - - tail_len = (cpu_key_k_offset (&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1; - while (tail_len) { - /* look for the last byte of the tail */ - if (search_for_position_by_key (inode->i_sb, &tail_key, path) == POSITION_NOT_FOUND) - reiserfs_panic (inode->i_sb, "vs-5615: indirect_to_direct_roll_back: found invalid item"); - RFALSE( path->pos_in_item != ih_item_len(PATH_PITEM_HEAD (path)) - 1, - "vs-5616: appended bytes found"); - PATH_LAST_POSITION (path) --; - - removed = reiserfs_delete_item (th, path, &tail_key, inode, NULL/*unbh not needed*/); - RFALSE( removed <= 0 || removed > tail_len, - "vs-5617: there was tail %d bytes, removed item length %d bytes", - tail_len, removed); - tail_len -= removed; - set_cpu_key_k_offset (&tail_key, cpu_key_k_offset (&tail_key) - removed); - } - reiserfs_warning (inode->i_sb, "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space"); - //mark_file_without_tail (inode); - mark_inode_dirty (inode); + struct cpu_key tail_key; + int tail_len; + int removed; + BUG_ON(!th->t_trans_id); + + make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); // !!!! 
+ tail_key.key_length = 4; + + tail_len = + (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1; + while (tail_len) { + /* look for the last byte of the tail */ + if (search_for_position_by_key(inode->i_sb, &tail_key, path) == + POSITION_NOT_FOUND) + reiserfs_panic(inode->i_sb, + "vs-5615: indirect_to_direct_roll_back: found invalid item"); + RFALSE(path->pos_in_item != + ih_item_len(PATH_PITEM_HEAD(path)) - 1, + "vs-5616: appended bytes found"); + PATH_LAST_POSITION(path)--; + + removed = + reiserfs_delete_item(th, path, &tail_key, inode, + NULL /*unbh not needed */ ); + RFALSE(removed <= 0 + || removed > tail_len, + "vs-5617: there was tail %d bytes, removed item length %d bytes", + tail_len, removed); + tail_len -= removed; + set_cpu_key_k_offset(&tail_key, + cpu_key_k_offset(&tail_key) - removed); + } + reiserfs_warning(inode->i_sb, + "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space"); + //mark_file_without_tail (inode); + mark_inode_dirty(inode); } - /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */ -int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th, - struct path * p_s_path, - struct cpu_key * p_s_item_key, - struct inode * p_s_inode, - struct page *page, - loff_t n_new_file_size) +int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, + struct path *p_s_path, + struct cpu_key *p_s_item_key, + struct inode *p_s_inode, + struct page *page, loff_t n_new_file_size) { - struct super_block * p_s_sb = p_s_inode->i_sb; - /* Every function which is going to call do_balance must first - create a tree_balance structure. Then it must fill up this - structure by using the init_tb_struct and fix_nodes functions. - After that we can make tree balancing. */ - struct tree_balance s_cut_balance; - struct item_head *p_le_ih; - int n_cut_size = 0, /* Amount to be cut. 
*/ - n_ret_value = CARRY_ON, - n_removed = 0, /* Number of the removed unformatted nodes. */ - n_is_inode_locked = 0; - char c_mode; /* Mode of the balance. */ - int retval2 = -1; - int quota_cut_bytes; - loff_t tail_pos = 0; - - BUG_ON (!th->t_trans_id); - - init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, n_cut_size); - - - /* Repeat this loop until we either cut the item without needing - to balance, or we fix_nodes without schedule occurring */ - while ( 1 ) { - /* Determine the balance mode, position of the first byte to - be cut, and size to be cut. In case of the indirect item - free unformatted nodes which are pointed to by the cut - pointers. */ - - c_mode = prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, - &n_cut_size, n_new_file_size); - if ( c_mode == M_CONVERT ) { - /* convert last unformatted node to direct item or leave - tail in the unformatted node */ - RFALSE( n_ret_value != CARRY_ON, "PAP-5570: can not convert twice"); - - n_ret_value = maybe_indirect_to_direct (th, p_s_inode, page, p_s_path, p_s_item_key, - n_new_file_size, &c_mode); - if ( c_mode == M_SKIP_BALANCING ) - /* tail has been left in the unformatted node */ - return n_ret_value; - - n_is_inode_locked = 1; - - /* removing of last unformatted node will change value we - have to return to truncate. Save it */ - retval2 = n_ret_value; - /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1));*/ - - /* So, we have performed the first part of the conversion: - inserting the new direct item. Now we are removing the - last unformatted node pointer. Set key to search for - it. 
*/ - set_cpu_key_k_type (p_s_item_key, TYPE_INDIRECT); - p_s_item_key->key_length = 4; - n_new_file_size -= (n_new_file_size & (p_s_sb->s_blocksize - 1)); - tail_pos = n_new_file_size; - set_cpu_key_k_offset (p_s_item_key, n_new_file_size + 1); - if ( search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ){ - print_block (PATH_PLAST_BUFFER (p_s_path), 3, PATH_LAST_POSITION (p_s_path) - 1, PATH_LAST_POSITION (p_s_path) + 1); - reiserfs_panic(p_s_sb, "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)", p_s_item_key); - } - continue; - } - if (n_cut_size == 0) { - pathrelse (p_s_path); - return 0; - } + struct super_block *p_s_sb = p_s_inode->i_sb; + /* Every function which is going to call do_balance must first + create a tree_balance structure. Then it must fill up this + structure by using the init_tb_struct and fix_nodes functions. + After that we can make tree balancing. */ + struct tree_balance s_cut_balance; + struct item_head *p_le_ih; + int n_cut_size = 0, /* Amount to be cut. */ + n_ret_value = CARRY_ON, n_removed = 0, /* Number of the removed unformatted nodes. */ + n_is_inode_locked = 0; + char c_mode; /* Mode of the balance. */ + int retval2 = -1; + int quota_cut_bytes; + loff_t tail_pos = 0; + + BUG_ON(!th->t_trans_id); + + init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, + n_cut_size); + + /* Repeat this loop until we either cut the item without needing + to balance, or we fix_nodes without schedule occurring */ + while (1) { + /* Determine the balance mode, position of the first byte to + be cut, and size to be cut. In case of the indirect item + free unformatted nodes which are pointed to by the cut + pointers. 
*/ + + c_mode = + prepare_for_delete_or_cut(th, p_s_inode, p_s_path, + p_s_item_key, &n_removed, + &n_cut_size, n_new_file_size); + if (c_mode == M_CONVERT) { + /* convert last unformatted node to direct item or leave + tail in the unformatted node */ + RFALSE(n_ret_value != CARRY_ON, + "PAP-5570: can not convert twice"); + + n_ret_value = + maybe_indirect_to_direct(th, p_s_inode, page, + p_s_path, p_s_item_key, + n_new_file_size, &c_mode); + if (c_mode == M_SKIP_BALANCING) + /* tail has been left in the unformatted node */ + return n_ret_value; + + n_is_inode_locked = 1; + + /* removing of last unformatted node will change value we + have to return to truncate. Save it */ + retval2 = n_ret_value; + /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1)); */ + + /* So, we have performed the first part of the conversion: + inserting the new direct item. Now we are removing the + last unformatted node pointer. Set key to search for + it. */ + set_cpu_key_k_type(p_s_item_key, TYPE_INDIRECT); + p_s_item_key->key_length = 4; + n_new_file_size -= + (n_new_file_size & (p_s_sb->s_blocksize - 1)); + tail_pos = n_new_file_size; + set_cpu_key_k_offset(p_s_item_key, n_new_file_size + 1); + if (search_for_position_by_key + (p_s_sb, p_s_item_key, + p_s_path) == POSITION_NOT_FOUND) { + print_block(PATH_PLAST_BUFFER(p_s_path), 3, + PATH_LAST_POSITION(p_s_path) - 1, + PATH_LAST_POSITION(p_s_path) + 1); + reiserfs_panic(p_s_sb, + "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)", + p_s_item_key); + } + continue; + } + if (n_cut_size == 0) { + pathrelse(p_s_path); + return 0; + } + + s_cut_balance.insert_size[0] = n_cut_size; + + n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL); + if (n_ret_value != REPEAT_SEARCH) + break; + + PROC_INFO_INC(p_s_sb, cut_from_item_restarted); + + n_ret_value = + search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); + if (n_ret_value == POSITION_FOUND) + continue; - 
s_cut_balance.insert_size[0] = n_cut_size; - - n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL); - if ( n_ret_value != REPEAT_SEARCH ) - break; - - PROC_INFO_INC( p_s_sb, cut_from_item_restarted ); - - n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); - if (n_ret_value == POSITION_FOUND) - continue; - - reiserfs_warning (p_s_sb, "PAP-5610: reiserfs_cut_from_item: item %K not found", p_s_item_key); - unfix_nodes (&s_cut_balance); - return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT; - } /* while */ - - // check fix_nodes results (IO_ERROR or NO_DISK_SPACE) - if ( n_ret_value != CARRY_ON ) { - if ( n_is_inode_locked ) { - // FIXME: this seems to be not needed: we are always able - // to cut item - indirect_to_direct_roll_back (th, p_s_inode, p_s_path); + reiserfs_warning(p_s_sb, + "PAP-5610: reiserfs_cut_from_item: item %K not found", + p_s_item_key); + unfix_nodes(&s_cut_balance); + return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT; + } /* while */ + + // check fix_nodes results (IO_ERROR or NO_DISK_SPACE) + if (n_ret_value != CARRY_ON) { + if (n_is_inode_locked) { + // FIXME: this seems to be not needed: we are always able + // to cut item + indirect_to_direct_roll_back(th, p_s_inode, p_s_path); + } + if (n_ret_value == NO_DISK_SPACE) + reiserfs_warning(p_s_sb, "NO_DISK_SPACE"); + unfix_nodes(&s_cut_balance); + return -EIO; } - if (n_ret_value == NO_DISK_SPACE) - reiserfs_warning (p_s_sb, "NO_DISK_SPACE"); - unfix_nodes (&s_cut_balance); - return -EIO; - } - - /* go ahead and perform balancing */ - - RFALSE( c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode"); - - /* Calculate number of bytes that need to be cut from the item. */ - quota_cut_bytes = ( c_mode == M_DELETE ) ? 
ih_item_len(get_ih(p_s_path)) : -s_cut_balance.insert_size[0]; - if (retval2 == -1) - n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode); - else - n_ret_value = retval2; - - - /* For direct items, we only change the quota when deleting the last - ** item. - */ - p_le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path); - if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) { - if (c_mode == M_DELETE && - (le_ih_k_offset (p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1 ) { - // FIXME: this is to keep 3.5 happy - REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX; - quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE ; - } else { - quota_cut_bytes = 0 ; + + /* go ahead and perform balancing */ + + RFALSE(c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode"); + + /* Calculate number of bytes that need to be cut from the item. */ + quota_cut_bytes = + (c_mode == + M_DELETE) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance. + insert_size[0]; + if (retval2 == -1) + n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode); + else + n_ret_value = retval2; + + /* For direct items, we only change the quota when deleting the last + ** item. + */ + p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path); + if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) { + if (c_mode == M_DELETE && + (le_ih_k_offset(p_le_ih) & (p_s_sb->s_blocksize - 1)) == + 1) { + // FIXME: this is to keep 3.5 happy + REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX; + quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE; + } else { + quota_cut_bytes = 0; + } } - } #ifdef CONFIG_REISERFS_CHECK - if (n_is_inode_locked) { - struct item_head * le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path); - /* we are going to complete indirect2direct conversion. 
Make - sure, that we exactly remove last unformatted node pointer - of the item */ - if (!is_indirect_le_ih (le_ih)) - reiserfs_panic (p_s_sb, "vs-5652: reiserfs_cut_from_item: " - "item must be indirect %h", le_ih); - - if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE) - reiserfs_panic (p_s_sb, "vs-5653: reiserfs_cut_from_item: " - "completing indirect2direct conversion indirect item %h " - "being deleted must be of 4 byte long", le_ih); - - if (c_mode == M_CUT && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { - reiserfs_panic (p_s_sb, "vs-5654: reiserfs_cut_from_item: " - "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)", - le_ih, s_cut_balance.insert_size[0]); + if (n_is_inode_locked) { + struct item_head *le_ih = + PATH_PITEM_HEAD(s_cut_balance.tb_path); + /* we are going to complete indirect2direct conversion. Make + sure, that we exactly remove last unformatted node pointer + of the item */ + if (!is_indirect_le_ih(le_ih)) + reiserfs_panic(p_s_sb, + "vs-5652: reiserfs_cut_from_item: " + "item must be indirect %h", le_ih); + + if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE) + reiserfs_panic(p_s_sb, + "vs-5653: reiserfs_cut_from_item: " + "completing indirect2direct conversion indirect item %h " + "being deleted must be of 4 byte long", + le_ih); + + if (c_mode == M_CUT + && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { + reiserfs_panic(p_s_sb, + "vs-5654: reiserfs_cut_from_item: " + "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)", + le_ih, s_cut_balance.insert_size[0]); + } + /* it would be useful to make sure, that right neighboring + item is direct item of this file */ } - /* it would be useful to make sure, that right neighboring - item is direct item of this file */ - } #endif - - do_balance(&s_cut_balance, NULL, NULL, c_mode); - if ( n_is_inode_locked ) { - /* we've done an indirect->direct conversion. 
when the data block - ** was freed, it was removed from the list of blocks that must - ** be flushed before the transaction commits, make sure to - ** unmap and invalidate it - */ - unmap_buffers(page, tail_pos); - REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask ; - } + + do_balance(&s_cut_balance, NULL, NULL, c_mode); + if (n_is_inode_locked) { + /* we've done an indirect->direct conversion. when the data block + ** was freed, it was removed from the list of blocks that must + ** be flushed before the transaction commits, make sure to + ** unmap and invalidate it + */ + unmap_buffers(page, tail_pos); + REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask; + } #ifdef REISERQUOTA_DEBUG - reiserfs_debug (p_s_inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, '?'); + reiserfs_debug(p_s_inode->i_sb, REISERFS_DEBUG_CODE, + "reiserquota cut_from_item(): freeing %u id=%u type=%c", + quota_cut_bytes, p_s_inode->i_uid, '?'); #endif - DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); - return n_ret_value; + DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); + return n_ret_value; } -static void truncate_directory (struct reiserfs_transaction_handle *th, struct inode * inode) +static void truncate_directory(struct reiserfs_transaction_handle *th, + struct inode *inode) { - BUG_ON (!th->t_trans_id); - if (inode->i_nlink) - reiserfs_warning (inode->i_sb, - "vs-5655: truncate_directory: link count != 0"); - - set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), DOT_OFFSET); - set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_DIRENTRY); - reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode)); - reiserfs_update_sd(th, inode) ; - set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), SD_OFFSET); - set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_STAT_DATA); + BUG_ON(!th->t_trans_id); + if (inode->i_nlink) + reiserfs_warning(inode->i_sb, + "vs-5655: 
truncate_directory: link count != 0"); + + set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET); + set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY); + reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode)); + reiserfs_update_sd(th, inode); + set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET); + set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA); } +/* Truncate file to the new size. Note, this must be called with a transaction + already started */ +int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, /* ->i_size contains new + size */ + struct page *page, /* up to date for last block */ + int update_timestamps /* when it is called by + file_release to convert + the tail - no timestamps + should be updated */ + ) +{ + INITIALIZE_PATH(s_search_path); /* Path to the current object item. */ + struct item_head *p_le_ih; /* Pointer to an item header. */ + struct cpu_key s_item_key; /* Key to search for a previous file item. */ + loff_t n_file_size, /* Old file size. */ + n_new_file_size; /* New file size. */ + int n_deleted; /* Number of deleted or truncated bytes. */ + int retval; + int err = 0; + + BUG_ON(!th->t_trans_id); + if (! + (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) + || S_ISLNK(p_s_inode->i_mode))) + return 0; + + if (S_ISDIR(p_s_inode->i_mode)) { + // deletion of directory - no need to update timestamps + truncate_directory(th, p_s_inode); + return 0; + } + /* Get new file size. */ + n_new_file_size = p_s_inode->i_size; + // FIXME: note, that key type is unimportant here + make_cpu_key(&s_item_key, p_s_inode, max_reiserfs_offset(p_s_inode), + TYPE_DIRECT, 3); -/* Truncate file to the new size. 
Note, this must be called with a transaction - already started */ -int reiserfs_do_truncate (struct reiserfs_transaction_handle *th, - struct inode * p_s_inode, /* ->i_size contains new - size */ - struct page *page, /* up to date for last block */ - int update_timestamps /* when it is called by - file_release to convert - the tail - no timestamps - should be updated */ - ) { - INITIALIZE_PATH (s_search_path); /* Path to the current object item. */ - struct item_head * p_le_ih; /* Pointer to an item header. */ - struct cpu_key s_item_key; /* Key to search for a previous file item. */ - loff_t n_file_size, /* Old file size. */ - n_new_file_size;/* New file size. */ - int n_deleted; /* Number of deleted or truncated bytes. */ - int retval; - int err = 0; - - BUG_ON (!th->t_trans_id); - if ( ! (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) || S_ISLNK(p_s_inode->i_mode)) ) - return 0; + retval = + search_for_position_by_key(p_s_inode->i_sb, &s_item_key, + &s_search_path); + if (retval == IO_ERROR) { + reiserfs_warning(p_s_inode->i_sb, + "vs-5657: reiserfs_do_truncate: " + "i/o failure occurred trying to truncate %K", + &s_item_key); + err = -EIO; + goto out; + } + if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { + reiserfs_warning(p_s_inode->i_sb, + "PAP-5660: reiserfs_do_truncate: " + "wrong result %d of search for %K", retval, + &s_item_key); + + err = -EIO; + goto out; + } - if (S_ISDIR(p_s_inode->i_mode)) { - // deletion of directory - no need to update timestamps - truncate_directory (th, p_s_inode); - return 0; - } - - /* Get new file size. 
*/ - n_new_file_size = p_s_inode->i_size; - - // FIXME: note, that key type is unimportant here - make_cpu_key (&s_item_key, p_s_inode, max_reiserfs_offset (p_s_inode), TYPE_DIRECT, 3); - - retval = search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path); - if (retval == IO_ERROR) { - reiserfs_warning (p_s_inode->i_sb, "vs-5657: reiserfs_do_truncate: " - "i/o failure occurred trying to truncate %K", &s_item_key); - err = -EIO; - goto out; - } - if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { - reiserfs_warning (p_s_inode->i_sb, "PAP-5660: reiserfs_do_truncate: " - "wrong result %d of search for %K", retval, &s_item_key); - - err = -EIO; - goto out; - } - - s_search_path.pos_in_item --; - - /* Get real file size (total length of all file items) */ - p_le_ih = PATH_PITEM_HEAD(&s_search_path); - if ( is_statdata_le_ih (p_le_ih) ) - n_file_size = 0; - else { - loff_t offset = le_ih_k_offset (p_le_ih); - int bytes = op_bytes_number (p_le_ih,p_s_inode->i_sb->s_blocksize); - - /* this may mismatch with real file size: if last direct item - had no padding zeros and last unformatted node had no free - space, this file would have this file size */ - n_file_size = offset + bytes - 1; - } - /* - * are we doing a full truncate or delete, if so - * kick in the reada code - */ - if (n_new_file_size == 0) - s_search_path.reada = PATH_READA | PATH_READA_BACK; - - if ( n_file_size == 0 || n_file_size < n_new_file_size ) { - goto update_and_out ; - } - - /* Update key to search for the last file item. */ - set_cpu_key_k_offset (&s_item_key, n_file_size); - - do { - /* Cut or delete file item. 
*/ - n_deleted = reiserfs_cut_from_item(th, &s_search_path, &s_item_key, p_s_inode, page, n_new_file_size); - if (n_deleted < 0) { - reiserfs_warning (p_s_inode->i_sb, "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed"); - reiserfs_check_path(&s_search_path) ; - return 0; + s_search_path.pos_in_item--; + + /* Get real file size (total length of all file items) */ + p_le_ih = PATH_PITEM_HEAD(&s_search_path); + if (is_statdata_le_ih(p_le_ih)) + n_file_size = 0; + else { + loff_t offset = le_ih_k_offset(p_le_ih); + int bytes = + op_bytes_number(p_le_ih, p_s_inode->i_sb->s_blocksize); + + /* this may mismatch with real file size: if last direct item + had no padding zeros and last unformatted node had no free + space, this file would have this file size */ + n_file_size = offset + bytes - 1; + } + /* + * are we doing a full truncate or delete, if so + * kick in the reada code + */ + if (n_new_file_size == 0) + s_search_path.reada = PATH_READA | PATH_READA_BACK; + + if (n_file_size == 0 || n_file_size < n_new_file_size) { + goto update_and_out; } - RFALSE( n_deleted > n_file_size, - "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K", - n_deleted, n_file_size, &s_item_key); + /* Update key to search for the last file item. */ + set_cpu_key_k_offset(&s_item_key, n_file_size); + + do { + /* Cut or delete file item. */ + n_deleted = + reiserfs_cut_from_item(th, &s_search_path, &s_item_key, + p_s_inode, page, n_new_file_size); + if (n_deleted < 0) { + reiserfs_warning(p_s_inode->i_sb, + "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed"); + reiserfs_check_path(&s_search_path); + return 0; + } - /* Change key to search the last file item. 
*/ - n_file_size -= n_deleted; + RFALSE(n_deleted > n_file_size, + "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K", + n_deleted, n_file_size, &s_item_key); - set_cpu_key_k_offset (&s_item_key, n_file_size); + /* Change key to search the last file item. */ + n_file_size -= n_deleted; - /* While there are bytes to truncate and previous file item is presented in the tree. */ + set_cpu_key_k_offset(&s_item_key, n_file_size); - /* - ** This loop could take a really long time, and could log - ** many more blocks than a transaction can hold. So, we do a polite - ** journal end here, and if the transaction needs ending, we make - ** sure the file is consistent before ending the current trans - ** and starting a new one - */ - if (journal_transaction_should_end(th, th->t_blocks_allocated)) { - int orig_len_alloc = th->t_blocks_allocated ; - decrement_counters_in_path(&s_search_path) ; - - if (update_timestamps) { - p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC; - } - reiserfs_update_sd(th, p_s_inode) ; - - err = journal_end(th, p_s_inode->i_sb, orig_len_alloc) ; - if (err) - goto out; - err = journal_begin (th, p_s_inode->i_sb, - JOURNAL_PER_BALANCE_CNT * 6); - if (err) - goto out; - reiserfs_update_inode_transaction(p_s_inode) ; + /* While there are bytes to truncate and previous file item is presented in the tree. */ + + /* + ** This loop could take a really long time, and could log + ** many more blocks than a transaction can hold. 
So, we do a polite + ** journal end here, and if the transaction needs ending, we make + ** sure the file is consistent before ending the current trans + ** and starting a new one + */ + if (journal_transaction_should_end(th, th->t_blocks_allocated)) { + int orig_len_alloc = th->t_blocks_allocated; + decrement_counters_in_path(&s_search_path); + + if (update_timestamps) { + p_s_inode->i_mtime = p_s_inode->i_ctime = + CURRENT_TIME_SEC; + } + reiserfs_update_sd(th, p_s_inode); + + err = journal_end(th, p_s_inode->i_sb, orig_len_alloc); + if (err) + goto out; + err = journal_begin(th, p_s_inode->i_sb, + JOURNAL_PER_BALANCE_CNT * 6); + if (err) + goto out; + reiserfs_update_inode_transaction(p_s_inode); + } + } while (n_file_size > ROUND_UP(n_new_file_size) && + search_for_position_by_key(p_s_inode->i_sb, &s_item_key, + &s_search_path) == POSITION_FOUND); + + RFALSE(n_file_size > ROUND_UP(n_new_file_size), + "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d", + n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid); + + update_and_out: + if (update_timestamps) { + // this is truncate, not file closing + p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC; } - } while ( n_file_size > ROUND_UP (n_new_file_size) && - search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path) == POSITION_FOUND ) ; - - RFALSE( n_file_size > ROUND_UP (n_new_file_size), - "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d", - n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid); - -update_and_out: - if (update_timestamps) { - // this is truncate, not file closing - p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC; - } - reiserfs_update_sd (th, p_s_inode); - -out: - pathrelse(&s_search_path) ; - return err; -} + reiserfs_update_sd(th, p_s_inode); + out: + pathrelse(&s_search_path); + return err; +} #ifdef CONFIG_REISERFS_CHECK // this makes sure, that we __append__, not overwrite or add holes 
-static void check_research_for_paste (struct path * path, - const struct cpu_key * p_s_key) +static void check_research_for_paste(struct path *path, + const struct cpu_key *p_s_key) { - struct item_head * found_ih = get_ih (path); - - if (is_direct_le_ih (found_ih)) { - if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_last_bh (path)->b_size) != - cpu_key_k_offset (p_s_key) || - op_bytes_number (found_ih, get_last_bh (path)->b_size) != pos_in_item (path)) - reiserfs_panic (NULL, "PAP-5720: check_research_for_paste: " - "found direct item %h or position (%d) does not match to key %K", - found_ih, pos_in_item (path), p_s_key); - } - if (is_indirect_le_ih (found_ih)) { - if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_last_bh (path)->b_size) != cpu_key_k_offset (p_s_key) || - I_UNFM_NUM (found_ih) != pos_in_item (path) || - get_ih_free_space (found_ih) != 0) - reiserfs_panic (NULL, "PAP-5730: check_research_for_paste: " - "found indirect item (%h) or position (%d) does not match to key (%K)", - found_ih, pos_in_item (path), p_s_key); - } + struct item_head *found_ih = get_ih(path); + + if (is_direct_le_ih(found_ih)) { + if (le_ih_k_offset(found_ih) + + op_bytes_number(found_ih, + get_last_bh(path)->b_size) != + cpu_key_k_offset(p_s_key) + || op_bytes_number(found_ih, + get_last_bh(path)->b_size) != + pos_in_item(path)) + reiserfs_panic(NULL, + "PAP-5720: check_research_for_paste: " + "found direct item %h or position (%d) does not match to key %K", + found_ih, pos_in_item(path), p_s_key); + } + if (is_indirect_le_ih(found_ih)) { + if (le_ih_k_offset(found_ih) + + op_bytes_number(found_ih, + get_last_bh(path)->b_size) != + cpu_key_k_offset(p_s_key) + || I_UNFM_NUM(found_ih) != pos_in_item(path) + || get_ih_free_space(found_ih) != 0) + reiserfs_panic(NULL, + "PAP-5730: check_research_for_paste: " + "found indirect item (%h) or position (%d) does not match to key (%K)", + found_ih, pos_in_item(path), p_s_key); + } } -#endif /* config reiserfs 
check */ - +#endif /* config reiserfs check */ /* Paste bytes to the existing item. Returns bytes number pasted into the item. */ -int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, - struct path * p_s_search_path, /* Path to the pasted item. */ - const struct cpu_key * p_s_key, /* Key to search for the needed item.*/ - struct inode * inode, /* Inode item belongs to */ - const char * p_c_body, /* Pointer to the bytes to paste. */ - int n_pasted_size) /* Size of pasted bytes. */ -{ - struct tree_balance s_paste_balance; - int retval; - int fs_gen; +int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct path *p_s_search_path, /* Path to the pasted item. */ + const struct cpu_key *p_s_key, /* Key to search for the needed item. */ + struct inode *inode, /* Inode item belongs to */ + const char *p_c_body, /* Pointer to the bytes to paste. */ + int n_pasted_size) +{ /* Size of pasted bytes. */ + struct tree_balance s_paste_balance; + int retval; + int fs_gen; + + BUG_ON(!th->t_trans_id); - BUG_ON (!th->t_trans_id); - - fs_gen = get_generation(inode->i_sb) ; + fs_gen = get_generation(inode->i_sb); #ifdef REISERQUOTA_DEBUG - reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota paste_into_item(): allocating %u id=%u type=%c", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key))); + reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, + "reiserquota paste_into_item(): allocating %u id=%u type=%c", + n_pasted_size, inode->i_uid, + key2type(&(p_s_key->on_disk_key))); #endif - if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) { - pathrelse(p_s_search_path); - return -EDQUOT; - } - init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size); + if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) { + pathrelse(p_s_search_path); + return -EDQUOT; + } + init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, + n_pasted_size); #ifdef DISPLACE_NEW_PACKING_LOCALITIES - s_paste_balance.key = 
p_s_key->on_disk_key; + s_paste_balance.key = p_s_key->on_disk_key; #endif - /* DQUOT_* can schedule, must check before the fix_nodes */ - if (fs_changed(fs_gen, inode->i_sb)) { - goto search_again; - } - - while ((retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) == -REPEAT_SEARCH ) { -search_again: - /* file system changed while we were in the fix_nodes */ - PROC_INFO_INC( th -> t_super, paste_into_item_restarted ); - retval = search_for_position_by_key (th->t_super, p_s_key, p_s_search_path); - if (retval == IO_ERROR) { - retval = -EIO ; - goto error_out ; + /* DQUOT_* can schedule, must check before the fix_nodes */ + if (fs_changed(fs_gen, inode->i_sb)) { + goto search_again; } - if (retval == POSITION_FOUND) { - reiserfs_warning (inode->i_sb, "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", p_s_key); - retval = -EEXIST ; - goto error_out ; - } - + + while ((retval = + fix_nodes(M_PASTE, &s_paste_balance, NULL, + p_c_body)) == REPEAT_SEARCH) { + search_again: + /* file system changed while we were in the fix_nodes */ + PROC_INFO_INC(th->t_super, paste_into_item_restarted); + retval = + search_for_position_by_key(th->t_super, p_s_key, + p_s_search_path); + if (retval == IO_ERROR) { + retval = -EIO; + goto error_out; + } + if (retval == POSITION_FOUND) { + reiserfs_warning(inode->i_sb, + "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", + p_s_key); + retval = -EEXIST; + goto error_out; + } #ifdef CONFIG_REISERFS_CHECK - check_research_for_paste (p_s_search_path, p_s_key); + check_research_for_paste(p_s_search_path, p_s_key); #endif - } + } - /* Perform balancing after all resources are collected by fix_nodes, and - accessing them will not risk triggering schedule. */ - if ( retval == CARRY_ON ) { - do_balance(&s_paste_balance, NULL/*ih*/, p_c_body, M_PASTE); - return 0; - } - retval = (retval == NO_DISK_SPACE) ? 
-ENOSPC : -EIO; -error_out: - /* this also releases the path */ - unfix_nodes(&s_paste_balance); + /* Perform balancing after all resources are collected by fix_nodes, and + accessing them will not risk triggering schedule. */ + if (retval == CARRY_ON) { + do_balance(&s_paste_balance, NULL /*ih */ , p_c_body, M_PASTE); + return 0; + } + retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; + error_out: + /* this also releases the path */ + unfix_nodes(&s_paste_balance); #ifdef REISERQUOTA_DEBUG - reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota paste_into_item(): freeing %u id=%u type=%c", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key))); + reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, + "reiserquota paste_into_item(): freeing %u id=%u type=%c", + n_pasted_size, inode->i_uid, + key2type(&(p_s_key->on_disk_key))); #endif - DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); - return retval ; + DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); + return retval; } - /* Insert new item into the buffer at the path. */ -int reiserfs_insert_item(struct reiserfs_transaction_handle *th, - struct path * p_s_path, /* Path to the inserteded item. */ - const struct cpu_key * key, - struct item_head * p_s_ih, /* Pointer to the item header to insert.*/ - struct inode * inode, - const char * p_c_body) /* Pointer to the bytes to insert. */ -{ - struct tree_balance s_ins_balance; - int retval; - int fs_gen = 0 ; - int quota_bytes = 0 ; - - BUG_ON (!th->t_trans_id); - - if (inode) { /* Do we count quotas for item? 
*/ - fs_gen = get_generation(inode->i_sb); - quota_bytes = ih_item_len(p_s_ih); - - /* hack so the quota code doesn't have to guess if the file has - ** a tail, links are always tails, so there's no guessing needed - */ - if (!S_ISLNK (inode->i_mode) && is_direct_le_ih(p_s_ih)) { - quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE ; - } +int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct path *p_s_path, /* Path to the inserteded item. */ + const struct cpu_key *key, struct item_head *p_s_ih, /* Pointer to the item header to insert. */ + struct inode *inode, const char *p_c_body) +{ /* Pointer to the bytes to insert. */ + struct tree_balance s_ins_balance; + int retval; + int fs_gen = 0; + int quota_bytes = 0; + + BUG_ON(!th->t_trans_id); + + if (inode) { /* Do we count quotas for item? */ + fs_gen = get_generation(inode->i_sb); + quota_bytes = ih_item_len(p_s_ih); + + /* hack so the quota code doesn't have to guess if the file has + ** a tail, links are always tails, so there's no guessing needed + */ + if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_s_ih)) { + quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE; + } #ifdef REISERQUOTA_DEBUG - reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota insert_item(): allocating %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih)); + reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, + "reiserquota insert_item(): allocating %u id=%u type=%c", + quota_bytes, inode->i_uid, head2type(p_s_ih)); #endif - /* We can't dirty inode here. It would be immediately written but - * appropriate stat item isn't inserted yet... */ - if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) { - pathrelse(p_s_path); - return -EDQUOT; + /* We can't dirty inode here. It would be immediately written but + * appropriate stat item isn't inserted yet... 
*/ + if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) { + pathrelse(p_s_path); + return -EDQUOT; + } } - } - init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih)); + init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, + IH_SIZE + ih_item_len(p_s_ih)); #ifdef DISPLACE_NEW_PACKING_LOCALITIES - s_ins_balance.key = key->on_disk_key; + s_ins_balance.key = key->on_disk_key; #endif - /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */ - if (inode && fs_changed(fs_gen, inode->i_sb)) { - goto search_again; - } - - while ( (retval = fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, p_c_body)) == REPEAT_SEARCH) { -search_again: - /* file system changed while we were in the fix_nodes */ - PROC_INFO_INC( th -> t_super, insert_item_restarted ); - retval = search_item (th->t_super, key, p_s_path); - if (retval == IO_ERROR) { - retval = -EIO; - goto error_out ; + /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */ + if (inode && fs_changed(fs_gen, inode->i_sb)) { + goto search_again; } - if (retval == ITEM_FOUND) { - reiserfs_warning (th->t_super, "PAP-5760: reiserfs_insert_item: " - "key %K already exists in the tree", key); - retval = -EEXIST ; - goto error_out; + + while ((retval = + fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, + p_c_body)) == REPEAT_SEARCH) { + search_again: + /* file system changed while we were in the fix_nodes */ + PROC_INFO_INC(th->t_super, insert_item_restarted); + retval = search_item(th->t_super, key, p_s_path); + if (retval == IO_ERROR) { + retval = -EIO; + goto error_out; + } + if (retval == ITEM_FOUND) { + reiserfs_warning(th->t_super, + "PAP-5760: reiserfs_insert_item: " + "key %K already exists in the tree", + key); + retval = -EEXIST; + goto error_out; + } } - } - /* make balancing after all resources will be collected at a time */ - if ( retval == CARRY_ON ) { - do_balance (&s_ins_balance, p_s_ih, p_c_body, M_INSERT); - return 0; - } + /* make balancing after all 
resources will be collected at a time */ + if (retval == CARRY_ON) { + do_balance(&s_ins_balance, p_s_ih, p_c_body, M_INSERT); + return 0; + } - retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; -error_out: - /* also releases the path */ - unfix_nodes(&s_ins_balance); + retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; + error_out: + /* also releases the path */ + unfix_nodes(&s_ins_balance); #ifdef REISERQUOTA_DEBUG - reiserfs_debug (th->t_super, REISERFS_DEBUG_CODE, "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih)); + reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, + "reiserquota insert_item(): freeing %u id=%u type=%c", + quota_bytes, inode->i_uid, head2type(p_s_ih)); #endif - if (inode) - DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes) ; - return retval; + if (inode) + DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes); + return retval; } - - - - diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 4b80ab95d33..6951c35755b 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -35,83 +35,81 @@ static const char reiserfs_3_5_magic_string[] = REISERFS_SUPER_MAGIC_STRING; static const char reiserfs_3_6_magic_string[] = REISER2FS_SUPER_MAGIC_STRING; static const char reiserfs_jr_magic_string[] = REISER2FS_JR_SUPER_MAGIC_STRING; -int is_reiserfs_3_5 (struct reiserfs_super_block * rs) +int is_reiserfs_3_5(struct reiserfs_super_block *rs) { - return !strncmp (rs->s_v1.s_magic, reiserfs_3_5_magic_string, - strlen (reiserfs_3_5_magic_string)); + return !strncmp(rs->s_v1.s_magic, reiserfs_3_5_magic_string, + strlen(reiserfs_3_5_magic_string)); } - -int is_reiserfs_3_6 (struct reiserfs_super_block * rs) +int is_reiserfs_3_6(struct reiserfs_super_block *rs) { - return !strncmp (rs->s_v1.s_magic, reiserfs_3_6_magic_string, - strlen (reiserfs_3_6_magic_string)); + return !strncmp(rs->s_v1.s_magic, reiserfs_3_6_magic_string, + strlen(reiserfs_3_6_magic_string)); } - -int is_reiserfs_jr (struct 
reiserfs_super_block * rs) +int is_reiserfs_jr(struct reiserfs_super_block *rs) { - return !strncmp (rs->s_v1.s_magic, reiserfs_jr_magic_string, - strlen (reiserfs_jr_magic_string)); + return !strncmp(rs->s_v1.s_magic, reiserfs_jr_magic_string, + strlen(reiserfs_jr_magic_string)); } - -static int is_any_reiserfs_magic_string (struct reiserfs_super_block * rs) +static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs) { - return (is_reiserfs_3_5 (rs) || is_reiserfs_3_6 (rs) || - is_reiserfs_jr (rs)); + return (is_reiserfs_3_5(rs) || is_reiserfs_3_6(rs) || + is_reiserfs_jr(rs)); } -static int reiserfs_remount (struct super_block * s, int * flags, char * data); -static int reiserfs_statfs (struct super_block * s, struct kstatfs * buf); +static int reiserfs_remount(struct super_block *s, int *flags, char *data); +static int reiserfs_statfs(struct super_block *s, struct kstatfs *buf); -static int reiserfs_sync_fs (struct super_block * s, int wait) +static int reiserfs_sync_fs(struct super_block *s, int wait) { - if (!(s->s_flags & MS_RDONLY)) { - struct reiserfs_transaction_handle th; - reiserfs_write_lock(s); - if (!journal_begin(&th, s, 1)) - if (!journal_end_sync(&th, s, 1)) - reiserfs_flush_old_commits(s); - s->s_dirt = 0; /* Even if it's not true. - * We'll loop forever in sync_supers otherwise */ - reiserfs_write_unlock(s); - } else { - s->s_dirt = 0; - } - return 0; + if (!(s->s_flags & MS_RDONLY)) { + struct reiserfs_transaction_handle th; + reiserfs_write_lock(s); + if (!journal_begin(&th, s, 1)) + if (!journal_end_sync(&th, s, 1)) + reiserfs_flush_old_commits(s); + s->s_dirt = 0; /* Even if it's not true. 
+ * We'll loop forever in sync_supers otherwise */ + reiserfs_write_unlock(s); + } else { + s->s_dirt = 0; + } + return 0; } static void reiserfs_write_super(struct super_block *s) { - reiserfs_sync_fs(s, 1); + reiserfs_sync_fs(s, 1); } -static void reiserfs_write_super_lockfs (struct super_block * s) +static void reiserfs_write_super_lockfs(struct super_block *s) { - struct reiserfs_transaction_handle th ; - reiserfs_write_lock(s); - if (!(s->s_flags & MS_RDONLY)) { - int err = journal_begin(&th, s, 1) ; - if (err) { - reiserfs_block_writes(&th) ; - } else { - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); - journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); - reiserfs_block_writes(&th) ; - journal_end_sync(&th, s, 1) ; - } - } - s->s_dirt = 0; - reiserfs_write_unlock(s); + struct reiserfs_transaction_handle th; + reiserfs_write_lock(s); + if (!(s->s_flags & MS_RDONLY)) { + int err = journal_begin(&th, s, 1); + if (err) { + reiserfs_block_writes(&th); + } else { + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), + 1); + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); + reiserfs_block_writes(&th); + journal_end_sync(&th, s, 1); + } + } + s->s_dirt = 0; + reiserfs_write_unlock(s); } -static void reiserfs_unlockfs(struct super_block *s) { - reiserfs_allow_writes(s) ; +static void reiserfs_unlockfs(struct super_block *s) +{ + reiserfs_allow_writes(s); } -extern const struct in_core_key MAX_IN_CORE_KEY; - +extern const struct in_core_key MAX_IN_CORE_KEY; /* this is used to delete "save link" when there are no items of a file it points to. 
It can either happen if unlink is completed but @@ -120,364 +118,387 @@ extern const struct in_core_key MAX_IN_CORE_KEY; protecting unlink is bigger that a key lf "save link" which protects truncate), so there left no items to make truncate completion on */ -static int remove_save_link_only (struct super_block * s, struct reiserfs_key * key, int oid_free) +static int remove_save_link_only(struct super_block *s, + struct reiserfs_key *key, int oid_free) { - struct reiserfs_transaction_handle th; - int err; - - /* we are going to do one balancing */ - err = journal_begin (&th, s, JOURNAL_PER_BALANCE_CNT); - if (err) - return err; - - reiserfs_delete_solid_item (&th, NULL, key); - if (oid_free) - /* removals are protected by direct items */ - reiserfs_release_objectid (&th, le32_to_cpu (key->k_objectid)); - - return journal_end (&th, s, JOURNAL_PER_BALANCE_CNT); + struct reiserfs_transaction_handle th; + int err; + + /* we are going to do one balancing */ + err = journal_begin(&th, s, JOURNAL_PER_BALANCE_CNT); + if (err) + return err; + + reiserfs_delete_solid_item(&th, NULL, key); + if (oid_free) + /* removals are protected by direct items */ + reiserfs_release_objectid(&th, le32_to_cpu(key->k_objectid)); + + return journal_end(&th, s, JOURNAL_PER_BALANCE_CNT); } - + #ifdef CONFIG_QUOTA static int reiserfs_quota_on_mount(struct super_block *, int); #endif - + /* look for uncompleted unlinks and truncates and complete them */ -static int finish_unfinished (struct super_block * s) +static int finish_unfinished(struct super_block *s) { - INITIALIZE_PATH (path); - struct cpu_key max_cpu_key, obj_key; - struct reiserfs_key save_link_key; - int retval = 0; - struct item_head * ih; - struct buffer_head * bh; - int item_pos; - char * item; - int done; - struct inode * inode; - int truncate; + INITIALIZE_PATH(path); + struct cpu_key max_cpu_key, obj_key; + struct reiserfs_key save_link_key; + int retval = 0; + struct item_head *ih; + struct buffer_head *bh; + int item_pos; + 
char *item; + int done; + struct inode *inode; + int truncate; #ifdef CONFIG_QUOTA - int i; - int ms_active_set; + int i; + int ms_active_set; #endif - - - /* compose key to look for "save" links */ - max_cpu_key.version = KEY_FORMAT_3_5; - max_cpu_key.on_disk_key.k_dir_id = ~0U; - max_cpu_key.on_disk_key.k_objectid = ~0U; - set_cpu_key_k_offset (&max_cpu_key, ~0U); - max_cpu_key.key_length = 3; + + /* compose key to look for "save" links */ + max_cpu_key.version = KEY_FORMAT_3_5; + max_cpu_key.on_disk_key.k_dir_id = ~0U; + max_cpu_key.on_disk_key.k_objectid = ~0U; + set_cpu_key_k_offset(&max_cpu_key, ~0U); + max_cpu_key.key_length = 3; #ifdef CONFIG_QUOTA - /* Needed for iput() to work correctly and not trash data */ - if (s->s_flags & MS_ACTIVE) { - ms_active_set = 0; - } else { - ms_active_set = 1; - s->s_flags |= MS_ACTIVE; - } - /* Turn on quotas so that they are updated correctly */ - for (i = 0; i < MAXQUOTAS; i++) { - if (REISERFS_SB(s)->s_qf_names[i]) { - int ret = reiserfs_quota_on_mount(s, i); - if (ret < 0) - reiserfs_warning(s, "reiserfs: cannot turn on journalled quota: error %d", ret); - } - } + /* Needed for iput() to work correctly and not trash data */ + if (s->s_flags & MS_ACTIVE) { + ms_active_set = 0; + } else { + ms_active_set = 1; + s->s_flags |= MS_ACTIVE; + } + /* Turn on quotas so that they are updated correctly */ + for (i = 0; i < MAXQUOTAS; i++) { + if (REISERFS_SB(s)->s_qf_names[i]) { + int ret = reiserfs_quota_on_mount(s, i); + if (ret < 0) + reiserfs_warning(s, + "reiserfs: cannot turn on journalled quota: error %d", + ret); + } + } #endif - - done = 0; - REISERFS_SB(s)->s_is_unlinked_ok = 1; - while (!retval) { - retval = search_item (s, &max_cpu_key, &path); - if (retval != ITEM_NOT_FOUND) { - reiserfs_warning (s, "vs-2140: finish_unfinished: search_by_key returned %d", - retval); - break; - } - - bh = get_last_bh (&path); - item_pos = get_item_pos (&path); - if (item_pos != B_NR_ITEMS (bh)) { - reiserfs_warning (s, "vs-2060: 
finish_unfinished: wrong position found"); - break; - } - item_pos --; - ih = B_N_PITEM_HEAD (bh, item_pos); - - if (le32_to_cpu (ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID) - /* there are no "save" links anymore */ - break; - - save_link_key = ih->ih_key; - if (is_indirect_le_ih (ih)) - truncate = 1; - else - truncate = 0; - - /* reiserfs_iget needs k_dirid and k_objectid only */ - item = B_I_PITEM (bh, ih); - obj_key.on_disk_key.k_dir_id = le32_to_cpu (*(__le32 *)item); - obj_key.on_disk_key.k_objectid = le32_to_cpu (ih->ih_key.k_objectid); - obj_key.on_disk_key.k_offset = 0; - obj_key.on_disk_key.k_type = 0; - - pathrelse (&path); - - inode = reiserfs_iget (s, &obj_key); - if (!inode) { - /* the unlink almost completed, it just did not manage to remove - "save" link and release objectid */ - reiserfs_warning (s, "vs-2180: finish_unfinished: iget failed for %K", - &obj_key); - retval = remove_save_link_only (s, &save_link_key, 1); - continue; - } - - if (!truncate && inode->i_nlink) { - /* file is not unlinked */ - reiserfs_warning (s, "vs-2185: finish_unfinished: file %K is not unlinked", - &obj_key); - retval = remove_save_link_only (s, &save_link_key, 0); - continue; - } - DQUOT_INIT(inode); - - if (truncate && S_ISDIR (inode->i_mode) ) { - /* We got a truncate request for a dir which is impossible. - The only imaginable way is to execute unfinished truncate request - then boot into old kernel, remove the file and create dir with - the same key. */ - reiserfs_warning(s, "green-2101: impossible truncate on a directory %k. Please report", INODE_PKEY (inode)); - retval = remove_save_link_only (s, &save_link_key, 0); - truncate = 0; - iput (inode); - continue; - } - - if (truncate) { - REISERFS_I(inode) -> i_flags |= i_link_saved_truncate_mask; - /* not completed truncate found. 
New size was committed together - with "save" link */ - reiserfs_info (s, "Truncating %k to %Ld ..", - INODE_PKEY (inode), inode->i_size); - reiserfs_truncate_file (inode, 0/*don't update modification time*/); - retval = remove_save_link (inode, truncate); - } else { - REISERFS_I(inode) -> i_flags |= i_link_saved_unlink_mask; - /* not completed unlink (rmdir) found */ - reiserfs_info (s, "Removing %k..", INODE_PKEY (inode)); - /* removal gets completed in iput */ - retval = 0; - } - - iput (inode); - printk ("done\n"); - done ++; - } - REISERFS_SB(s)->s_is_unlinked_ok = 0; - + + done = 0; + REISERFS_SB(s)->s_is_unlinked_ok = 1; + while (!retval) { + retval = search_item(s, &max_cpu_key, &path); + if (retval != ITEM_NOT_FOUND) { + reiserfs_warning(s, + "vs-2140: finish_unfinished: search_by_key returned %d", + retval); + break; + } + + bh = get_last_bh(&path); + item_pos = get_item_pos(&path); + if (item_pos != B_NR_ITEMS(bh)) { + reiserfs_warning(s, + "vs-2060: finish_unfinished: wrong position found"); + break; + } + item_pos--; + ih = B_N_PITEM_HEAD(bh, item_pos); + + if (le32_to_cpu(ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID) + /* there are no "save" links anymore */ + break; + + save_link_key = ih->ih_key; + if (is_indirect_le_ih(ih)) + truncate = 1; + else + truncate = 0; + + /* reiserfs_iget needs k_dirid and k_objectid only */ + item = B_I_PITEM(bh, ih); + obj_key.on_disk_key.k_dir_id = le32_to_cpu(*(__le32 *) item); + obj_key.on_disk_key.k_objectid = + le32_to_cpu(ih->ih_key.k_objectid); + obj_key.on_disk_key.k_offset = 0; + obj_key.on_disk_key.k_type = 0; + + pathrelse(&path); + + inode = reiserfs_iget(s, &obj_key); + if (!inode) { + /* the unlink almost completed, it just did not manage to remove + "save" link and release objectid */ + reiserfs_warning(s, + "vs-2180: finish_unfinished: iget failed for %K", + &obj_key); + retval = remove_save_link_only(s, &save_link_key, 1); + continue; + } + + if (!truncate && inode->i_nlink) { + /* file is not unlinked */ 
+ reiserfs_warning(s, + "vs-2185: finish_unfinished: file %K is not unlinked", + &obj_key); + retval = remove_save_link_only(s, &save_link_key, 0); + continue; + } + DQUOT_INIT(inode); + + if (truncate && S_ISDIR(inode->i_mode)) { + /* We got a truncate request for a dir which is impossible. + The only imaginable way is to execute unfinished truncate request + then boot into old kernel, remove the file and create dir with + the same key. */ + reiserfs_warning(s, + "green-2101: impossible truncate on a directory %k. Please report", + INODE_PKEY(inode)); + retval = remove_save_link_only(s, &save_link_key, 0); + truncate = 0; + iput(inode); + continue; + } + + if (truncate) { + REISERFS_I(inode)->i_flags |= + i_link_saved_truncate_mask; + /* not completed truncate found. New size was committed together + with "save" link */ + reiserfs_info(s, "Truncating %k to %Ld ..", + INODE_PKEY(inode), inode->i_size); + reiserfs_truncate_file(inode, + 0 + /*don't update modification time */ + ); + retval = remove_save_link(inode, truncate); + } else { + REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; + /* not completed unlink (rmdir) found */ + reiserfs_info(s, "Removing %k..", INODE_PKEY(inode)); + /* removal gets completed in iput */ + retval = 0; + } + + iput(inode); + printk("done\n"); + done++; + } + REISERFS_SB(s)->s_is_unlinked_ok = 0; + #ifdef CONFIG_QUOTA - /* Turn quotas off */ - for (i = 0; i < MAXQUOTAS; i++) { - if (sb_dqopt(s)->files[i]) - vfs_quota_off_mount(s, i); - } - if (ms_active_set) - /* Restore the flag back */ - s->s_flags &= ~MS_ACTIVE; + /* Turn quotas off */ + for (i = 0; i < MAXQUOTAS; i++) { + if (sb_dqopt(s)->files[i]) + vfs_quota_off_mount(s, i); + } + if (ms_active_set) + /* Restore the flag back */ + s->s_flags &= ~MS_ACTIVE; #endif - pathrelse (&path); - if (done) - reiserfs_info (s, "There were %d uncompleted unlinks/truncates. 
" - "Completed\n", done); - return retval; + pathrelse(&path); + if (done) + reiserfs_info(s, "There were %d uncompleted unlinks/truncates. " + "Completed\n", done); + return retval; } - + /* to protect file being unlinked from getting lost we "safe" link files being unlinked. This link will be deleted in the same transaction with last item of file. mounting the filesytem we scan all these links and remove files which almost got lost */ -void add_save_link (struct reiserfs_transaction_handle * th, - struct inode * inode, int truncate) +void add_save_link(struct reiserfs_transaction_handle *th, + struct inode *inode, int truncate) { - INITIALIZE_PATH (path); - int retval; - struct cpu_key key; - struct item_head ih; - __le32 link; - - BUG_ON (!th->t_trans_id); - - /* file can only get one "save link" of each kind */ - RFALSE( truncate && - ( REISERFS_I(inode) -> i_flags & i_link_saved_truncate_mask ), - "saved link already exists for truncated inode %lx", - ( long ) inode -> i_ino ); - RFALSE( !truncate && - ( REISERFS_I(inode) -> i_flags & i_link_saved_unlink_mask ), - "saved link already exists for unlinked inode %lx", - ( long ) inode -> i_ino ); - - /* setup key of "save" link */ - key.version = KEY_FORMAT_3_5; - key.on_disk_key.k_dir_id = MAX_KEY_OBJECTID; - key.on_disk_key.k_objectid = inode->i_ino; - if (!truncate) { - /* unlink, rmdir, rename */ - set_cpu_key_k_offset (&key, 1 + inode->i_sb->s_blocksize); - set_cpu_key_k_type (&key, TYPE_DIRECT); - - /* item head of "safe" link */ - make_le_item_head (&ih, &key, key.version, 1 + inode->i_sb->s_blocksize, TYPE_DIRECT, - 4/*length*/, 0xffff/*free space*/); - } else { - /* truncate */ - if (S_ISDIR (inode->i_mode)) - reiserfs_warning(inode->i_sb, "green-2102: Adding a truncate savelink for a directory %k! 
Please report", INODE_PKEY(inode)); - set_cpu_key_k_offset (&key, 1); - set_cpu_key_k_type (&key, TYPE_INDIRECT); - - /* item head of "safe" link */ - make_le_item_head (&ih, &key, key.version, 1, TYPE_INDIRECT, - 4/*length*/, 0/*free space*/); - } - key.key_length = 3; - - /* look for its place in the tree */ - retval = search_item (inode->i_sb, &key, &path); - if (retval != ITEM_NOT_FOUND) { - if ( retval != -ENOSPC ) - reiserfs_warning (inode->i_sb, "vs-2100: add_save_link:" - "search_by_key (%K) returned %d", &key, retval); - pathrelse (&path); - return; - } - - /* body of "save" link */ - link = INODE_PKEY (inode)->k_dir_id; - - /* put "save" link inot tree, don't charge quota to anyone */ - retval = reiserfs_insert_item (th, &path, &key, &ih, NULL, (char *)&link); - if (retval) { - if (retval != -ENOSPC) - reiserfs_warning (inode->i_sb, "vs-2120: add_save_link: insert_item returned %d", - retval); - } else { - if( truncate ) - REISERFS_I(inode) -> i_flags |= i_link_saved_truncate_mask; - else - REISERFS_I(inode) -> i_flags |= i_link_saved_unlink_mask; - } -} + INITIALIZE_PATH(path); + int retval; + struct cpu_key key; + struct item_head ih; + __le32 link; + + BUG_ON(!th->t_trans_id); + + /* file can only get one "save link" of each kind */ + RFALSE(truncate && + (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask), + "saved link already exists for truncated inode %lx", + (long)inode->i_ino); + RFALSE(!truncate && + (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask), + "saved link already exists for unlinked inode %lx", + (long)inode->i_ino); + + /* setup key of "save" link */ + key.version = KEY_FORMAT_3_5; + key.on_disk_key.k_dir_id = MAX_KEY_OBJECTID; + key.on_disk_key.k_objectid = inode->i_ino; + if (!truncate) { + /* unlink, rmdir, rename */ + set_cpu_key_k_offset(&key, 1 + inode->i_sb->s_blocksize); + set_cpu_key_k_type(&key, TYPE_DIRECT); + + /* item head of "safe" link */ + make_le_item_head(&ih, &key, key.version, + 1 + 
inode->i_sb->s_blocksize, TYPE_DIRECT, + 4 /*length */ , 0xffff /*free space */ ); + } else { + /* truncate */ + if (S_ISDIR(inode->i_mode)) + reiserfs_warning(inode->i_sb, + "green-2102: Adding a truncate savelink for a directory %k! Please report", + INODE_PKEY(inode)); + set_cpu_key_k_offset(&key, 1); + set_cpu_key_k_type(&key, TYPE_INDIRECT); + + /* item head of "safe" link */ + make_le_item_head(&ih, &key, key.version, 1, TYPE_INDIRECT, + 4 /*length */ , 0 /*free space */ ); + } + key.key_length = 3; + + /* look for its place in the tree */ + retval = search_item(inode->i_sb, &key, &path); + if (retval != ITEM_NOT_FOUND) { + if (retval != -ENOSPC) + reiserfs_warning(inode->i_sb, "vs-2100: add_save_link:" + "search_by_key (%K) returned %d", &key, + retval); + pathrelse(&path); + return; + } + /* body of "save" link */ + link = INODE_PKEY(inode)->k_dir_id; + + /* put "save" link inot tree, don't charge quota to anyone */ + retval = + reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link); + if (retval) { + if (retval != -ENOSPC) + reiserfs_warning(inode->i_sb, + "vs-2120: add_save_link: insert_item returned %d", + retval); + } else { + if (truncate) + REISERFS_I(inode)->i_flags |= + i_link_saved_truncate_mask; + else + REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; + } +} /* this opens transaction unlike add_save_link */ -int remove_save_link (struct inode * inode, int truncate) +int remove_save_link(struct inode *inode, int truncate) { - struct reiserfs_transaction_handle th; - struct reiserfs_key key; - int err; - - /* we are going to do one balancing only */ - err = journal_begin (&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); - if (err) - return err; - - /* setup key of "save" link */ - key.k_dir_id = cpu_to_le32 (MAX_KEY_OBJECTID); - key.k_objectid = INODE_PKEY (inode)->k_objectid; - if (!truncate) { - /* unlink, rmdir, rename */ - set_le_key_k_offset (KEY_FORMAT_3_5, &key, - 1 + inode->i_sb->s_blocksize); - set_le_key_k_type (KEY_FORMAT_3_5, 
&key, TYPE_DIRECT); - } else { - /* truncate */ - set_le_key_k_offset (KEY_FORMAT_3_5, &key, 1); - set_le_key_k_type (KEY_FORMAT_3_5, &key, TYPE_INDIRECT); - } - - if( ( truncate && - ( REISERFS_I(inode) -> i_flags & i_link_saved_truncate_mask ) ) || - ( !truncate && - ( REISERFS_I(inode) -> i_flags & i_link_saved_unlink_mask ) ) ) - /* don't take quota bytes from anywhere */ - reiserfs_delete_solid_item (&th, NULL, &key); - if (!truncate) { - reiserfs_release_objectid (&th, inode->i_ino); - REISERFS_I(inode) -> i_flags &= ~i_link_saved_unlink_mask; - } else - REISERFS_I(inode) -> i_flags &= ~i_link_saved_truncate_mask; - - return journal_end (&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); -} + struct reiserfs_transaction_handle th; + struct reiserfs_key key; + int err; + + /* we are going to do one balancing only */ + err = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); + if (err) + return err; + + /* setup key of "save" link */ + key.k_dir_id = cpu_to_le32(MAX_KEY_OBJECTID); + key.k_objectid = INODE_PKEY(inode)->k_objectid; + if (!truncate) { + /* unlink, rmdir, rename */ + set_le_key_k_offset(KEY_FORMAT_3_5, &key, + 1 + inode->i_sb->s_blocksize); + set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_DIRECT); + } else { + /* truncate */ + set_le_key_k_offset(KEY_FORMAT_3_5, &key, 1); + set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_INDIRECT); + } + if ((truncate && + (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask)) || + (!truncate && + (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask))) + /* don't take quota bytes from anywhere */ + reiserfs_delete_solid_item(&th, NULL, &key); + if (!truncate) { + reiserfs_release_objectid(&th, inode->i_ino); + REISERFS_I(inode)->i_flags &= ~i_link_saved_unlink_mask; + } else + REISERFS_I(inode)->i_flags &= ~i_link_saved_truncate_mask; + + return journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); +} -static void reiserfs_put_super (struct super_block * s) +static void reiserfs_put_super(struct super_block *s) 
{ - int i; - struct reiserfs_transaction_handle th ; - th.t_trans_id = 0; - - if (REISERFS_SB(s)->xattr_root) { - d_invalidate (REISERFS_SB(s)->xattr_root); - dput (REISERFS_SB(s)->xattr_root); - } - - if (REISERFS_SB(s)->priv_root) { - d_invalidate (REISERFS_SB(s)->priv_root); - dput (REISERFS_SB(s)->priv_root); - } - - /* change file system state to current state if it was mounted with read-write permissions */ - if (!(s->s_flags & MS_RDONLY)) { - if (!journal_begin(&th, s, 10)) { - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; - set_sb_umount_state( SB_DISK_SUPER_BLOCK(s), REISERFS_SB(s)->s_mount_state ); - journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); - } - } - - /* note, journal_release checks for readonly mount, and can decide not - ** to do a journal_end - */ - journal_release(&th, s) ; - - for (i = 0; i < SB_BMAP_NR (s); i ++) - brelse (SB_AP_BITMAP (s)[i].bh); - - vfree (SB_AP_BITMAP (s)); - - brelse (SB_BUFFER_WITH_SB (s)); - - print_statistics (s); - - if (REISERFS_SB(s)->s_kmallocs != 0) { - reiserfs_warning (s, "vs-2004: reiserfs_put_super: allocated memory left %d", - REISERFS_SB(s)->s_kmallocs); - } - - if (REISERFS_SB(s)->reserved_blocks != 0) { - reiserfs_warning (s, "green-2005: reiserfs_put_super: reserved blocks left %d", - REISERFS_SB(s)->reserved_blocks); - } - - reiserfs_proc_info_done( s ); - - kfree(s->s_fs_info); - s->s_fs_info = NULL; - - return; + int i; + struct reiserfs_transaction_handle th; + th.t_trans_id = 0; + + if (REISERFS_SB(s)->xattr_root) { + d_invalidate(REISERFS_SB(s)->xattr_root); + dput(REISERFS_SB(s)->xattr_root); + } + + if (REISERFS_SB(s)->priv_root) { + d_invalidate(REISERFS_SB(s)->priv_root); + dput(REISERFS_SB(s)->priv_root); + } + + /* change file system state to current state if it was mounted with read-write permissions */ + if (!(s->s_flags & MS_RDONLY)) { + if (!journal_begin(&th, s, 10)) { + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), + 1); + 
set_sb_umount_state(SB_DISK_SUPER_BLOCK(s), + REISERFS_SB(s)->s_mount_state); + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); + } + } + + /* note, journal_release checks for readonly mount, and can decide not + ** to do a journal_end + */ + journal_release(&th, s); + + for (i = 0; i < SB_BMAP_NR(s); i++) + brelse(SB_AP_BITMAP(s)[i].bh); + + vfree(SB_AP_BITMAP(s)); + + brelse(SB_BUFFER_WITH_SB(s)); + + print_statistics(s); + + if (REISERFS_SB(s)->s_kmallocs != 0) { + reiserfs_warning(s, + "vs-2004: reiserfs_put_super: allocated memory left %d", + REISERFS_SB(s)->s_kmallocs); + } + + if (REISERFS_SB(s)->reserved_blocks != 0) { + reiserfs_warning(s, + "green-2005: reiserfs_put_super: reserved blocks left %d", + REISERFS_SB(s)->reserved_blocks); + } + + reiserfs_proc_info_done(s); + + kfree(s->s_fs_info); + s->s_fs_info = NULL; + + return; } -static kmem_cache_t * reiserfs_inode_cachep; +static kmem_cache_t *reiserfs_inode_cachep; static struct inode *reiserfs_alloc_inode(struct super_block *sb) { struct reiserfs_inode_info *ei; - ei = (struct reiserfs_inode_info *)kmem_cache_alloc(reiserfs_inode_cachep, SLAB_KERNEL); + ei = (struct reiserfs_inode_info *) + kmem_cache_alloc(reiserfs_inode_cachep, SLAB_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -488,25 +509,26 @@ static void reiserfs_destroy_inode(struct inode *inode) kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) { - struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *) foo; + struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { - INIT_LIST_HEAD(&ei->i_prealloc_list) ; + INIT_LIST_HEAD(&ei->i_prealloc_list); inode_init_once(&ei->vfs_inode); ei->i_acl_access = NULL; 
ei->i_acl_default = NULL; } } - + static int init_inodecache(void) { reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache", - sizeof(struct reiserfs_inode_info), - 0, SLAB_RECLAIM_ACCOUNT, - init_once, NULL); + sizeof(struct + reiserfs_inode_info), + 0, SLAB_RECLAIM_ACCOUNT, + init_once, NULL); if (reiserfs_inode_cachep == NULL) return -ENOMEM; return 0; @@ -515,72 +537,76 @@ static int init_inodecache(void) static void destroy_inodecache(void) { if (kmem_cache_destroy(reiserfs_inode_cachep)) - reiserfs_warning (NULL, "reiserfs_inode_cache: not all structures were freed"); + reiserfs_warning(NULL, + "reiserfs_inode_cache: not all structures were freed"); } /* we don't mark inodes dirty, we just log them */ -static void reiserfs_dirty_inode (struct inode * inode) { - struct reiserfs_transaction_handle th ; - - int err = 0; - if (inode->i_sb->s_flags & MS_RDONLY) { - reiserfs_warning(inode->i_sb, "clm-6006: writing inode %lu on readonly FS", - inode->i_ino) ; - return ; - } - reiserfs_write_lock(inode->i_sb); - - /* this is really only used for atime updates, so they don't have - ** to be included in O_SYNC or fsync - */ - err = journal_begin(&th, inode->i_sb, 1) ; - if (err) { - reiserfs_write_unlock (inode->i_sb); - return; - } - reiserfs_update_sd (&th, inode); - journal_end(&th, inode->i_sb, 1) ; - reiserfs_write_unlock(inode->i_sb); +static void reiserfs_dirty_inode(struct inode *inode) +{ + struct reiserfs_transaction_handle th; + + int err = 0; + if (inode->i_sb->s_flags & MS_RDONLY) { + reiserfs_warning(inode->i_sb, + "clm-6006: writing inode %lu on readonly FS", + inode->i_ino); + return; + } + reiserfs_write_lock(inode->i_sb); + + /* this is really only used for atime updates, so they don't have + ** to be included in O_SYNC or fsync + */ + err = journal_begin(&th, inode->i_sb, 1); + if (err) { + reiserfs_write_unlock(inode->i_sb); + return; + } + reiserfs_update_sd(&th, inode); + journal_end(&th, inode->i_sb, 1); + 
reiserfs_write_unlock(inode->i_sb); } -static void reiserfs_clear_inode (struct inode *inode) +static void reiserfs_clear_inode(struct inode *inode) { - struct posix_acl *acl; + struct posix_acl *acl; - acl = REISERFS_I(inode)->i_acl_access; - if (acl && !IS_ERR (acl)) - posix_acl_release (acl); - REISERFS_I(inode)->i_acl_access = NULL; + acl = REISERFS_I(inode)->i_acl_access; + if (acl && !IS_ERR(acl)) + posix_acl_release(acl); + REISERFS_I(inode)->i_acl_access = NULL; - acl = REISERFS_I(inode)->i_acl_default; - if (acl && !IS_ERR (acl)) - posix_acl_release (acl); - REISERFS_I(inode)->i_acl_default = NULL; + acl = REISERFS_I(inode)->i_acl_default; + if (acl && !IS_ERR(acl)) + posix_acl_release(acl); + REISERFS_I(inode)->i_acl_default = NULL; } #ifdef CONFIG_QUOTA -static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, size_t, loff_t); -static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t, loff_t); +static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, + size_t, loff_t); +static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t, + loff_t); #endif -static struct super_operations reiserfs_sops = -{ - .alloc_inode = reiserfs_alloc_inode, - .destroy_inode = reiserfs_destroy_inode, - .write_inode = reiserfs_write_inode, - .dirty_inode = reiserfs_dirty_inode, - .delete_inode = reiserfs_delete_inode, - .clear_inode = reiserfs_clear_inode, - .put_super = reiserfs_put_super, - .write_super = reiserfs_write_super, - .sync_fs = reiserfs_sync_fs, - .write_super_lockfs = reiserfs_write_super_lockfs, - .unlockfs = reiserfs_unlockfs, - .statfs = reiserfs_statfs, - .remount_fs = reiserfs_remount, +static struct super_operations reiserfs_sops = { + .alloc_inode = reiserfs_alloc_inode, + .destroy_inode = reiserfs_destroy_inode, + .write_inode = reiserfs_write_inode, + .dirty_inode = reiserfs_dirty_inode, + .delete_inode = reiserfs_delete_inode, + .clear_inode = reiserfs_clear_inode, + .put_super 
= reiserfs_put_super, + .write_super = reiserfs_write_super, + .sync_fs = reiserfs_sync_fs, + .write_super_lockfs = reiserfs_write_super_lockfs, + .unlockfs = reiserfs_unlockfs, + .statfs = reiserfs_statfs, + .remount_fs = reiserfs_remount, #ifdef CONFIG_QUOTA - .quota_read = reiserfs_quota_read, - .quota_write = reiserfs_quota_write, + .quota_read = reiserfs_quota_read, + .quota_write = reiserfs_quota_write, #endif }; @@ -596,50 +622,48 @@ static int reiserfs_mark_dquot_dirty(struct dquot *); static int reiserfs_write_info(struct super_block *, int); static int reiserfs_quota_on(struct super_block *, int, int, char *); -static struct dquot_operations reiserfs_quota_operations = -{ - .initialize = reiserfs_dquot_initialize, - .drop = reiserfs_dquot_drop, - .alloc_space = dquot_alloc_space, - .alloc_inode = dquot_alloc_inode, - .free_space = dquot_free_space, - .free_inode = dquot_free_inode, - .transfer = dquot_transfer, - .write_dquot = reiserfs_write_dquot, - .acquire_dquot = reiserfs_acquire_dquot, - .release_dquot = reiserfs_release_dquot, - .mark_dirty = reiserfs_mark_dquot_dirty, - .write_info = reiserfs_write_info, +static struct dquot_operations reiserfs_quota_operations = { + .initialize = reiserfs_dquot_initialize, + .drop = reiserfs_dquot_drop, + .alloc_space = dquot_alloc_space, + .alloc_inode = dquot_alloc_inode, + .free_space = dquot_free_space, + .free_inode = dquot_free_inode, + .transfer = dquot_transfer, + .write_dquot = reiserfs_write_dquot, + .acquire_dquot = reiserfs_acquire_dquot, + .release_dquot = reiserfs_release_dquot, + .mark_dirty = reiserfs_mark_dquot_dirty, + .write_info = reiserfs_write_info, }; -static struct quotactl_ops reiserfs_qctl_operations = -{ - .quota_on = reiserfs_quota_on, - .quota_off = vfs_quota_off, - .quota_sync = vfs_quota_sync, - .get_info = vfs_get_dqinfo, - .set_info = vfs_set_dqinfo, - .get_dqblk = vfs_get_dqblk, - .set_dqblk = vfs_set_dqblk, +static struct quotactl_ops reiserfs_qctl_operations = { + .quota_on = 
reiserfs_quota_on, + .quota_off = vfs_quota_off, + .quota_sync = vfs_quota_sync, + .get_info = vfs_get_dqinfo, + .set_info = vfs_set_dqinfo, + .get_dqblk = vfs_get_dqblk, + .set_dqblk = vfs_set_dqblk, }; #endif static struct export_operations reiserfs_export_ops = { - .encode_fh = reiserfs_encode_fh, - .decode_fh = reiserfs_decode_fh, - .get_parent = reiserfs_get_parent, - .get_dentry = reiserfs_get_dentry, -} ; + .encode_fh = reiserfs_encode_fh, + .decode_fh = reiserfs_decode_fh, + .get_parent = reiserfs_get_parent, + .get_dentry = reiserfs_get_dentry, +}; /* this struct is used in reiserfs_getopt () for containing the value for those mount options that have values rather than being toggles. */ typedef struct { - char * value; - int setmask; /* bitmask which is to set on mount_options bitmask when this - value is found, 0 is no bits are to be changed. */ - int clrmask; /* bitmask which is to clear on mount_options bitmask when this - value is found, 0 is no bits are to be changed. This is - applied BEFORE setmask */ + char *value; + int setmask; /* bitmask which is to set on mount_options bitmask when this + value is found, 0 is no bits are to be changed. */ + int clrmask; /* bitmask which is to clear on mount_options bitmask when this + value is found, 0 is no bits are to be changed. This is + applied BEFORE setmask */ } arg_desc_t; /* Set this bit in arg_required to allow empty arguments */ @@ -648,67 +672,70 @@ typedef struct { /* this struct is used in reiserfs_getopt() for describing the set of reiserfs mount options */ typedef struct { - char * option_name; - int arg_required; /* 0 if argument is not required, not 0 otherwise */ - const arg_desc_t * values; /* list of values accepted by an option */ - int setmask; /* bitmask which is to set on mount_options bitmask when this - value is found, 0 is no bits are to be changed. */ - int clrmask; /* bitmask which is to clear on mount_options bitmask when this - value is found, 0 is no bits are to be changed. 
This is - applied BEFORE setmask */ + char *option_name; + int arg_required; /* 0 if argument is not required, not 0 otherwise */ + const arg_desc_t *values; /* list of values accepted by an option */ + int setmask; /* bitmask which is to set on mount_options bitmask when this + value is found, 0 is no bits are to be changed. */ + int clrmask; /* bitmask which is to clear on mount_options bitmask when this + value is found, 0 is no bits are to be changed. This is + applied BEFORE setmask */ } opt_desc_t; /* possible values for -o data= */ static const arg_desc_t logging_mode[] = { - {"ordered", 1<arg_required otherwise */ -static int reiserfs_getopt ( struct super_block * s, char ** cur, opt_desc_t * opts, char ** opt_arg, - unsigned long * bit_flags) +static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, + char **opt_arg, unsigned long *bit_flags) { - char * p; - /* foo=bar, - ^ ^ ^ - | | +-- option_end - | +-- arg_start - +-- option_start - */ - const opt_desc_t * opt; - const arg_desc_t * arg; - - - p = *cur; - - /* assume argument cannot contain commas */ - *cur = strchr (p, ','); - if (*cur) { - *(*cur) = '\0'; - (*cur) ++; - } - - if ( !strncmp (p, "alloc=", 6) ) { - /* Ugly special case, probably we should redo options parser so that - it can understand several arguments for some options, also so that - it can fill several bitfields with option values. 
*/ - if ( reiserfs_parse_alloc_options( s, p + 6) ) { - return -1; - } else { - return 0; - } - } - - - /* for every option in the list */ - for (opt = opts; opt->option_name; opt ++) { - if (!strncmp (p, opt->option_name, strlen (opt->option_name))) { - if (bit_flags) { - if (opt->clrmask == (1 << REISERFS_UNSUPPORTED_OPT)) - reiserfs_warning (s, "%s not supported.", p); - else - *bit_flags &= ~opt->clrmask; - if (opt->setmask == (1 << REISERFS_UNSUPPORTED_OPT)) - reiserfs_warning (s, "%s not supported.", p); - else - *bit_flags |= opt->setmask; - } - break; - } - } - if (!opt->option_name) { - reiserfs_warning (s, "unknown mount option \"%s\"", p); - return -1; - } - - p += strlen (opt->option_name); - switch (*p) { - case '=': - if (!opt->arg_required) { - reiserfs_warning (s, "the option \"%s\" does not require an argument", - opt->option_name); - return -1; - } - break; - - case 0: - if (opt->arg_required) { - reiserfs_warning (s, "the option \"%s\" requires an argument", opt->option_name); - return -1; - } - break; - default: - reiserfs_warning (s, "head of option \"%s\" is only correct", opt->option_name); - return -1; - } - - /* move to the argument, or to next option if argument is not required */ - p ++; - - if ( opt->arg_required && !(opt->arg_required & (1<option_name); + char *p; + /* foo=bar, + ^ ^ ^ + | | +-- option_end + | +-- arg_start + +-- option_start + */ + const opt_desc_t *opt; + const arg_desc_t *arg; + + p = *cur; + + /* assume argument cannot contain commas */ + *cur = strchr(p, ','); + if (*cur) { + *(*cur) = '\0'; + (*cur)++; + } + + if (!strncmp(p, "alloc=", 6)) { + /* Ugly special case, probably we should redo options parser so that + it can understand several arguments for some options, also so that + it can fill several bitfields with option values. 
*/ + if (reiserfs_parse_alloc_options(s, p + 6)) { + return -1; + } else { + return 0; + } + } + + /* for every option in the list */ + for (opt = opts; opt->option_name; opt++) { + if (!strncmp(p, opt->option_name, strlen(opt->option_name))) { + if (bit_flags) { + if (opt->clrmask == + (1 << REISERFS_UNSUPPORTED_OPT)) + reiserfs_warning(s, "%s not supported.", + p); + else + *bit_flags &= ~opt->clrmask; + if (opt->setmask == + (1 << REISERFS_UNSUPPORTED_OPT)) + reiserfs_warning(s, "%s not supported.", + p); + else + *bit_flags |= opt->setmask; + } + break; + } + } + if (!opt->option_name) { + reiserfs_warning(s, "unknown mount option \"%s\"", p); + return -1; + } + + p += strlen(opt->option_name); + switch (*p) { + case '=': + if (!opt->arg_required) { + reiserfs_warning(s, + "the option \"%s\" does not require an argument", + opt->option_name); + return -1; + } + break; + + case 0: + if (opt->arg_required) { + reiserfs_warning(s, + "the option \"%s\" requires an argument", + opt->option_name); + return -1; + } + break; + default: + reiserfs_warning(s, "head of option \"%s\" is only correct", + opt->option_name); + return -1; + } + + /* move to the argument, or to next option if argument is not required */ + p++; + + if (opt->arg_required + && !(opt->arg_required & (1 << REISERFS_OPT_ALLOWEMPTY)) + && !strlen(p)) { + /* this catches "option=," if not allowed */ + reiserfs_warning(s, "empty argument for \"%s\"", + opt->option_name); + return -1; + } + + if (!opt->values) { + /* *=NULLopt_arg contains pointer to argument */ + *opt_arg = p; + return opt->arg_required & ~(1 << REISERFS_OPT_ALLOWEMPTY); + } + + /* values possible for this option are listed in opt->values */ + for (arg = opt->values; arg->value; arg++) { + if (!strcmp(p, arg->value)) { + if (bit_flags) { + *bit_flags &= ~arg->clrmask; + *bit_flags |= arg->setmask; + } + return opt->arg_required; + } + } + + reiserfs_warning(s, "bad value \"%s\" for option \"%s\"", p, + opt->option_name); return -1; - } 
- - if (!opt->values) { - /* *=NULLopt_arg contains pointer to argument */ - *opt_arg = p; - return opt->arg_required & ~(1<values */ - for (arg = opt->values; arg->value; arg ++) { - if (!strcmp (p, arg->value)) { - if (bit_flags) { - *bit_flags &= ~arg->clrmask; - *bit_flags |= arg->setmask; - } - return opt->arg_required; - } - } - - reiserfs_warning (s, "bad value \"%s\" for option \"%s\"", p, opt->option_name); - return -1; } /* returns 0 if something is wrong in option string, 1 - otherwise */ -static int reiserfs_parse_options (struct super_block * s, char * options, /* string given via mount's -o */ - unsigned long * mount_options, - /* after the parsing phase, contains the - collection of bitflags defining what - mount options were selected. */ - unsigned long * blocks, /* strtol-ed from NNN of resize=NNN */ - char ** jdev_name, - unsigned int * commit_max_age) +static int reiserfs_parse_options(struct super_block *s, char *options, /* string given via mount's -o */ + unsigned long *mount_options, + /* after the parsing phase, contains the + collection of bitflags defining what + mount options were selected. 
*/ + unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */ + char **jdev_name, + unsigned int *commit_max_age) { - int c; - char * arg = NULL; - char * pos; - opt_desc_t opts[] = { - /* Compatibility stuff, so that -o notail for old setups still work */ - {"tails", .arg_required = 't', .values = tails}, - {"notail", .clrmask = (1<s_bdev->bd_inode->i_size >> s->s_blocksize_bits; - } else { - *blocks = simple_strtoul (arg, &p, 0); - if (*p != '\0') { - /* NNN does not look like a number */ - reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg); + {"nolog",}, /* This is unsupported */ + {"replayonly",.setmask = 1 << REPLAYONLY}, + {"block-allocator",.arg_required = 'a',.values = balloc}, + {"data",.arg_required = 'd',.values = logging_mode}, + {"barrier",.arg_required = 'b',.values = barrier_mode}, + {"resize",.arg_required = 'r',.values = NULL}, + {"jdev",.arg_required = 'j',.values = NULL}, + {"nolargeio",.arg_required = 'w',.values = NULL}, + {"commit",.arg_required = 'c',.values = NULL}, + {"usrquota",.setmask = 1 << REISERFS_QUOTA}, + {"grpquota",.setmask = 1 << REISERFS_QUOTA}, + {"noquota",.clrmask = 1 << REISERFS_QUOTA}, + {"errors",.arg_required = 'e',.values = error_actions}, + {"usrjquota",.arg_required = + 'u' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL}, + {"grpjquota",.arg_required = + 'g' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL}, + {"jqfmt",.arg_required = 'f',.values = NULL}, + {NULL,} + }; + + *blocks = 0; + if (!options || !*options) + /* use default configuration: create tails, journaling on, no + conversion to newest format */ + return 1; + + for (pos = options; pos;) { + c = reiserfs_getopt(s, &pos, opts, &arg, mount_options); + if (c == -1) + /* wrong option is given */ return 0; - } - } - } - if ( c == 'c' ) { - char *p = NULL; - unsigned long val = simple_strtoul (arg, &p, 0); - /* commit=NNN (time in seconds) */ - if ( *p != '\0' || val >= (unsigned int)-1) { - reiserfs_warning (s, "reiserfs_parse_options: bad 
value %s", arg); - return 0; + if (c == 'r') { + char *p; + + p = NULL; + /* "resize=NNN" or "resize=auto" */ + + if (!strcmp(arg, "auto")) { + /* From JFS code, to auto-get the size. */ + *blocks = + s->s_bdev->bd_inode->i_size >> s-> + s_blocksize_bits; + } else { + *blocks = simple_strtoul(arg, &p, 0); + if (*p != '\0') { + /* NNN does not look like a number */ + reiserfs_warning(s, + "reiserfs_parse_options: bad value %s", + arg); + return 0; + } + } } - *commit_max_age = (unsigned int)val; - } - if ( c == 'w' ) { - char *p=NULL; - int val = simple_strtoul (arg, &p, 0); - - if ( *p != '\0') { - reiserfs_warning (s, "reiserfs_parse_options: non-numeric value %s for nolargeio option", arg); - return 0; + if (c == 'c') { + char *p = NULL; + unsigned long val = simple_strtoul(arg, &p, 0); + /* commit=NNN (time in seconds) */ + if (*p != '\0' || val >= (unsigned int)-1) { + reiserfs_warning(s, + "reiserfs_parse_options: bad value %s", + arg); + return 0; + } + *commit_max_age = (unsigned int)val; } - if ( val ) - reiserfs_default_io_size = PAGE_SIZE; - else - reiserfs_default_io_size = 128 * 1024; - } - if (c == 'j') { - if (arg && *arg && jdev_name) { - if ( *jdev_name ) { //Hm, already assigned? - reiserfs_warning (s, "reiserfs_parse_options: journal device was already specified to be %s", *jdev_name); - return 0; + if (c == 'w') { + char *p = NULL; + int val = simple_strtoul(arg, &p, 0); + + if (*p != '\0') { + reiserfs_warning(s, + "reiserfs_parse_options: non-numeric value %s for nolargeio option", + arg); + return 0; + } + if (val) + reiserfs_default_io_size = PAGE_SIZE; + else + reiserfs_default_io_size = 128 * 1024; } - *jdev_name = arg; - } - } -#ifdef CONFIG_QUOTA - if (c == 'u' || c == 'g') { - int qtype = c == 'u' ? USRQUOTA : GRPQUOTA; - - if (sb_any_quota_enabled(s)) { - reiserfs_warning(s, "reiserfs_parse_options: cannot change journalled quota options when quota turned on."); - return 0; - } - if (*arg) { /* Some filename specified? 
*/ - if (REISERFS_SB(s)->s_qf_names[qtype] && strcmp(REISERFS_SB(s)->s_qf_names[qtype], arg)) { - reiserfs_warning(s, "reiserfs_parse_options: %s quota file already specified.", QTYPE2NAME(qtype)); - return 0; + if (c == 'j') { + if (arg && *arg && jdev_name) { + if (*jdev_name) { //Hm, already assigned? + reiserfs_warning(s, + "reiserfs_parse_options: journal device was already specified to be %s", + *jdev_name); + return 0; + } + *jdev_name = arg; + } } - if (strchr(arg, '/')) { - reiserfs_warning(s, "reiserfs_parse_options: quotafile must be on filesystem root."); - return 0; +#ifdef CONFIG_QUOTA + if (c == 'u' || c == 'g') { + int qtype = c == 'u' ? USRQUOTA : GRPQUOTA; + + if (sb_any_quota_enabled(s)) { + reiserfs_warning(s, + "reiserfs_parse_options: cannot change journalled quota options when quota turned on."); + return 0; + } + if (*arg) { /* Some filename specified? */ + if (REISERFS_SB(s)->s_qf_names[qtype] + && strcmp(REISERFS_SB(s)->s_qf_names[qtype], + arg)) { + reiserfs_warning(s, + "reiserfs_parse_options: %s quota file already specified.", + QTYPE2NAME(qtype)); + return 0; + } + if (strchr(arg, '/')) { + reiserfs_warning(s, + "reiserfs_parse_options: quotafile must be on filesystem root."); + return 0; + } + REISERFS_SB(s)->s_qf_names[qtype] = + kmalloc(strlen(arg) + 1, GFP_KERNEL); + if (!REISERFS_SB(s)->s_qf_names[qtype]) { + reiserfs_warning(s, + "reiserfs_parse_options: not enough memory for storing quotafile name."); + return 0; + } + strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg); + *mount_options |= 1 << REISERFS_QUOTA; + } else { + if (REISERFS_SB(s)->s_qf_names[qtype]) { + kfree(REISERFS_SB(s)-> + s_qf_names[qtype]); + REISERFS_SB(s)->s_qf_names[qtype] = + NULL; + } + } } - REISERFS_SB(s)->s_qf_names[qtype] = kmalloc(strlen(arg)+1, GFP_KERNEL); - if (!REISERFS_SB(s)->s_qf_names[qtype]) { - reiserfs_warning(s, "reiserfs_parse_options: not enough memory for storing quotafile name."); - return 0; + if (c == 'f') { + if (!strcmp(arg, 
"vfsold")) + REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD; + else if (!strcmp(arg, "vfsv0")) + REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0; + else { + reiserfs_warning(s, + "reiserfs_parse_options: unknown quota format specified."); + return 0; + } } - strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg); - *mount_options |= 1<s_qf_names[qtype]) { - kfree(REISERFS_SB(s)->s_qf_names[qtype]); - REISERFS_SB(s)->s_qf_names[qtype] = NULL; +#else + if (c == 'u' || c == 'g' || c == 'f') { + reiserfs_warning(s, + "reiserfs_parse_options: journalled quota options not supported."); + return 0; } - } - } - if (c == 'f') { - if (!strcmp(arg, "vfsold")) - REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD; - else if (!strcmp(arg, "vfsv0")) - REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0; - else { - reiserfs_warning(s, "reiserfs_parse_options: unknown quota format specified."); +#endif + } + +#ifdef CONFIG_QUOTA + if (!REISERFS_SB(s)->s_jquota_fmt + && (REISERFS_SB(s)->s_qf_names[USRQUOTA] + || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) { + reiserfs_warning(s, + "reiserfs_parse_options: journalled quota format not specified."); return 0; - } } -#else - if (c == 'u' || c == 'g' || c == 'f') { - reiserfs_warning(s, "reiserfs_parse_options: journalled quota options not supported."); - return 0; + /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ + if (!(*mount_options & (1 << REISERFS_QUOTA)) + && sb_any_quota_enabled(s)) { + reiserfs_warning(s, + "reiserfs_parse_options: quota options must be present when quota is turned on."); + return 0; } #endif - } - -#ifdef CONFIG_QUOTA - if (!REISERFS_SB(s)->s_jquota_fmt && (REISERFS_SB(s)->s_qf_names[USRQUOTA] || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) { - reiserfs_warning(s, "reiserfs_parse_options: journalled quota format not specified."); - return 0; - } - /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ - if (!(*mount_options & (1<s_mount_opt &= ~((1 << REISERFS_DATA_LOG) | - 
(1 << REISERFS_DATA_ORDERED) | - (1 << REISERFS_DATA_WRITEBACK)); - REISERFS_SB(s)->s_mount_opt |= (1 << mode); +static void switch_data_mode(struct super_block *s, unsigned long mode) +{ + REISERFS_SB(s)->s_mount_opt &= ~((1 << REISERFS_DATA_LOG) | + (1 << REISERFS_DATA_ORDERED) | + (1 << REISERFS_DATA_WRITEBACK)); + REISERFS_SB(s)->s_mount_opt |= (1 << mode); } static void handle_data_mode(struct super_block *s, unsigned long mount_options) { - if (mount_options & (1 << REISERFS_DATA_LOG)) { - if (!reiserfs_data_log(s)) { - switch_data_mode(s, REISERFS_DATA_LOG); - reiserfs_info (s, "switching to journaled data mode\n"); - } - } else if (mount_options & (1 << REISERFS_DATA_ORDERED)) { - if (!reiserfs_data_ordered(s)) { - switch_data_mode(s, REISERFS_DATA_ORDERED); - reiserfs_info (s, "switching to ordered data mode\n"); - } - } else if (mount_options & (1 << REISERFS_DATA_WRITEBACK)) { - if (!reiserfs_data_writeback(s)) { - switch_data_mode(s, REISERFS_DATA_WRITEBACK); - reiserfs_info (s, "switching to writeback data mode\n"); - } - } + if (mount_options & (1 << REISERFS_DATA_LOG)) { + if (!reiserfs_data_log(s)) { + switch_data_mode(s, REISERFS_DATA_LOG); + reiserfs_info(s, "switching to journaled data mode\n"); + } + } else if (mount_options & (1 << REISERFS_DATA_ORDERED)) { + if (!reiserfs_data_ordered(s)) { + switch_data_mode(s, REISERFS_DATA_ORDERED); + reiserfs_info(s, "switching to ordered data mode\n"); + } + } else if (mount_options & (1 << REISERFS_DATA_WRITEBACK)) { + if (!reiserfs_data_writeback(s)) { + switch_data_mode(s, REISERFS_DATA_WRITEBACK); + reiserfs_info(s, "switching to writeback data mode\n"); + } + } } -static void handle_barrier_mode(struct super_block *s, unsigned long bits) { - int flush = (1 << REISERFS_BARRIER_FLUSH); - int none = (1 << REISERFS_BARRIER_NONE); - int all_barrier = flush | none; - - if (bits & all_barrier) { - REISERFS_SB(s)->s_mount_opt &= ~all_barrier; - if (bits & flush) { - REISERFS_SB(s)->s_mount_opt |= flush; - 
printk("reiserfs: enabling write barrier flush mode\n"); - } else if (bits & none) { - REISERFS_SB(s)->s_mount_opt |= none; - printk("reiserfs: write barriers turned off\n"); - } - } +static void handle_barrier_mode(struct super_block *s, unsigned long bits) +{ + int flush = (1 << REISERFS_BARRIER_FLUSH); + int none = (1 << REISERFS_BARRIER_NONE); + int all_barrier = flush | none; + + if (bits & all_barrier) { + REISERFS_SB(s)->s_mount_opt &= ~all_barrier; + if (bits & flush) { + REISERFS_SB(s)->s_mount_opt |= flush; + printk("reiserfs: enabling write barrier flush mode\n"); + } else if (bits & none) { + REISERFS_SB(s)->s_mount_opt |= none; + printk("reiserfs: write barriers turned off\n"); + } + } } -static void handle_attrs( struct super_block *s ) +static void handle_attrs(struct super_block *s) { - struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); + struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); - if( reiserfs_attrs( s ) ) { - if( old_format_only(s) ) { - reiserfs_warning(s, "reiserfs: cannot support attributes on 3.5.x disk format" ); - REISERFS_SB(s) -> s_mount_opt &= ~ ( 1 << REISERFS_ATTRS ); + if (reiserfs_attrs(s)) { + if (old_format_only(s)) { + reiserfs_warning(s, + "reiserfs: cannot support attributes on 3.5.x disk format"); + REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); return; } - if( !( le32_to_cpu( rs -> s_flags ) & reiserfs_attrs_cleared ) ) { - reiserfs_warning(s, "reiserfs: cannot support attributes until flag is set in super-block" ); - REISERFS_SB(s) -> s_mount_opt &= ~ ( 1 << REISERFS_ATTRS ); + if (!(le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared)) { + reiserfs_warning(s, + "reiserfs: cannot support attributes until flag is set in super-block"); + REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); } - } else if (le32_to_cpu( rs -> s_flags ) & reiserfs_attrs_cleared) { + } else if (le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared) { REISERFS_SB(s)->s_mount_opt |= REISERFS_ATTRS; } } -static int 
reiserfs_remount (struct super_block * s, int * mount_flags, char * arg) +static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) { - struct reiserfs_super_block * rs; - struct reiserfs_transaction_handle th ; - unsigned long blocks; - unsigned long mount_options = REISERFS_SB(s)->s_mount_opt; - unsigned long safe_mask = 0; - unsigned int commit_max_age = (unsigned int)-1; - struct reiserfs_journal *journal = SB_JOURNAL(s); - int err; + struct reiserfs_super_block *rs; + struct reiserfs_transaction_handle th; + unsigned long blocks; + unsigned long mount_options = REISERFS_SB(s)->s_mount_opt; + unsigned long safe_mask = 0; + unsigned int commit_max_age = (unsigned int)-1; + struct reiserfs_journal *journal = SB_JOURNAL(s); + int err; #ifdef CONFIG_QUOTA - int i; + int i; #endif - rs = SB_DISK_SUPER_BLOCK (s); + rs = SB_DISK_SUPER_BLOCK(s); - if (!reiserfs_parse_options(s, arg, &mount_options, &blocks, NULL, &commit_max_age)) { + if (!reiserfs_parse_options + (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) { #ifdef CONFIG_QUOTA - for (i = 0; i < MAXQUOTAS; i++) - if (REISERFS_SB(s)->s_qf_names[i]) { - kfree(REISERFS_SB(s)->s_qf_names[i]); - REISERFS_SB(s)->s_qf_names[i] = NULL; - } + for (i = 0; i < MAXQUOTAS; i++) + if (REISERFS_SB(s)->s_qf_names[i]) { + kfree(REISERFS_SB(s)->s_qf_names[i]); + REISERFS_SB(s)->s_qf_names[i] = NULL; + } #endif - return -EINVAL; - } - - handle_attrs(s); - - /* Add options that are safe here */ - safe_mask |= 1 << REISERFS_SMALLTAIL; - safe_mask |= 1 << REISERFS_LARGETAIL; - safe_mask |= 1 << REISERFS_NO_BORDER; - safe_mask |= 1 << REISERFS_NO_UNHASHED_RELOCATION; - safe_mask |= 1 << REISERFS_HASHED_RELOCATION; - safe_mask |= 1 << REISERFS_TEST4; - safe_mask |= 1 << REISERFS_ATTRS; - safe_mask |= 1 << REISERFS_XATTRS_USER; - safe_mask |= 1 << REISERFS_POSIXACL; - safe_mask |= 1 << REISERFS_BARRIER_FLUSH; - safe_mask |= 1 << REISERFS_BARRIER_NONE; - safe_mask |= 1 << REISERFS_ERROR_RO; - safe_mask |= 1 
<< REISERFS_ERROR_CONTINUE; - safe_mask |= 1 << REISERFS_ERROR_PANIC; - safe_mask |= 1 << REISERFS_QUOTA; - - /* Update the bitmask, taking care to keep - * the bits we're not allowed to change here */ - REISERFS_SB(s)->s_mount_opt = (REISERFS_SB(s)->s_mount_opt & ~safe_mask) | (mount_options & safe_mask); - - if(commit_max_age != 0 && commit_max_age != (unsigned int)-1) { - journal->j_max_commit_age = commit_max_age; - journal->j_max_trans_age = commit_max_age; - } - else if(commit_max_age == 0) - { - /* 0 means restore defaults. */ - journal->j_max_commit_age = journal->j_default_max_commit_age; - journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; - } - - if(blocks) { - int rc = reiserfs_resize(s, blocks); - if (rc != 0) - return rc; - } - - if (*mount_flags & MS_RDONLY) { - reiserfs_xattr_init (s, *mount_flags); - /* remount read-only */ - if (s->s_flags & MS_RDONLY) - /* it is read-only already */ - return 0; - /* try to remount file system with read-only permissions */ - if (sb_umount_state(rs) == REISERFS_VALID_FS || REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) { - return 0; - } - - err = journal_begin(&th, s, 10) ; - if (err) - return err; - - /* Mounting a rw partition read-only. 
*/ - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; - set_sb_umount_state( rs, REISERFS_SB(s)->s_mount_state ); - journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); - } else { - /* remount read-write */ - if (!(s->s_flags & MS_RDONLY)) { - reiserfs_xattr_init (s, *mount_flags); - return 0; /* We are read-write already */ - } - - if (reiserfs_is_journal_aborted (journal)) - return journal->j_errno; - - handle_data_mode(s, mount_options); - handle_barrier_mode(s, mount_options); - REISERFS_SB(s)->s_mount_state = sb_umount_state(rs) ; - s->s_flags &= ~MS_RDONLY ; /* now it is safe to call journal_begin */ - err = journal_begin(&th, s, 10) ; - if (err) - return err; - - /* Mount a partition which is read-only, read-write */ - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; - REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); - s->s_flags &= ~MS_RDONLY; - set_sb_umount_state( rs, REISERFS_ERROR_FS ); - /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */ - journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); - REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS ; - } - /* this will force a full flush of all journal lists */ - SB_JOURNAL(s)->j_must_wait = 1 ; - err = journal_end(&th, s, 10) ; - if (err) - return err; - s->s_dirt = 0; - - if (!( *mount_flags & MS_RDONLY ) ) { - finish_unfinished( s ); - reiserfs_xattr_init (s, *mount_flags); - } - - return 0; + return -EINVAL; + } + + handle_attrs(s); + + /* Add options that are safe here */ + safe_mask |= 1 << REISERFS_SMALLTAIL; + safe_mask |= 1 << REISERFS_LARGETAIL; + safe_mask |= 1 << REISERFS_NO_BORDER; + safe_mask |= 1 << REISERFS_NO_UNHASHED_RELOCATION; + safe_mask |= 1 << REISERFS_HASHED_RELOCATION; + safe_mask |= 1 << REISERFS_TEST4; + safe_mask |= 1 << REISERFS_ATTRS; + safe_mask |= 1 << REISERFS_XATTRS_USER; + safe_mask |= 1 << REISERFS_POSIXACL; + safe_mask |= 1 << REISERFS_BARRIER_FLUSH; + safe_mask |= 1 << REISERFS_BARRIER_NONE; + safe_mask |= 1 << REISERFS_ERROR_RO; + safe_mask |= 1 << 
REISERFS_ERROR_CONTINUE; + safe_mask |= 1 << REISERFS_ERROR_PANIC; + safe_mask |= 1 << REISERFS_QUOTA; + + /* Update the bitmask, taking care to keep + * the bits we're not allowed to change here */ + REISERFS_SB(s)->s_mount_opt = + (REISERFS_SB(s)-> + s_mount_opt & ~safe_mask) | (mount_options & safe_mask); + + if (commit_max_age != 0 && commit_max_age != (unsigned int)-1) { + journal->j_max_commit_age = commit_max_age; + journal->j_max_trans_age = commit_max_age; + } else if (commit_max_age == 0) { + /* 0 means restore defaults. */ + journal->j_max_commit_age = journal->j_default_max_commit_age; + journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; + } + + if (blocks) { + int rc = reiserfs_resize(s, blocks); + if (rc != 0) + return rc; + } + + if (*mount_flags & MS_RDONLY) { + reiserfs_xattr_init(s, *mount_flags); + /* remount read-only */ + if (s->s_flags & MS_RDONLY) + /* it is read-only already */ + return 0; + /* try to remount file system with read-only permissions */ + if (sb_umount_state(rs) == REISERFS_VALID_FS + || REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) { + return 0; + } + + err = journal_begin(&th, s, 10); + if (err) + return err; + + /* Mounting a rw partition read-only. 
*/ + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); + set_sb_umount_state(rs, REISERFS_SB(s)->s_mount_state); + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); + } else { + /* remount read-write */ + if (!(s->s_flags & MS_RDONLY)) { + reiserfs_xattr_init(s, *mount_flags); + return 0; /* We are read-write already */ + } + + if (reiserfs_is_journal_aborted(journal)) + return journal->j_errno; + + handle_data_mode(s, mount_options); + handle_barrier_mode(s, mount_options); + REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); + s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */ + err = journal_begin(&th, s, 10); + if (err) + return err; + + /* Mount a partition which is read-only, read-write */ + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); + REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); + s->s_flags &= ~MS_RDONLY; + set_sb_umount_state(rs, REISERFS_ERROR_FS); + /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */ + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); + REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS; + } + /* this will force a full flush of all journal lists */ + SB_JOURNAL(s)->j_must_wait = 1; + err = journal_end(&th, s, 10); + if (err) + return err; + s->s_dirt = 0; + + if (!(*mount_flags & MS_RDONLY)) { + finish_unfinished(s); + reiserfs_xattr_init(s, *mount_flags); + } + + return 0; } /* load_bitmap_info_data - Sets up the reiserfs_bitmap_info structure from disk. @@ -1214,761 +1285,829 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a * free blocks at all. */ -static void load_bitmap_info_data (struct super_block *sb, - struct reiserfs_bitmap_info *bi) +static void load_bitmap_info_data(struct super_block *sb, + struct reiserfs_bitmap_info *bi) { - unsigned long *cur = (unsigned long *)bi->bh->b_data; - - while ((char *)cur < (bi->bh->b_data + sb->s_blocksize)) { - - /* No need to scan if all 0's or all 1's. 
- * Since we're only counting 0's, we can simply ignore all 1's */ - if (*cur == 0) { - if (bi->first_zero_hint == 0) { - bi->first_zero_hint = ((char *)cur - bi->bh->b_data) << 3; - } - bi->free_count += sizeof(unsigned long)*8; - } else if (*cur != ~0L) { - int b; - for (b = 0; b < sizeof(unsigned long)*8; b++) { - if (!reiserfs_test_le_bit (b, cur)) { - bi->free_count ++; - if (bi->first_zero_hint == 0) - bi->first_zero_hint = - (((char *)cur - bi->bh->b_data) << 3) + b; - } + unsigned long *cur = (unsigned long *)bi->bh->b_data; + + while ((char *)cur < (bi->bh->b_data + sb->s_blocksize)) { + + /* No need to scan if all 0's or all 1's. + * Since we're only counting 0's, we can simply ignore all 1's */ + if (*cur == 0) { + if (bi->first_zero_hint == 0) { + bi->first_zero_hint = + ((char *)cur - bi->bh->b_data) << 3; + } + bi->free_count += sizeof(unsigned long) * 8; + } else if (*cur != ~0L) { + int b; + for (b = 0; b < sizeof(unsigned long) * 8; b++) { + if (!reiserfs_test_le_bit(b, cur)) { + bi->free_count++; + if (bi->first_zero_hint == 0) + bi->first_zero_hint = + (((char *)cur - + bi->bh->b_data) << 3) + b; + } + } } - } - cur ++; - } + cur++; + } #ifdef CONFIG_REISERFS_CHECK // This outputs a lot of unneded info on big FSes // reiserfs_warning ("bitmap loaded from block %d: %d free blocks", -// bi->bh->b_blocknr, bi->free_count); +// bi->bh->b_blocknr, bi->free_count); #endif } - -static int read_bitmaps (struct super_block * s) + +static int read_bitmaps(struct super_block *s) { - int i, bmap_nr; + int i, bmap_nr; + + SB_AP_BITMAP(s) = + vmalloc(sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); + if (SB_AP_BITMAP(s) == 0) + return 1; + memset(SB_AP_BITMAP(s), 0, + sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); + for (i = 0, bmap_nr = + REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize + 1; + i < SB_BMAP_NR(s); i++, bmap_nr = s->s_blocksize * 8 * i) { + SB_AP_BITMAP(s)[i].bh = sb_getblk(s, bmap_nr); + if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) + 
ll_rw_block(READ, 1, &SB_AP_BITMAP(s)[i].bh); + } + for (i = 0; i < SB_BMAP_NR(s); i++) { + wait_on_buffer(SB_AP_BITMAP(s)[i].bh); + if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) { + reiserfs_warning(s, "sh-2029: reiserfs read_bitmaps: " + "bitmap block (#%lu) reading failed", + SB_AP_BITMAP(s)[i].bh->b_blocknr); + for (i = 0; i < SB_BMAP_NR(s); i++) + brelse(SB_AP_BITMAP(s)[i].bh); + vfree(SB_AP_BITMAP(s)); + SB_AP_BITMAP(s) = NULL; + return 1; + } + load_bitmap_info_data(s, SB_AP_BITMAP(s) + i); + } + return 0; +} - SB_AP_BITMAP (s) = vmalloc (sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); - if (SB_AP_BITMAP (s) == 0) - return 1; - memset (SB_AP_BITMAP (s), 0, sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); - for (i = 0, bmap_nr = REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize + 1; - i < SB_BMAP_NR(s); i++, bmap_nr = s->s_blocksize * 8 * i) { - SB_AP_BITMAP (s)[i].bh = sb_getblk(s, bmap_nr); - if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) - ll_rw_block(READ, 1, &SB_AP_BITMAP(s)[i].bh); - } - for (i = 0; i < SB_BMAP_NR(s); i++) { - wait_on_buffer(SB_AP_BITMAP (s)[i].bh); - if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) { - reiserfs_warning(s,"sh-2029: reiserfs read_bitmaps: " - "bitmap block (#%lu) reading failed", - SB_AP_BITMAP(s)[i].bh->b_blocknr); - for (i = 0; i < SB_BMAP_NR(s); i++) - brelse(SB_AP_BITMAP(s)[i].bh); - vfree(SB_AP_BITMAP(s)); - SB_AP_BITMAP(s) = NULL; - return 1; +static int read_old_bitmaps(struct super_block *s) +{ + int i; + struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); + int bmp1 = (REISERFS_OLD_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */ + + /* read true bitmap */ + SB_AP_BITMAP(s) = + vmalloc(sizeof(struct reiserfs_buffer_info *) * sb_bmap_nr(rs)); + if (SB_AP_BITMAP(s) == 0) + return 1; + + memset(SB_AP_BITMAP(s), 0, + sizeof(struct reiserfs_buffer_info *) * sb_bmap_nr(rs)); + + for (i = 0; i < sb_bmap_nr(rs); i++) { + SB_AP_BITMAP(s)[i].bh = sb_bread(s, bmp1 + i); + if 
(!SB_AP_BITMAP(s)[i].bh) + return 1; + load_bitmap_info_data(s, SB_AP_BITMAP(s) + i); } - load_bitmap_info_data (s, SB_AP_BITMAP (s) + i); - } - return 0; + + return 0; } -static int read_old_bitmaps (struct super_block * s) +static int read_super_block(struct super_block *s, int offset) { - int i ; - struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK(s); - int bmp1 = (REISERFS_OLD_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */ + struct buffer_head *bh; + struct reiserfs_super_block *rs; + int fs_blocksize; - /* read true bitmap */ - SB_AP_BITMAP (s) = vmalloc (sizeof (struct reiserfs_buffer_info *) * sb_bmap_nr(rs)); - if (SB_AP_BITMAP (s) == 0) - return 1; + bh = sb_bread(s, offset / s->s_blocksize); + if (!bh) { + reiserfs_warning(s, "sh-2006: read_super_block: " + "bread failed (dev %s, block %lu, size %lu)", + reiserfs_bdevname(s), offset / s->s_blocksize, + s->s_blocksize); + return 1; + } - memset (SB_AP_BITMAP (s), 0, sizeof (struct reiserfs_buffer_info *) * sb_bmap_nr(rs)); + rs = (struct reiserfs_super_block *)bh->b_data; + if (!is_any_reiserfs_magic_string(rs)) { + brelse(bh); + return 1; + } + // + // ok, reiserfs signature (old or new) found in at the given offset + // + fs_blocksize = sb_blocksize(rs); + brelse(bh); + sb_set_blocksize(s, fs_blocksize); - for (i = 0; i < sb_bmap_nr(rs); i ++) { - SB_AP_BITMAP (s)[i].bh = sb_bread (s, bmp1 + i); - if (!SB_AP_BITMAP (s)[i].bh) - return 1; - load_bitmap_info_data (s, SB_AP_BITMAP (s) + i); - } + bh = sb_bread(s, offset / s->s_blocksize); + if (!bh) { + reiserfs_warning(s, "sh-2007: read_super_block: " + "bread failed (dev %s, block %lu, size %lu)\n", + reiserfs_bdevname(s), offset / s->s_blocksize, + s->s_blocksize); + return 1; + } - return 0; -} + rs = (struct reiserfs_super_block *)bh->b_data; + if (sb_blocksize(rs) != s->s_blocksize) { + reiserfs_warning(s, "sh-2011: read_super_block: " + "can't find a reiserfs filesystem on (dev %s, block %Lu, size %lu)\n", + 
reiserfs_bdevname(s), + (unsigned long long)bh->b_blocknr, + s->s_blocksize); + brelse(bh); + return 1; + } -static int read_super_block (struct super_block * s, int offset) -{ - struct buffer_head * bh; - struct reiserfs_super_block * rs; - int fs_blocksize; - - - bh = sb_bread (s, offset / s->s_blocksize); - if (!bh) { - reiserfs_warning (s, "sh-2006: read_super_block: " - "bread failed (dev %s, block %lu, size %lu)", - reiserfs_bdevname (s), offset / s->s_blocksize, s->s_blocksize); - return 1; - } - - rs = (struct reiserfs_super_block *)bh->b_data; - if (!is_any_reiserfs_magic_string (rs)) { - brelse (bh); - return 1; - } - - // - // ok, reiserfs signature (old or new) found in at the given offset - // - fs_blocksize = sb_blocksize(rs); - brelse (bh); - sb_set_blocksize (s, fs_blocksize); - - bh = sb_bread (s, offset / s->s_blocksize); - if (!bh) { - reiserfs_warning (s, "sh-2007: read_super_block: " - "bread failed (dev %s, block %lu, size %lu)\n", - reiserfs_bdevname (s), offset / s->s_blocksize, s->s_blocksize); - return 1; - } - - rs = (struct reiserfs_super_block *)bh->b_data; - if (sb_blocksize(rs) != s->s_blocksize) { - reiserfs_warning (s, "sh-2011: read_super_block: " - "can't find a reiserfs filesystem on (dev %s, block %Lu, size %lu)\n", - reiserfs_bdevname (s), (unsigned long long)bh->b_blocknr, s->s_blocksize); - brelse (bh); - return 1; - } - - if ( rs->s_v1.s_root_block == cpu_to_le32(-1) ) { - brelse(bh) ; - reiserfs_warning (s, "Unfinished reiserfsck --rebuild-tree run detected. Please run\n" - "reiserfsck --rebuild-tree and wait for a completion. 
If that fails\n" - "get newer reiserfsprogs package"); - return 1; - } - - SB_BUFFER_WITH_SB (s) = bh; - SB_DISK_SUPER_BLOCK (s) = rs; - - if (is_reiserfs_jr (rs)) { - /* magic is of non-standard journal filesystem, look at s_version to - find which format is in use */ - if (sb_version(rs) == REISERFS_VERSION_2) - reiserfs_warning (s, "read_super_block: found reiserfs format \"3.6\"" - " with non-standard journal"); - else if (sb_version(rs) == REISERFS_VERSION_1) - reiserfs_warning (s, "read_super_block: found reiserfs format \"3.5\"" - " with non-standard journal"); - else { - reiserfs_warning (s, "sh-2012: read_super_block: found unknown " - "format \"%u\" of reiserfs with non-standard magic", - sb_version(rs)); - return 1; + if (rs->s_v1.s_root_block == cpu_to_le32(-1)) { + brelse(bh); + reiserfs_warning(s, + "Unfinished reiserfsck --rebuild-tree run detected. Please run\n" + "reiserfsck --rebuild-tree and wait for a completion. If that fails\n" + "get newer reiserfsprogs package"); + return 1; } - } - else - /* s_version of standard format may contain incorrect information, - so we just look at the magic string */ - reiserfs_info (s, "found reiserfs format \"%s\" with standard journal\n", - is_reiserfs_3_5 (rs) ? 
"3.5" : "3.6"); - s->s_op = &reiserfs_sops; - s->s_export_op = &reiserfs_export_ops; + SB_BUFFER_WITH_SB(s) = bh; + SB_DISK_SUPER_BLOCK(s) = rs; + + if (is_reiserfs_jr(rs)) { + /* magic is of non-standard journal filesystem, look at s_version to + find which format is in use */ + if (sb_version(rs) == REISERFS_VERSION_2) + reiserfs_warning(s, + "read_super_block: found reiserfs format \"3.6\"" + " with non-standard journal"); + else if (sb_version(rs) == REISERFS_VERSION_1) + reiserfs_warning(s, + "read_super_block: found reiserfs format \"3.5\"" + " with non-standard journal"); + else { + reiserfs_warning(s, + "sh-2012: read_super_block: found unknown " + "format \"%u\" of reiserfs with non-standard magic", + sb_version(rs)); + return 1; + } + } else + /* s_version of standard format may contain incorrect information, + so we just look at the magic string */ + reiserfs_info(s, + "found reiserfs format \"%s\" with standard journal\n", + is_reiserfs_3_5(rs) ? "3.5" : "3.6"); + + s->s_op = &reiserfs_sops; + s->s_export_op = &reiserfs_export_ops; #ifdef CONFIG_QUOTA - s->s_qcop = &reiserfs_qctl_operations; - s->dq_op = &reiserfs_quota_operations; + s->s_qcop = &reiserfs_qctl_operations; + s->dq_op = &reiserfs_quota_operations; #endif - /* new format is limited by the 32 bit wide i_blocks field, want to - ** be one full block below that. - */ - s->s_maxbytes = (512LL << 32) - s->s_blocksize ; - return 0; + /* new format is limited by the 32 bit wide i_blocks field, want to + ** be one full block below that. 
+ */ + s->s_maxbytes = (512LL << 32) - s->s_blocksize; + return 0; } - - /* after journal replay, reread all bitmap and super blocks */ -static int reread_meta_blocks(struct super_block *s) { - int i ; - ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))) ; - wait_on_buffer(SB_BUFFER_WITH_SB(s)) ; - if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { - reiserfs_warning (s, "reread_meta_blocks, error reading the super") ; - return 1 ; - } - - for (i = 0; i < SB_BMAP_NR(s) ; i++) { - ll_rw_block(READ, 1, &(SB_AP_BITMAP(s)[i].bh)) ; - wait_on_buffer(SB_AP_BITMAP(s)[i].bh) ; - if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) { - reiserfs_warning (s, "reread_meta_blocks, error reading bitmap block number %d at %llu", - i, (unsigned long long)SB_AP_BITMAP(s)[i].bh->b_blocknr) ; - return 1 ; - } - } - return 0 ; +static int reread_meta_blocks(struct super_block *s) +{ + int i; + ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); + wait_on_buffer(SB_BUFFER_WITH_SB(s)); + if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { + reiserfs_warning(s, + "reread_meta_blocks, error reading the super"); + return 1; + } -} + for (i = 0; i < SB_BMAP_NR(s); i++) { + ll_rw_block(READ, 1, &(SB_AP_BITMAP(s)[i].bh)); + wait_on_buffer(SB_AP_BITMAP(s)[i].bh); + if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) { + reiserfs_warning(s, + "reread_meta_blocks, error reading bitmap block number %d at %llu", + i, + (unsigned long long)SB_AP_BITMAP(s)[i]. + bh->b_blocknr); + return 1; + } + } + return 0; +} ///////////////////////////////////////////////////// // hash detection stuff - // if root directory is empty - we set default - Yura's - hash and // warn about it // FIXME: we look for only one name in a directory. 
If tea and yura // bith have the same value - we ask user to send report to the // mailing list -static __u32 find_hash_out (struct super_block * s) +static __u32 find_hash_out(struct super_block *s) { - int retval; - struct inode * inode; - struct cpu_key key; - INITIALIZE_PATH (path); - struct reiserfs_dir_entry de; - __u32 hash = DEFAULT_HASH; - - inode = s->s_root->d_inode; - - do { // Some serious "goto"-hater was there ;) - u32 teahash, r5hash, yurahash; - - make_cpu_key (&key, inode, ~0, TYPE_DIRENTRY, 3); - retval = search_by_entry_key (s, &key, &path, &de); - if (retval == IO_ERROR) { - pathrelse (&path); - return UNSET_HASH ; - } - if (retval == NAME_NOT_FOUND) - de.de_entry_num --; - set_de_name_and_namelen (&de); - if (deh_offset( &(de.de_deh[de.de_entry_num]) ) == DOT_DOT_OFFSET) { - /* allow override in this case */ - if (reiserfs_rupasov_hash(s)) { - hash = YURA_HASH ; - } - reiserfs_warning(s,"FS seems to be empty, autodetect " - "is using the default hash"); - break; - } - r5hash=GET_HASH_VALUE (r5_hash (de.de_name, de.de_namelen)); - teahash=GET_HASH_VALUE (keyed_hash (de.de_name, de.de_namelen)); - yurahash=GET_HASH_VALUE (yura_hash (de.de_name, de.de_namelen)); - if ( ( (teahash == r5hash) && (GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash) ) || - ( (teahash == yurahash) && (yurahash == GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])))) ) || - ( (r5hash == yurahash) && (yurahash == GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])))) ) ) { - reiserfs_warning(s,"Unable to automatically detect hash function. 
" - "Please mount with -o hash={tea,rupasov,r5}", - reiserfs_bdevname (s)); - hash = UNSET_HASH; - break; - } - if (GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])) ) == yurahash) - hash = YURA_HASH; - else if (GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])) ) == teahash) - hash = TEA_HASH; - else if (GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])) ) == r5hash) - hash = R5_HASH; - else { - reiserfs_warning (s,"Unrecognised hash function"); - hash = UNSET_HASH; - } - } while (0); - - pathrelse (&path); - return hash; + int retval; + struct inode *inode; + struct cpu_key key; + INITIALIZE_PATH(path); + struct reiserfs_dir_entry de; + __u32 hash = DEFAULT_HASH; + + inode = s->s_root->d_inode; + + do { // Some serious "goto"-hater was there ;) + u32 teahash, r5hash, yurahash; + + make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3); + retval = search_by_entry_key(s, &key, &path, &de); + if (retval == IO_ERROR) { + pathrelse(&path); + return UNSET_HASH; + } + if (retval == NAME_NOT_FOUND) + de.de_entry_num--; + set_de_name_and_namelen(&de); + if (deh_offset(&(de.de_deh[de.de_entry_num])) == DOT_DOT_OFFSET) { + /* allow override in this case */ + if (reiserfs_rupasov_hash(s)) { + hash = YURA_HASH; + } + reiserfs_warning(s, "FS seems to be empty, autodetect " + "is using the default hash"); + break; + } + r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen)); + teahash = GET_HASH_VALUE(keyed_hash(de.de_name, de.de_namelen)); + yurahash = GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen)); + if (((teahash == r5hash) + && + (GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num]))) + == r5hash)) || ((teahash == yurahash) + && (yurahash == + GET_HASH_VALUE(deh_offset + (& + (de. + de_deh[de. + de_entry_num]))))) + || ((r5hash == yurahash) + && (yurahash == + GET_HASH_VALUE(deh_offset + (&(de.de_deh[de.de_entry_num])))))) { + reiserfs_warning(s, + "Unable to automatically detect hash function. 
" + "Please mount with -o hash={tea,rupasov,r5}", + reiserfs_bdevname(s)); + hash = UNSET_HASH; + break; + } + if (GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num]))) == + yurahash) + hash = YURA_HASH; + else if (GET_HASH_VALUE + (deh_offset(&(de.de_deh[de.de_entry_num]))) == teahash) + hash = TEA_HASH; + else if (GET_HASH_VALUE + (deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash) + hash = R5_HASH; + else { + reiserfs_warning(s, "Unrecognised hash function"); + hash = UNSET_HASH; + } + } while (0); + + pathrelse(&path); + return hash; } // finds out which hash names are sorted with -static int what_hash (struct super_block * s) +static int what_hash(struct super_block *s) { - __u32 code; - - code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s)); - - /* reiserfs_hash_detect() == true if any of the hash mount options - ** were used. We must check them to make sure the user isn't - ** using a bad hash value - */ - if (code == UNSET_HASH || reiserfs_hash_detect(s)) - code = find_hash_out (s); - - if (code != UNSET_HASH && reiserfs_hash_detect(s)) { - /* detection has found the hash, and we must check against the - ** mount options - */ - if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { - reiserfs_warning (s, "Error, %s hash detected, " - "unable to force rupasov hash", reiserfs_hashname(code)) ; - code = UNSET_HASH ; - } else if (reiserfs_tea_hash(s) && code != TEA_HASH) { - reiserfs_warning (s, "Error, %s hash detected, " - "unable to force tea hash", reiserfs_hashname(code)) ; - code = UNSET_HASH ; - } else if (reiserfs_r5_hash(s) && code != R5_HASH) { - reiserfs_warning (s, "Error, %s hash detected, " - "unable to force r5 hash", reiserfs_hashname(code)) ; - code = UNSET_HASH ; - } - } else { - /* find_hash_out was not called or could not determine the hash */ - if (reiserfs_rupasov_hash(s)) { - code = YURA_HASH ; - } else if (reiserfs_tea_hash(s)) { - code = TEA_HASH ; - } else if (reiserfs_r5_hash(s)) { - code = R5_HASH ; - } - } - - /* if we are 
mounted RW, and we have a new valid hash code, update - ** the super - */ - if (code != UNSET_HASH && - !(s->s_flags & MS_RDONLY) && - code != sb_hash_function_code(SB_DISK_SUPER_BLOCK(s))) { - set_sb_hash_function_code(SB_DISK_SUPER_BLOCK(s), code); - } - return code; + __u32 code; + + code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s)); + + /* reiserfs_hash_detect() == true if any of the hash mount options + ** were used. We must check them to make sure the user isn't + ** using a bad hash value + */ + if (code == UNSET_HASH || reiserfs_hash_detect(s)) + code = find_hash_out(s); + + if (code != UNSET_HASH && reiserfs_hash_detect(s)) { + /* detection has found the hash, and we must check against the + ** mount options + */ + if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { + reiserfs_warning(s, "Error, %s hash detected, " + "unable to force rupasov hash", + reiserfs_hashname(code)); + code = UNSET_HASH; + } else if (reiserfs_tea_hash(s) && code != TEA_HASH) { + reiserfs_warning(s, "Error, %s hash detected, " + "unable to force tea hash", + reiserfs_hashname(code)); + code = UNSET_HASH; + } else if (reiserfs_r5_hash(s) && code != R5_HASH) { + reiserfs_warning(s, "Error, %s hash detected, " + "unable to force r5 hash", + reiserfs_hashname(code)); + code = UNSET_HASH; + } + } else { + /* find_hash_out was not called or could not determine the hash */ + if (reiserfs_rupasov_hash(s)) { + code = YURA_HASH; + } else if (reiserfs_tea_hash(s)) { + code = TEA_HASH; + } else if (reiserfs_r5_hash(s)) { + code = R5_HASH; + } + } + + /* if we are mounted RW, and we have a new valid hash code, update + ** the super + */ + if (code != UNSET_HASH && + !(s->s_flags & MS_RDONLY) && + code != sb_hash_function_code(SB_DISK_SUPER_BLOCK(s))) { + set_sb_hash_function_code(SB_DISK_SUPER_BLOCK(s), code); + } + return code; } // return pointer to appropriate function -static hashf_t hash_function (struct super_block * s) +static hashf_t hash_function(struct super_block *s) { - switch 
(what_hash (s)) { - case TEA_HASH: - reiserfs_info (s, "Using tea hash to sort names\n"); - return keyed_hash; - case YURA_HASH: - reiserfs_info (s, "Using rupasov hash to sort names\n"); - return yura_hash; - case R5_HASH: - reiserfs_info (s, "Using r5 hash to sort names\n"); - return r5_hash; - } - return NULL; + switch (what_hash(s)) { + case TEA_HASH: + reiserfs_info(s, "Using tea hash to sort names\n"); + return keyed_hash; + case YURA_HASH: + reiserfs_info(s, "Using rupasov hash to sort names\n"); + return yura_hash; + case R5_HASH: + reiserfs_info(s, "Using r5 hash to sort names\n"); + return r5_hash; + } + return NULL; } // this is used to set up correct value for old partitions -static int function2code (hashf_t func) +static int function2code(hashf_t func) { - if (func == keyed_hash) - return TEA_HASH; - if (func == yura_hash) - return YURA_HASH; - if (func == r5_hash) - return R5_HASH; + if (func == keyed_hash) + return TEA_HASH; + if (func == yura_hash) + return YURA_HASH; + if (func == r5_hash) + return R5_HASH; - BUG() ; // should never happen + BUG(); // should never happen - return 0; + return 0; } #define SWARN(silent, s, ...) 
\ if (!(silent)) \ reiserfs_warning (s, __VA_ARGS__) -static int reiserfs_fill_super (struct super_block * s, void * data, int silent) +static int reiserfs_fill_super(struct super_block *s, void *data, int silent) { - struct inode *root_inode; - int j; - struct reiserfs_transaction_handle th ; - int old_format = 0; - unsigned long blocks; - unsigned int commit_max_age = 0; - int jinit_done = 0 ; - struct reiserfs_iget_args args ; - struct reiserfs_super_block * rs; - char *jdev_name; - struct reiserfs_sb_info *sbi; - int errval = -EINVAL; - - sbi = kmalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); - if (!sbi) { - errval = -ENOMEM; - goto error; - } - s->s_fs_info = sbi; - memset (sbi, 0, sizeof (struct reiserfs_sb_info)); - /* Set default values for options: non-aggressive tails, RO on errors */ - REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); - REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO); - /* no preallocation minimum, be smart in - reiserfs_file_write instead */ - REISERFS_SB(s)->s_alloc_options.preallocmin = 0; - /* Preallocate by 16 blocks (17-1) at once */ - REISERFS_SB(s)->s_alloc_options.preallocsize = 17; - /* Initialize the rwsem for xattr dir */ - init_rwsem(&REISERFS_SB(s)->xattr_dir_sem); - - /* setup default block allocator options */ - reiserfs_init_alloc_options(s); - - jdev_name = NULL; - if (reiserfs_parse_options (s, (char *) data, &(sbi->s_mount_opt), &blocks, &jdev_name, &commit_max_age) == 0) { - goto error; - } - - if (blocks) { - SWARN (silent, s, "jmacd-7: reiserfs_fill_super: resize option " - "for remount only"); - goto error; - } - - /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ - if (!read_super_block (s, REISERFS_OLD_DISK_OFFSET_IN_BYTES)) - old_format = 1; - /* try new format (64-th 1k block), which can contain reiserfs super block */ - else if (read_super_block (s, REISERFS_DISK_OFFSET_IN_BYTES)) { - SWARN(silent, s, "sh-2021: reiserfs_fill_super: can not find reiserfs 
on %s", reiserfs_bdevname (s)); - goto error; - } - - rs = SB_DISK_SUPER_BLOCK (s); - /* Let's do basic sanity check to verify that underlying device is not - smaller than the filesystem. If the check fails then abort and scream, - because bad stuff will happen otherwise. */ - if ( s->s_bdev && s->s_bdev->bd_inode && i_size_read(s->s_bdev->bd_inode) < sb_block_count(rs)*sb_blocksize(rs)) { - SWARN (silent, s, "Filesystem on %s cannot be mounted because it is bigger than the device", reiserfs_bdevname(s)); - SWARN(silent, s, "You may need to run fsck or increase size of your LVM partition"); - SWARN(silent, s, "Or may be you forgot to reboot after fdisk when it told you to"); - goto error; - } - - sbi->s_mount_state = SB_REISERFS_STATE(s); - sbi->s_mount_state = REISERFS_VALID_FS ; - - if (old_format ? read_old_bitmaps(s) : read_bitmaps(s)) { - SWARN(silent, s, "jmacd-8: reiserfs_fill_super: unable to read bitmap"); - goto error; - } + struct inode *root_inode; + int j; + struct reiserfs_transaction_handle th; + int old_format = 0; + unsigned long blocks; + unsigned int commit_max_age = 0; + int jinit_done = 0; + struct reiserfs_iget_args args; + struct reiserfs_super_block *rs; + char *jdev_name; + struct reiserfs_sb_info *sbi; + int errval = -EINVAL; + + sbi = kmalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); + if (!sbi) { + errval = -ENOMEM; + goto error; + } + s->s_fs_info = sbi; + memset(sbi, 0, sizeof(struct reiserfs_sb_info)); + /* Set default values for options: non-aggressive tails, RO on errors */ + REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); + REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO); + /* no preallocation minimum, be smart in + reiserfs_file_write instead */ + REISERFS_SB(s)->s_alloc_options.preallocmin = 0; + /* Preallocate by 16 blocks (17-1) at once */ + REISERFS_SB(s)->s_alloc_options.preallocsize = 17; + /* Initialize the rwsem for xattr dir */ + init_rwsem(&REISERFS_SB(s)->xattr_dir_sem); + + /* setup default block 
allocator options */ + reiserfs_init_alloc_options(s); + + jdev_name = NULL; + if (reiserfs_parse_options + (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, + &commit_max_age) == 0) { + goto error; + } + + if (blocks) { + SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option " + "for remount only"); + goto error; + } + + /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ + if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES)) + old_format = 1; + /* try new format (64-th 1k block), which can contain reiserfs super block */ + else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { + SWARN(silent, s, + "sh-2021: reiserfs_fill_super: can not find reiserfs on %s", + reiserfs_bdevname(s)); + goto error; + } + + rs = SB_DISK_SUPER_BLOCK(s); + /* Let's do basic sanity check to verify that underlying device is not + smaller than the filesystem. If the check fails then abort and scream, + because bad stuff will happen otherwise. */ + if (s->s_bdev && s->s_bdev->bd_inode + && i_size_read(s->s_bdev->bd_inode) < + sb_block_count(rs) * sb_blocksize(rs)) { + SWARN(silent, s, + "Filesystem on %s cannot be mounted because it is bigger than the device", + reiserfs_bdevname(s)); + SWARN(silent, s, + "You may need to run fsck or increase size of your LVM partition"); + SWARN(silent, s, + "Or may be you forgot to reboot after fdisk when it told you to"); + goto error; + } + + sbi->s_mount_state = SB_REISERFS_STATE(s); + sbi->s_mount_state = REISERFS_VALID_FS; + + if (old_format ? 
read_old_bitmaps(s) : read_bitmaps(s)) { + SWARN(silent, s, + "jmacd-8: reiserfs_fill_super: unable to read bitmap"); + goto error; + } #ifdef CONFIG_REISERFS_CHECK - SWARN (silent, s, "CONFIG_REISERFS_CHECK is set ON"); - SWARN (silent, s, "- it is slow mode for debugging."); + SWARN(silent, s, "CONFIG_REISERFS_CHECK is set ON"); + SWARN(silent, s, "- it is slow mode for debugging."); #endif - /* make data=ordered the default */ - if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) && - !reiserfs_data_writeback(s)) - { - REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_DATA_ORDERED); - } - - if (reiserfs_data_log(s)) { - reiserfs_info (s, "using journaled data mode\n"); - } else if (reiserfs_data_ordered(s)) { - reiserfs_info (s, "using ordered data mode\n"); - } else { - reiserfs_info (s, "using writeback data mode\n"); - } - if (reiserfs_barrier_flush(s)) { - printk("reiserfs: using flush barriers\n"); - } - - // set_device_ro(s->s_dev, 1) ; - if( journal_init(s, jdev_name, old_format, commit_max_age) ) { - SWARN(silent, s, "sh-2022: reiserfs_fill_super: unable to initialize journal space") ; - goto error ; - } else { - jinit_done = 1 ; /* once this is set, journal_release must be called - ** if we error out of the mount - */ - } - if (reread_meta_blocks(s)) { - SWARN(silent, s, "jmacd-9: reiserfs_fill_super: unable to reread meta blocks after journal init") ; - goto error ; - } - - if (replay_only (s)) - goto error; - - if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { - SWARN(silent, s, "clm-7000: Detected readonly device, marking FS readonly") ; - s->s_flags |= MS_RDONLY ; - } - args.objectid = REISERFS_ROOT_OBJECTID ; - args.dirid = REISERFS_ROOT_PARENT_OBJECTID ; - root_inode = iget5_locked (s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args)); - if (!root_inode) { - SWARN(silent, s, "jmacd-10: reiserfs_fill_super: get root inode failed"); - goto error; - } - - if (root_inode->i_state & I_NEW) { - 
reiserfs_read_locked_inode(root_inode, &args); - unlock_new_inode(root_inode); - } - - s->s_root = d_alloc_root(root_inode); - if (!s->s_root) { - iput(root_inode); - goto error; - } - - // define and initialize hash function - sbi->s_hash_function = hash_function (s); - if (sbi->s_hash_function == NULL) { - dput(s->s_root) ; - s->s_root = NULL ; - goto error ; - } - - if (is_reiserfs_3_5 (rs) || (is_reiserfs_jr (rs) && SB_VERSION (s) == REISERFS_VERSION_1)) - set_bit(REISERFS_3_5, &(sbi->s_properties)); - else - set_bit(REISERFS_3_6, &(sbi->s_properties)); - - if (!(s->s_flags & MS_RDONLY)) { - - errval = journal_begin(&th, s, 1) ; - if (errval) { - dput (s->s_root); - s->s_root = NULL; - goto error; - } - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; - - set_sb_umount_state( rs, REISERFS_ERROR_FS ); - set_sb_fs_state (rs, 0); - - if (old_format_only(s)) { - /* filesystem of format 3.5 either with standard or non-standard - journal */ - if (convert_reiserfs (s)) { - /* and -o conv is given */ - if(!silent) - reiserfs_info (s,"converting 3.5 filesystem to the 3.6 format") ; - - if (is_reiserfs_3_5 (rs)) - /* put magic string of 3.6 format. 
2.2 will not be able to - mount this filesystem anymore */ - memcpy (rs->s_v1.s_magic, reiserfs_3_6_magic_string, - sizeof (reiserfs_3_6_magic_string)); - - set_sb_version(rs,REISERFS_VERSION_2); - reiserfs_convert_objectid_map_v1(s) ; - set_bit(REISERFS_3_6, &(sbi->s_properties)); - clear_bit(REISERFS_3_5, &(sbi->s_properties)); - } else if (!silent){ - reiserfs_info (s, "using 3.5.x disk format\n") ; - } - } - - journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); - errval = journal_end(&th, s, 1) ; - if (errval) { - dput (s->s_root); - s->s_root = NULL; - goto error; - } - - if ((errval = reiserfs_xattr_init (s, s->s_flags))) { - dput (s->s_root); - s->s_root = NULL; - goto error; - } - - /* look for files which were to be removed in previous session */ - finish_unfinished (s); - } else { - if ( old_format_only(s) && !silent) { - reiserfs_info (s, "using 3.5.x disk format\n") ; - } - - if ((errval = reiserfs_xattr_init (s, s->s_flags))) { - dput (s->s_root); - s->s_root = NULL; - goto error; - } - } - // mark hash in super block: it could be unset. 
overwrite should be ok - set_sb_hash_function_code( rs, function2code(sbi->s_hash_function ) ); - - handle_attrs( s ); - - reiserfs_proc_info_init( s ); - - init_waitqueue_head (&(sbi->s_wait)); - spin_lock_init(&sbi->bitmap_lock); - - return (0); - - error: - if (jinit_done) { /* kill the commit thread, free journal ram */ - journal_release_error(NULL, s) ; - } - if (SB_DISK_SUPER_BLOCK (s)) { - for (j = 0; j < SB_BMAP_NR (s); j ++) { - if (SB_AP_BITMAP (s)) - brelse (SB_AP_BITMAP (s)[j].bh); - } - if (SB_AP_BITMAP (s)) - vfree (SB_AP_BITMAP (s)); - } - if (SB_BUFFER_WITH_SB (s)) - brelse(SB_BUFFER_WITH_SB (s)); + /* make data=ordered the default */ + if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) && + !reiserfs_data_writeback(s)) { + REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_DATA_ORDERED); + } + + if (reiserfs_data_log(s)) { + reiserfs_info(s, "using journaled data mode\n"); + } else if (reiserfs_data_ordered(s)) { + reiserfs_info(s, "using ordered data mode\n"); + } else { + reiserfs_info(s, "using writeback data mode\n"); + } + if (reiserfs_barrier_flush(s)) { + printk("reiserfs: using flush barriers\n"); + } + // set_device_ro(s->s_dev, 1) ; + if (journal_init(s, jdev_name, old_format, commit_max_age)) { + SWARN(silent, s, + "sh-2022: reiserfs_fill_super: unable to initialize journal space"); + goto error; + } else { + jinit_done = 1; /* once this is set, journal_release must be called + ** if we error out of the mount + */ + } + if (reread_meta_blocks(s)) { + SWARN(silent, s, + "jmacd-9: reiserfs_fill_super: unable to reread meta blocks after journal init"); + goto error; + } + + if (replay_only(s)) + goto error; + + if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { + SWARN(silent, s, + "clm-7000: Detected readonly device, marking FS readonly"); + s->s_flags |= MS_RDONLY; + } + args.objectid = REISERFS_ROOT_OBJECTID; + args.dirid = REISERFS_ROOT_PARENT_OBJECTID; + root_inode = + iget5_locked(s, REISERFS_ROOT_OBJECTID, 
reiserfs_find_actor, + reiserfs_init_locked_inode, (void *)(&args)); + if (!root_inode) { + SWARN(silent, s, + "jmacd-10: reiserfs_fill_super: get root inode failed"); + goto error; + } + + if (root_inode->i_state & I_NEW) { + reiserfs_read_locked_inode(root_inode, &args); + unlock_new_inode(root_inode); + } + + s->s_root = d_alloc_root(root_inode); + if (!s->s_root) { + iput(root_inode); + goto error; + } + // define and initialize hash function + sbi->s_hash_function = hash_function(s); + if (sbi->s_hash_function == NULL) { + dput(s->s_root); + s->s_root = NULL; + goto error; + } + + if (is_reiserfs_3_5(rs) + || (is_reiserfs_jr(rs) && SB_VERSION(s) == REISERFS_VERSION_1)) + set_bit(REISERFS_3_5, &(sbi->s_properties)); + else + set_bit(REISERFS_3_6, &(sbi->s_properties)); + + if (!(s->s_flags & MS_RDONLY)) { + + errval = journal_begin(&th, s, 1); + if (errval) { + dput(s->s_root); + s->s_root = NULL; + goto error; + } + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); + + set_sb_umount_state(rs, REISERFS_ERROR_FS); + set_sb_fs_state(rs, 0); + + if (old_format_only(s)) { + /* filesystem of format 3.5 either with standard or non-standard + journal */ + if (convert_reiserfs(s)) { + /* and -o conv is given */ + if (!silent) + reiserfs_info(s, + "converting 3.5 filesystem to the 3.6 format"); + + if (is_reiserfs_3_5(rs)) + /* put magic string of 3.6 format. 
2.2 will not be able to + mount this filesystem anymore */ + memcpy(rs->s_v1.s_magic, + reiserfs_3_6_magic_string, + sizeof + (reiserfs_3_6_magic_string)); + + set_sb_version(rs, REISERFS_VERSION_2); + reiserfs_convert_objectid_map_v1(s); + set_bit(REISERFS_3_6, &(sbi->s_properties)); + clear_bit(REISERFS_3_5, &(sbi->s_properties)); + } else if (!silent) { + reiserfs_info(s, "using 3.5.x disk format\n"); + } + } + + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); + errval = journal_end(&th, s, 1); + if (errval) { + dput(s->s_root); + s->s_root = NULL; + goto error; + } + + if ((errval = reiserfs_xattr_init(s, s->s_flags))) { + dput(s->s_root); + s->s_root = NULL; + goto error; + } + + /* look for files which were to be removed in previous session */ + finish_unfinished(s); + } else { + if (old_format_only(s) && !silent) { + reiserfs_info(s, "using 3.5.x disk format\n"); + } + + if ((errval = reiserfs_xattr_init(s, s->s_flags))) { + dput(s->s_root); + s->s_root = NULL; + goto error; + } + } + // mark hash in super block: it could be unset. 
overwrite should be ok + set_sb_hash_function_code(rs, function2code(sbi->s_hash_function)); + + handle_attrs(s); + + reiserfs_proc_info_init(s); + + init_waitqueue_head(&(sbi->s_wait)); + spin_lock_init(&sbi->bitmap_lock); + + return (0); + + error: + if (jinit_done) { /* kill the commit thread, free journal ram */ + journal_release_error(NULL, s); + } + if (SB_DISK_SUPER_BLOCK(s)) { + for (j = 0; j < SB_BMAP_NR(s); j++) { + if (SB_AP_BITMAP(s)) + brelse(SB_AP_BITMAP(s)[j].bh); + } + if (SB_AP_BITMAP(s)) + vfree(SB_AP_BITMAP(s)); + } + if (SB_BUFFER_WITH_SB(s)) + brelse(SB_BUFFER_WITH_SB(s)); #ifdef CONFIG_QUOTA - for (j = 0; j < MAXQUOTAS; j++) { - if (sbi->s_qf_names[j]) - kfree(sbi->s_qf_names[j]); - } + for (j = 0; j < MAXQUOTAS; j++) { + if (sbi->s_qf_names[j]) + kfree(sbi->s_qf_names[j]); + } #endif - if (sbi != NULL) { - kfree(sbi); - } + if (sbi != NULL) { + kfree(sbi); + } - s->s_fs_info = NULL; - return errval; + s->s_fs_info = NULL; + return errval; } - -static int reiserfs_statfs (struct super_block * s, struct kstatfs * buf) +static int reiserfs_statfs(struct super_block *s, struct kstatfs *buf) { - struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); - - buf->f_namelen = (REISERFS_MAX_NAME (s->s_blocksize)); - buf->f_bfree = sb_free_blocks(rs); - buf->f_bavail = buf->f_bfree; - buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1; - buf->f_bsize = s->s_blocksize; - /* changed to accommodate gcc folks.*/ - buf->f_type = REISERFS_SUPER_MAGIC; - return 0; + struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); + + buf->f_namelen = (REISERFS_MAX_NAME(s->s_blocksize)); + buf->f_bfree = sb_free_blocks(rs); + buf->f_bavail = buf->f_bfree; + buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1; + buf->f_bsize = s->s_blocksize; + /* changed to accommodate gcc folks. 
*/ + buf->f_type = REISERFS_SUPER_MAGIC; + return 0; } #ifdef CONFIG_QUOTA static int reiserfs_dquot_initialize(struct inode *inode, int type) { - struct reiserfs_transaction_handle th; - int ret, err; - - /* We may create quota structure so we need to reserve enough blocks */ - reiserfs_write_lock(inode->i_sb); - ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)); - if (ret) - goto out; - ret = dquot_initialize(inode, type); - err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)); - if (!ret && err) - ret = err; -out: - reiserfs_write_unlock(inode->i_sb); - return ret; + struct reiserfs_transaction_handle th; + int ret, err; + + /* We may create quota structure so we need to reserve enough blocks */ + reiserfs_write_lock(inode->i_sb); + ret = + journal_begin(&th, inode->i_sb, + 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)); + if (ret) + goto out; + ret = dquot_initialize(inode, type); + err = + journal_end(&th, inode->i_sb, + 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)); + if (!ret && err) + ret = err; + out: + reiserfs_write_unlock(inode->i_sb); + return ret; } static int reiserfs_dquot_drop(struct inode *inode) { - struct reiserfs_transaction_handle th; - int ret, err; - - /* We may delete quota structure so we need to reserve enough blocks */ - reiserfs_write_lock(inode->i_sb); - ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)); - if (ret) - goto out; - ret = dquot_drop(inode); - err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)); - if (!ret && err) - ret = err; -out: - reiserfs_write_unlock(inode->i_sb); - return ret; + struct reiserfs_transaction_handle th; + int ret, err; + + /* We may delete quota structure so we need to reserve enough blocks */ + reiserfs_write_lock(inode->i_sb); + ret = + journal_begin(&th, inode->i_sb, + 2 * REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)); + if (ret) + goto out; + ret = dquot_drop(inode); + err = + journal_end(&th, 
inode->i_sb, + 2 * REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)); + if (!ret && err) + ret = err; + out: + reiserfs_write_unlock(inode->i_sb); + return ret; } static int reiserfs_write_dquot(struct dquot *dquot) { - struct reiserfs_transaction_handle th; - int ret, err; - - reiserfs_write_lock(dquot->dq_sb); - ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); - if (ret) - goto out; - ret = dquot_commit(dquot); - err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); - if (!ret && err) - ret = err; -out: - reiserfs_write_unlock(dquot->dq_sb); - return ret; + struct reiserfs_transaction_handle th; + int ret, err; + + reiserfs_write_lock(dquot->dq_sb); + ret = + journal_begin(&th, dquot->dq_sb, + REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + if (ret) + goto out; + ret = dquot_commit(dquot); + err = + journal_end(&th, dquot->dq_sb, + REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + if (!ret && err) + ret = err; + out: + reiserfs_write_unlock(dquot->dq_sb); + return ret; } static int reiserfs_acquire_dquot(struct dquot *dquot) { - struct reiserfs_transaction_handle th; - int ret, err; - - reiserfs_write_lock(dquot->dq_sb); - ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); - if (ret) - goto out; - ret = dquot_acquire(dquot); - err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); - if (!ret && err) - ret = err; -out: - reiserfs_write_unlock(dquot->dq_sb); - return ret; + struct reiserfs_transaction_handle th; + int ret, err; + + reiserfs_write_lock(dquot->dq_sb); + ret = + journal_begin(&th, dquot->dq_sb, + REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + if (ret) + goto out; + ret = dquot_acquire(dquot); + err = + journal_end(&th, dquot->dq_sb, + REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + if (!ret && err) + ret = err; + out: + reiserfs_write_unlock(dquot->dq_sb); + return ret; } static int reiserfs_release_dquot(struct dquot *dquot) { - struct 
reiserfs_transaction_handle th; - int ret, err; - - reiserfs_write_lock(dquot->dq_sb); - ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); - if (ret) - goto out; - ret = dquot_release(dquot); - err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); - if (!ret && err) - ret = err; -out: - reiserfs_write_unlock(dquot->dq_sb); - return ret; + struct reiserfs_transaction_handle th; + int ret, err; + + reiserfs_write_lock(dquot->dq_sb); + ret = + journal_begin(&th, dquot->dq_sb, + REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + if (ret) + goto out; + ret = dquot_release(dquot); + err = + journal_end(&th, dquot->dq_sb, + REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + if (!ret && err) + ret = err; + out: + reiserfs_write_unlock(dquot->dq_sb); + return ret; } static int reiserfs_mark_dquot_dirty(struct dquot *dquot) { - /* Are we journalling quotas? */ - if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || - REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { - dquot_mark_dquot_dirty(dquot); - return reiserfs_write_dquot(dquot); - } - else - return dquot_mark_dquot_dirty(dquot); + /* Are we journalling quotas? 
*/ + if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || + REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { + dquot_mark_dquot_dirty(dquot); + return reiserfs_write_dquot(dquot); + } else + return dquot_mark_dquot_dirty(dquot); } static int reiserfs_write_info(struct super_block *sb, int type) { - struct reiserfs_transaction_handle th; - int ret, err; - - /* Data block + inode block */ - reiserfs_write_lock(sb); - ret = journal_begin(&th, sb, 2); - if (ret) - goto out; - ret = dquot_commit_info(sb, type); - err = journal_end(&th, sb, 2); - if (!ret && err) - ret = err; -out: - reiserfs_write_unlock(sb); - return ret; + struct reiserfs_transaction_handle th; + int ret, err; + + /* Data block + inode block */ + reiserfs_write_lock(sb); + ret = journal_begin(&th, sb, 2); + if (ret) + goto out; + ret = dquot_commit_info(sb, type); + err = journal_end(&th, sb, 2); + if (!ret && err) + ret = err; + out: + reiserfs_write_unlock(sb); + return ret; } /* @@ -1977,45 +2116,48 @@ out: static int reiserfs_quota_on_mount(struct super_block *sb, int type) { return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], - REISERFS_SB(sb)->s_jquota_fmt, type); + REISERFS_SB(sb)->s_jquota_fmt, type); } /* * Standard function to be called on quota_on */ -static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, char *path) +static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, + char *path) { - int err; - struct nameidata nd; - - if (!(REISERFS_SB(sb)->s_mount_opt & (1<mnt_sb != sb) { - path_release(&nd); - return -EXDEV; - } - /* We must not pack tails for quota files on reiserfs for quota IO to work */ - if (!REISERFS_I(nd.dentry->d_inode)->i_flags & i_nopack_mask) { - reiserfs_warning(sb, "reiserfs: Quota file must have tail packing disabled."); - path_release(&nd); - return -EINVAL; - } - /* Not journalling quota? No more tests needed... 
*/ - if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && - !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { + int err; + struct nameidata nd; + + if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) + return -EINVAL; + err = path_lookup(path, LOOKUP_FOLLOW, &nd); + if (err) + return err; + /* Quotafile not on the same filesystem? */ + if (nd.mnt->mnt_sb != sb) { + path_release(&nd); + return -EXDEV; + } + /* We must not pack tails for quota files on reiserfs for quota IO to work */ + if (!REISERFS_I(nd.dentry->d_inode)->i_flags & i_nopack_mask) { + reiserfs_warning(sb, + "reiserfs: Quota file must have tail packing disabled."); + path_release(&nd); + return -EINVAL; + } + /* Not journalling quota? No more tests needed... */ + if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && + !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { + path_release(&nd); + return vfs_quota_on(sb, type, format_id, path); + } + /* Quotafile not of fs root? */ + if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) + reiserfs_warning(sb, + "reiserfs: Quota file not on filesystem root. " + "Journalled quota will not work."); path_release(&nd); - return vfs_quota_on(sb, type, format_id, path); - } - /* Quotafile not of fs root? */ - if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) - reiserfs_warning(sb, "reiserfs: Quota file not on filesystem root. 
" - "Journalled quota will not work."); - path_release(&nd); - return vfs_quota_on(sb, type, format_id, path); + return vfs_quota_on(sb, type, format_id, path); } /* Read data from quotafile - avoid pagecache and such because we cannot afford @@ -2025,42 +2167,44 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, ch static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; - unsigned long blk = off >> sb->s_blocksize_bits; - int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; - size_t toread; - struct buffer_head tmp_bh, *bh; - loff_t i_size = i_size_read(inode); - - if (off > i_size) - return 0; - if (off+len > i_size) - len = i_size-off; - toread = len; - while (toread > 0) { - tocopy = sb->s_blocksize - offset < toread ? sb->s_blocksize - offset : toread; - tmp_bh.b_state = 0; - /* Quota files are without tails so we can safely use this function */ - reiserfs_write_lock(sb); - err = reiserfs_get_block(inode, blk, &tmp_bh, 0); - reiserfs_write_unlock(sb); - if (err) - return err; - if (!buffer_mapped(&tmp_bh)) /* A hole? */ - memset(data, 0, tocopy); - else { - bh = sb_bread(sb, tmp_bh.b_blocknr); - if (!bh) - return -EIO; - memcpy(data, bh->b_data+offset, tocopy); - brelse(bh); - } - offset = 0; - toread -= tocopy; - data += tocopy; - blk++; - } - return len; + struct inode *inode = sb_dqopt(sb)->files[type]; + unsigned long blk = off >> sb->s_blocksize_bits; + int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; + size_t toread; + struct buffer_head tmp_bh, *bh; + loff_t i_size = i_size_read(inode); + + if (off > i_size) + return 0; + if (off + len > i_size) + len = i_size - off; + toread = len; + while (toread > 0) { + tocopy = + sb->s_blocksize - offset < + toread ? 
sb->s_blocksize - offset : toread; + tmp_bh.b_state = 0; + /* Quota files are without tails so we can safely use this function */ + reiserfs_write_lock(sb); + err = reiserfs_get_block(inode, blk, &tmp_bh, 0); + reiserfs_write_unlock(sb); + if (err) + return err; + if (!buffer_mapped(&tmp_bh)) /* A hole? */ + memset(data, 0, tocopy); + else { + bh = sb_bread(sb, tmp_bh.b_blocknr); + if (!bh) + return -EIO; + memcpy(data, bh->b_data + offset, tocopy); + brelse(bh); + } + offset = 0; + toread -= tocopy; + data += tocopy; + blk++; + } + return len; } /* Write to quotafile (we know the transaction is already started and has @@ -2068,117 +2212,116 @@ static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, static ssize_t reiserfs_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; - unsigned long blk = off >> sb->s_blocksize_bits; - int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; - int journal_quota = REISERFS_SB(sb)->s_qf_names[type] != NULL; - size_t towrite = len; - struct buffer_head tmp_bh, *bh; - - down(&inode->i_sem); - while (towrite > 0) { - tocopy = sb->s_blocksize - offset < towrite ? 
- sb->s_blocksize - offset : towrite; - tmp_bh.b_state = 0; - err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE); - if (err) - goto out; - if (offset || tocopy != sb->s_blocksize) - bh = sb_bread(sb, tmp_bh.b_blocknr); - else - bh = sb_getblk(sb, tmp_bh.b_blocknr); - if (!bh) { - err = -EIO; - goto out; - } - lock_buffer(bh); - memcpy(bh->b_data+offset, data, tocopy); - flush_dcache_page(bh->b_page); - set_buffer_uptodate(bh); - unlock_buffer(bh); - reiserfs_prepare_for_journal(sb, bh, 1); - journal_mark_dirty(current->journal_info, sb, bh); - if (!journal_quota) - reiserfs_add_ordered_list(inode, bh); - brelse(bh); - offset = 0; - towrite -= tocopy; - data += tocopy; - blk++; - } -out: - if (len == towrite) - return err; - if (inode->i_size < off+len-towrite) - i_size_write(inode, off+len-towrite); - inode->i_version++; - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); - up(&inode->i_sem); - return len - towrite; + struct inode *inode = sb_dqopt(sb)->files[type]; + unsigned long blk = off >> sb->s_blocksize_bits; + int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; + int journal_quota = REISERFS_SB(sb)->s_qf_names[type] != NULL; + size_t towrite = len; + struct buffer_head tmp_bh, *bh; + + down(&inode->i_sem); + while (towrite > 0) { + tocopy = sb->s_blocksize - offset < towrite ? 
+ sb->s_blocksize - offset : towrite; + tmp_bh.b_state = 0; + err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE); + if (err) + goto out; + if (offset || tocopy != sb->s_blocksize) + bh = sb_bread(sb, tmp_bh.b_blocknr); + else + bh = sb_getblk(sb, tmp_bh.b_blocknr); + if (!bh) { + err = -EIO; + goto out; + } + lock_buffer(bh); + memcpy(bh->b_data + offset, data, tocopy); + flush_dcache_page(bh->b_page); + set_buffer_uptodate(bh); + unlock_buffer(bh); + reiserfs_prepare_for_journal(sb, bh, 1); + journal_mark_dirty(current->journal_info, sb, bh); + if (!journal_quota) + reiserfs_add_ordered_list(inode, bh); + brelse(bh); + offset = 0; + towrite -= tocopy; + data += tocopy; + blk++; + } + out: + if (len == towrite) + return err; + if (inode->i_size < off + len - towrite) + i_size_write(inode, off + len - towrite); + inode->i_version++; + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + up(&inode->i_sem); + return len - towrite; } #endif -static struct super_block* -get_super_block (struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +static struct super_block *get_super_block(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) { return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super); } -static int __init -init_reiserfs_fs ( void ) +static int __init init_reiserfs_fs(void) { int ret; - if ((ret = init_inodecache ())) { + if ((ret = init_inodecache())) { return ret; } - if ((ret = reiserfs_xattr_register_handlers ())) - goto failed_reiserfs_xattr_register_handlers; + if ((ret = reiserfs_xattr_register_handlers())) + goto failed_reiserfs_xattr_register_handlers; - reiserfs_proc_info_global_init (); - reiserfs_proc_register_global ("version", reiserfs_global_version_in_proc); + reiserfs_proc_info_global_init(); + reiserfs_proc_register_global("version", + reiserfs_global_version_in_proc); - ret = register_filesystem (& reiserfs_fs_type); + ret = 
register_filesystem(&reiserfs_fs_type); if (ret == 0) { return 0; } - reiserfs_xattr_unregister_handlers (); + reiserfs_xattr_unregister_handlers(); -failed_reiserfs_xattr_register_handlers: - reiserfs_proc_unregister_global ("version"); - reiserfs_proc_info_global_done (); - destroy_inodecache (); + failed_reiserfs_xattr_register_handlers: + reiserfs_proc_unregister_global("version"); + reiserfs_proc_info_global_done(); + destroy_inodecache(); return ret; } -static void __exit -exit_reiserfs_fs ( void ) +static void __exit exit_reiserfs_fs(void) { - reiserfs_xattr_unregister_handlers (); - reiserfs_proc_unregister_global ("version"); - reiserfs_proc_info_global_done (); - unregister_filesystem (& reiserfs_fs_type); - destroy_inodecache (); + reiserfs_xattr_unregister_handlers(); + reiserfs_proc_unregister_global("version"); + reiserfs_proc_info_global_done(); + unregister_filesystem(&reiserfs_fs_type); + destroy_inodecache(); } struct file_system_type reiserfs_fs_type = { - .owner = THIS_MODULE, - .name = "reiserfs", - .get_sb = get_super_block, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, + .owner = THIS_MODULE, + .name = "reiserfs", + .get_sb = get_super_block, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, }; -MODULE_DESCRIPTION ("ReiserFS journaled filesystem"); -MODULE_AUTHOR ("Hans Reiser "); -MODULE_LICENSE ("GPL"); +MODULE_DESCRIPTION("ReiserFS journaled filesystem"); +MODULE_AUTHOR("Hans Reiser "); +MODULE_LICENSE("GPL"); -module_init (init_reiserfs_fs); -module_exit (exit_reiserfs_fs); +module_init(init_reiserfs_fs); +module_exit(exit_reiserfs_fs); diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c index 6191909d516..c92e124f628 100644 --- a/fs/reiserfs/tail_conversion.c +++ b/fs/reiserfs/tail_conversion.c @@ -11,156 +11,159 @@ /* access to tail : when one is going to read tail it must make sure, that is not running. 
direct2indirect and indirect2direct can not run concurrently */ - /* Converts direct items to an unformatted node. Panics if file has no tail. -ENOSPC if no disk space for conversion */ /* path points to first direct item of the file regarless of how many of them are there */ -int direct2indirect (struct reiserfs_transaction_handle *th, struct inode * inode, - struct path * path, struct buffer_head * unbh, - loff_t tail_offset) +int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, + struct path *path, struct buffer_head *unbh, + loff_t tail_offset) { - struct super_block * sb = inode->i_sb; - struct buffer_head *up_to_date_bh ; - struct item_head * p_le_ih = PATH_PITEM_HEAD (path); - unsigned long total_tail = 0 ; - struct cpu_key end_key; /* Key to search for the last byte of the - converted item. */ - struct item_head ind_ih; /* new indirect item to be inserted or - key of unfm pointer to be pasted */ - int n_blk_size, - n_retval; /* returned value for reiserfs_insert_item and clones */ - unp_t unfm_ptr; /* Handle on an unformatted node - that will be inserted in the - tree. */ - - BUG_ON (!th->t_trans_id); - - REISERFS_SB(sb)->s_direct2indirect ++; - - n_blk_size = sb->s_blocksize; - - /* and key to search for append or insert pointer to the new - unformatted node. */ - copy_item_head (&ind_ih, p_le_ih); - set_le_ih_k_offset (&ind_ih, tail_offset); - set_le_ih_k_type (&ind_ih, TYPE_INDIRECT); - - /* Set the key to search for the place for new unfm pointer */ - make_cpu_key (&end_key, inode, tail_offset, TYPE_INDIRECT, 4); - - // FIXME: we could avoid this - if ( search_for_position_by_key (sb, &end_key, path) == POSITION_FOUND ) { - reiserfs_warning (sb, "PAP-14030: direct2indirect: " - "pasted or inserted byte exists in the tree %K. 
" - "Use fsck to repair.", &end_key); - pathrelse(path); - return -EIO; - } - - p_le_ih = PATH_PITEM_HEAD (path); - - unfm_ptr = cpu_to_le32 (unbh->b_blocknr); - - if ( is_statdata_le_ih (p_le_ih) ) { - /* Insert new indirect item. */ - set_ih_free_space (&ind_ih, 0); /* delete at nearest future */ - put_ih_item_len( &ind_ih, UNFM_P_SIZE ); - PATH_LAST_POSITION (path)++; - n_retval = reiserfs_insert_item (th, path, &end_key, &ind_ih, inode, + struct super_block *sb = inode->i_sb; + struct buffer_head *up_to_date_bh; + struct item_head *p_le_ih = PATH_PITEM_HEAD(path); + unsigned long total_tail = 0; + struct cpu_key end_key; /* Key to search for the last byte of the + converted item. */ + struct item_head ind_ih; /* new indirect item to be inserted or + key of unfm pointer to be pasted */ + int n_blk_size, n_retval; /* returned value for reiserfs_insert_item and clones */ + unp_t unfm_ptr; /* Handle on an unformatted node + that will be inserted in the + tree. */ + + BUG_ON(!th->t_trans_id); + + REISERFS_SB(sb)->s_direct2indirect++; + + n_blk_size = sb->s_blocksize; + + /* and key to search for append or insert pointer to the new + unformatted node. */ + copy_item_head(&ind_ih, p_le_ih); + set_le_ih_k_offset(&ind_ih, tail_offset); + set_le_ih_k_type(&ind_ih, TYPE_INDIRECT); + + /* Set the key to search for the place for new unfm pointer */ + make_cpu_key(&end_key, inode, tail_offset, TYPE_INDIRECT, 4); + + // FIXME: we could avoid this + if (search_for_position_by_key(sb, &end_key, path) == POSITION_FOUND) { + reiserfs_warning(sb, "PAP-14030: direct2indirect: " + "pasted or inserted byte exists in the tree %K. " + "Use fsck to repair.", &end_key); + pathrelse(path); + return -EIO; + } + + p_le_ih = PATH_PITEM_HEAD(path); + + unfm_ptr = cpu_to_le32(unbh->b_blocknr); + + if (is_statdata_le_ih(p_le_ih)) { + /* Insert new indirect item. 
*/ + set_ih_free_space(&ind_ih, 0); /* delete at nearest future */ + put_ih_item_len(&ind_ih, UNFM_P_SIZE); + PATH_LAST_POSITION(path)++; + n_retval = + reiserfs_insert_item(th, path, &end_key, &ind_ih, inode, (char *)&unfm_ptr); - } else { - /* Paste into last indirect item of an object. */ - n_retval = reiserfs_paste_into_item(th, path, &end_key, inode, - (char *)&unfm_ptr, UNFM_P_SIZE); - } - if ( n_retval ) { - return n_retval; - } - - // note: from here there are two keys which have matching first - // three key components. They only differ by the fourth one. - - - /* Set the key to search for the direct items of the file */ - make_cpu_key (&end_key, inode, max_reiserfs_offset (inode), TYPE_DIRECT, 4); - - /* Move bytes from the direct items to the new unformatted node - and delete them. */ - while (1) { - int tail_size; - - /* end_key.k_offset is set so, that we will always have found - last item of the file */ - if ( search_for_position_by_key (sb, &end_key, path) == POSITION_FOUND ) - reiserfs_panic (sb, "PAP-14050: direct2indirect: " - "direct item (%K) not found", &end_key); - p_le_ih = PATH_PITEM_HEAD (path); - RFALSE( !is_direct_le_ih (p_le_ih), - "vs-14055: direct item expected(%K), found %h", - &end_key, p_le_ih); - tail_size = (le_ih_k_offset (p_le_ih) & (n_blk_size - 1)) - + ih_item_len(p_le_ih) - 1; - - /* we only send the unbh pointer if the buffer is not up to date. - ** this avoids overwriting good data from writepage() with old data - ** from the disk or buffer cache - ** Special case: unbh->b_page will be NULL if we are coming through - ** DIRECT_IO handler here. - */ - if (!unbh->b_page || buffer_uptodate(unbh) || PageUptodate(unbh->b_page)) { - up_to_date_bh = NULL ; } else { - up_to_date_bh = unbh ; + /* Paste into last indirect item of an object. 
*/ + n_retval = reiserfs_paste_into_item(th, path, &end_key, inode, + (char *)&unfm_ptr, + UNFM_P_SIZE); } - n_retval = reiserfs_delete_item (th, path, &end_key, inode, - up_to_date_bh) ; - - total_tail += n_retval ; - if (tail_size == n_retval) - // done: file does not have direct items anymore - break; - - } - /* if we've copied bytes from disk into the page, we need to zero - ** out the unused part of the block (it was not up to date before) - */ - if (up_to_date_bh) { - unsigned pgoff = (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1); - char *kaddr=kmap_atomic(up_to_date_bh->b_page, KM_USER0); - memset(kaddr + pgoff, 0, n_blk_size - total_tail) ; - kunmap_atomic(kaddr, KM_USER0); - } - - REISERFS_I(inode)->i_first_direct_byte = U32_MAX; - - return 0; -} + if (n_retval) { + return n_retval; + } + // note: from here there are two keys which have matching first + // three key components. They only differ by the fourth one. + + /* Set the key to search for the direct items of the file */ + make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT, + 4); + + /* Move bytes from the direct items to the new unformatted node + and delete them. */ + while (1) { + int tail_size; + + /* end_key.k_offset is set so, that we will always have found + last item of the file */ + if (search_for_position_by_key(sb, &end_key, path) == + POSITION_FOUND) + reiserfs_panic(sb, + "PAP-14050: direct2indirect: " + "direct item (%K) not found", &end_key); + p_le_ih = PATH_PITEM_HEAD(path); + RFALSE(!is_direct_le_ih(p_le_ih), + "vs-14055: direct item expected(%K), found %h", + &end_key, p_le_ih); + tail_size = (le_ih_k_offset(p_le_ih) & (n_blk_size - 1)) + + ih_item_len(p_le_ih) - 1; + + /* we only send the unbh pointer if the buffer is not up to date. + ** this avoids overwriting good data from writepage() with old data + ** from the disk or buffer cache + ** Special case: unbh->b_page will be NULL if we are coming through + ** DIRECT_IO handler here. 
+ */ + if (!unbh->b_page || buffer_uptodate(unbh) + || PageUptodate(unbh->b_page)) { + up_to_date_bh = NULL; + } else { + up_to_date_bh = unbh; + } + n_retval = reiserfs_delete_item(th, path, &end_key, inode, + up_to_date_bh); + + total_tail += n_retval; + if (tail_size == n_retval) + // done: file does not have direct items anymore + break; + } + /* if we've copied bytes from disk into the page, we need to zero + ** out the unused part of the block (it was not up to date before) + */ + if (up_to_date_bh) { + unsigned pgoff = + (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1); + char *kaddr = kmap_atomic(up_to_date_bh->b_page, KM_USER0); + memset(kaddr + pgoff, 0, n_blk_size - total_tail); + kunmap_atomic(kaddr, KM_USER0); + } + + REISERFS_I(inode)->i_first_direct_byte = U32_MAX; + + return 0; +} /* stolen from fs/buffer.c */ -void reiserfs_unmap_buffer(struct buffer_head *bh) { - lock_buffer(bh) ; - if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { - BUG() ; - } - clear_buffer_dirty(bh) ; - /* Remove the buffer from whatever list it belongs to. We are mostly - interested in removing it from per-sb j_dirty_buffers list, to avoid - BUG() on attempt to write not mapped buffer */ - if ( (!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) { - struct inode *inode = bh->b_page->mapping->host; - struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); - spin_lock(&j->j_dirty_buffers_lock); - list_del_init(&bh->b_assoc_buffers); - reiserfs_free_jh(bh); - spin_unlock(&j->j_dirty_buffers_lock); - } - clear_buffer_mapped(bh) ; - clear_buffer_req(bh) ; - clear_buffer_new(bh); - bh->b_bdev = NULL; - unlock_buffer(bh) ; +void reiserfs_unmap_buffer(struct buffer_head *bh) +{ + lock_buffer(bh); + if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { + BUG(); + } + clear_buffer_dirty(bh); + /* Remove the buffer from whatever list it belongs to. 
We are mostly + interested in removing it from per-sb j_dirty_buffers list, to avoid + BUG() on attempt to write not mapped buffer */ + if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) { + struct inode *inode = bh->b_page->mapping->host; + struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); + spin_lock(&j->j_dirty_buffers_lock); + list_del_init(&bh->b_assoc_buffers); + reiserfs_free_jh(bh); + spin_unlock(&j->j_dirty_buffers_lock); + } + clear_buffer_mapped(bh); + clear_buffer_req(bh); + clear_buffer_new(bh); + bh->b_bdev = NULL; + unlock_buffer(bh); } /* this first locks inode (neither reads nor sync are permitted), @@ -169,108 +172,108 @@ void reiserfs_unmap_buffer(struct buffer_head *bh) { what we expect from it (number of cut bytes). But when tail remains in the unformatted node, we set mode to SKIP_BALANCING and unlock inode */ -int indirect2direct (struct reiserfs_transaction_handle *th, - struct inode * p_s_inode, - struct page *page, - struct path * p_s_path, /* path to the indirect item. */ - const struct cpu_key * p_s_item_key, /* Key to look for unformatted node pointer to be cut. */ - loff_t n_new_file_size, /* New file size. */ - char * p_c_mode) +int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, struct page *page, struct path *p_s_path, /* path to the indirect item. */ + const struct cpu_key *p_s_item_key, /* Key to look for unformatted node pointer to be cut. */ + loff_t n_new_file_size, /* New file size. 
*/ + char *p_c_mode) { - struct super_block * p_s_sb = p_s_inode->i_sb; - struct item_head s_ih; - unsigned long n_block_size = p_s_sb->s_blocksize; - char * tail; - int tail_len, round_tail_len; - loff_t pos, pos1; /* position of first byte of the tail */ - struct cpu_key key; + struct super_block *p_s_sb = p_s_inode->i_sb; + struct item_head s_ih; + unsigned long n_block_size = p_s_sb->s_blocksize; + char *tail; + int tail_len, round_tail_len; + loff_t pos, pos1; /* position of first byte of the tail */ + struct cpu_key key; - BUG_ON (!th->t_trans_id); + BUG_ON(!th->t_trans_id); - REISERFS_SB(p_s_sb)->s_indirect2direct ++; + REISERFS_SB(p_s_sb)->s_indirect2direct++; - *p_c_mode = M_SKIP_BALANCING; + *p_c_mode = M_SKIP_BALANCING; - /* store item head path points to. */ - copy_item_head (&s_ih, PATH_PITEM_HEAD(p_s_path)); - - tail_len = (n_new_file_size & (n_block_size - 1)); - if (get_inode_sd_version (p_s_inode) == STAT_DATA_V2) - round_tail_len = ROUND_UP (tail_len); - else - round_tail_len = tail_len; - - pos = le_ih_k_offset (&s_ih) - 1 + (ih_item_len(&s_ih) / UNFM_P_SIZE - 1) * p_s_sb->s_blocksize; - pos1 = pos; - - // we are protected by i_sem. The tail can not disapper, not - // append can be done either - // we are in truncate or packing tail in file_release - - tail = (char *)kmap(page) ; /* this can schedule */ - - if (path_changed (&s_ih, p_s_path)) { - /* re-search indirect item */ - if ( search_for_position_by_key (p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ) - reiserfs_panic(p_s_sb, "PAP-5520: indirect2direct: " - "item to be converted %K does not exist", p_s_item_key); + /* store item head path points to. 
*/ copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); + + tail_len = (n_new_file_size & (n_block_size - 1)); + if (get_inode_sd_version(p_s_inode) == STAT_DATA_V2) + round_tail_len = ROUND_UP(tail_len); + else + round_tail_len = tail_len; + + pos = + le_ih_k_offset(&s_ih) - 1 + (ih_item_len(&s_ih) / UNFM_P_SIZE - + 1) * p_s_sb->s_blocksize; + pos1 = pos; + + // we are protected by i_sem. The tail can not disapper, not + // append can be done either + // we are in truncate or packing tail in file_release + + tail = (char *)kmap(page); /* this can schedule */ + + if (path_changed(&s_ih, p_s_path)) { + /* re-search indirect item */ + if (search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) + == POSITION_NOT_FOUND) + reiserfs_panic(p_s_sb, + "PAP-5520: indirect2direct: " + "item to be converted %K does not exist", + p_s_item_key); + copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); #ifdef CONFIG_REISERFS_CHECK - pos = le_ih_k_offset (&s_ih) - 1 + - (ih_item_len(&s_ih) / UNFM_P_SIZE - 1) * p_s_sb->s_blocksize; - if (pos != pos1) - reiserfs_panic (p_s_sb, "vs-5530: indirect2direct: " - "tail position changed while we were reading it"); + pos = le_ih_k_offset(&s_ih) - 1 + + (ih_item_len(&s_ih) / UNFM_P_SIZE - + 1) * p_s_sb->s_blocksize; + if (pos != pos1) + reiserfs_panic(p_s_sb, "vs-5530: indirect2direct: " + "tail position changed while we were reading it"); #endif - } - - - /* Set direct item header to insert. */ - make_le_item_head (&s_ih, NULL, get_inode_item_key_version (p_s_inode), pos1 + 1, - TYPE_DIRECT, round_tail_len, 0xffff/*ih_free_space*/); - - /* we want a pointer to the first byte of the tail in the page. 
- ** the page was locked and this part of the page was up to date when - ** indirect2direct was called, so we know the bytes are still valid - */ - tail = tail + (pos & (PAGE_CACHE_SIZE - 1)) ; - - PATH_LAST_POSITION(p_s_path)++; - - key = *p_s_item_key; - set_cpu_key_k_type (&key, TYPE_DIRECT); - key.key_length = 4; - /* Insert tail as new direct item in the tree */ - if ( reiserfs_insert_item(th, p_s_path, &key, &s_ih, p_s_inode, - tail ? tail : NULL) < 0 ) { - /* No disk memory. So we can not convert last unformatted node - to the direct item. In this case we used to adjust - indirect items's ih_free_space. Now ih_free_space is not - used, it would be ideal to write zeros to corresponding - unformatted node. For now i_size is considered as guard for - going out of file size */ - kunmap(page) ; - return n_block_size - round_tail_len; - } - kunmap(page) ; - - /* make sure to get the i_blocks changes from reiserfs_insert_item */ - reiserfs_update_sd(th, p_s_inode); + } - // note: we have now the same as in above direct2indirect - // conversion: there are two keys which have matching first three - // key components. They only differ by the fouhth one. + /* Set direct item header to insert. */ + make_le_item_head(&s_ih, NULL, get_inode_item_key_version(p_s_inode), + pos1 + 1, TYPE_DIRECT, round_tail_len, + 0xffff /*ih_free_space */ ); + + /* we want a pointer to the first byte of the tail in the page. + ** the page was locked and this part of the page was up to date when + ** indirect2direct was called, so we know the bytes are still valid + */ + tail = tail + (pos & (PAGE_CACHE_SIZE - 1)); + + PATH_LAST_POSITION(p_s_path)++; + + key = *p_s_item_key; + set_cpu_key_k_type(&key, TYPE_DIRECT); + key.key_length = 4; + /* Insert tail as new direct item in the tree */ + if (reiserfs_insert_item(th, p_s_path, &key, &s_ih, p_s_inode, + tail ? tail : NULL) < 0) { + /* No disk memory. So we can not convert last unformatted node + to the direct item. 
In this case we used to adjust + indirect items's ih_free_space. Now ih_free_space is not + used, it would be ideal to write zeros to corresponding + unformatted node. For now i_size is considered as guard for + going out of file size */ + kunmap(page); + return n_block_size - round_tail_len; + } + kunmap(page); - /* We have inserted new direct item and must remove last - unformatted node. */ - *p_c_mode = M_CUT; + /* make sure to get the i_blocks changes from reiserfs_insert_item */ + reiserfs_update_sd(th, p_s_inode); - /* we store position of first direct item in the in-core inode */ - //mark_file_with_tail (p_s_inode, pos1 + 1); - REISERFS_I(p_s_inode)->i_first_direct_byte = pos1 + 1; - - return n_block_size - round_tail_len; -} + // note: we have now the same as in above direct2indirect + // conversion: there are two keys which have matching first three + // key components. They only differ by the fouhth one. + /* We have inserted new direct item and must remove last + unformatted node. 
*/ + *p_c_mode = M_CUT; + /* we store position of first direct item in the in-core inode */ + //mark_file_with_tail (p_s_inode, pos1 + 1); + REISERFS_I(p_s_inode)->i_first_direct_byte = pos1 + 1; + return n_block_size - round_tail_len; +} diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 45582fe8b46..e386d3db305 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -51,67 +51,68 @@ #define PRIVROOT_NAME ".reiserfs_priv" #define XAROOT_NAME "xattrs" -static struct reiserfs_xattr_handler *find_xattr_handler_prefix (const char *prefix); +static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char + *prefix); -static struct dentry * -create_xa_root (struct super_block *sb) +static struct dentry *create_xa_root(struct super_block *sb) { - struct dentry *privroot = dget (REISERFS_SB(sb)->priv_root); - struct dentry *xaroot; - - /* This needs to be created at mount-time */ - if (!privroot) - return ERR_PTR(-EOPNOTSUPP); - - xaroot = lookup_one_len (XAROOT_NAME, privroot, strlen (XAROOT_NAME)); - if (IS_ERR (xaroot)) { - goto out; - } else if (!xaroot->d_inode) { - int err; - down (&privroot->d_inode->i_sem); - err = privroot->d_inode->i_op->mkdir (privroot->d_inode, xaroot, 0700); - up (&privroot->d_inode->i_sem); - - if (err) { - dput (xaroot); - dput (privroot); - return ERR_PTR (err); - } - REISERFS_SB(sb)->xattr_root = dget (xaroot); - } - -out: - dput (privroot); - return xaroot; + struct dentry *privroot = dget(REISERFS_SB(sb)->priv_root); + struct dentry *xaroot; + + /* This needs to be created at mount-time */ + if (!privroot) + return ERR_PTR(-EOPNOTSUPP); + + xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME)); + if (IS_ERR(xaroot)) { + goto out; + } else if (!xaroot->d_inode) { + int err; + down(&privroot->d_inode->i_sem); + err = + privroot->d_inode->i_op->mkdir(privroot->d_inode, xaroot, + 0700); + up(&privroot->d_inode->i_sem); + + if (err) { + dput(xaroot); + dput(privroot); + return ERR_PTR(err); + } + 
REISERFS_SB(sb)->xattr_root = dget(xaroot); + } + + out: + dput(privroot); + return xaroot; } /* This will return a dentry, or error, refering to the xa root directory. * If the xa root doesn't exist yet, the dentry will be returned without * an associated inode. This dentry can be used with ->mkdir to create * the xa directory. */ -static struct dentry * -__get_xa_root (struct super_block *s) +static struct dentry *__get_xa_root(struct super_block *s) { - struct dentry *privroot = dget (REISERFS_SB(s)->priv_root); - struct dentry *xaroot = NULL; - - if (IS_ERR (privroot) || !privroot) - return privroot; - - xaroot = lookup_one_len (XAROOT_NAME, privroot, strlen (XAROOT_NAME)); - if (IS_ERR (xaroot)) { - goto out; - } else if (!xaroot->d_inode) { - dput (xaroot); - xaroot = NULL; - goto out; - } - - REISERFS_SB(s)->xattr_root = dget (xaroot); - -out: - dput (privroot); - return xaroot; + struct dentry *privroot = dget(REISERFS_SB(s)->priv_root); + struct dentry *xaroot = NULL; + + if (IS_ERR(privroot) || !privroot) + return privroot; + + xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME)); + if (IS_ERR(xaroot)) { + goto out; + } else if (!xaroot->d_inode) { + dput(xaroot); + xaroot = NULL; + goto out; + } + + REISERFS_SB(s)->xattr_root = dget(xaroot); + + out: + dput(privroot); + return xaroot; } /* Returns the dentry (or NULL) referring to the root of the extended @@ -119,147 +120,145 @@ out: * Otherwise, we attempt to retreive it from disk. It may also return * a pointer-encoded error. */ -static inline struct dentry * -get_xa_root (struct super_block *s) +static inline struct dentry *get_xa_root(struct super_block *s) { - struct dentry *dentry = dget (REISERFS_SB(s)->xattr_root); + struct dentry *dentry = dget(REISERFS_SB(s)->xattr_root); - if (!dentry) - dentry = __get_xa_root (s); + if (!dentry) + dentry = __get_xa_root(s); - return dentry; + return dentry; } /* Opens the directory corresponding to the inode's extended attribute store. 
* If flags allow, the tree to the directory may be created. If creation is * prohibited, -ENODATA is returned. */ -static struct dentry * -open_xa_dir (const struct inode *inode, int flags) +static struct dentry *open_xa_dir(const struct inode *inode, int flags) { - struct dentry *xaroot, *xadir; - char namebuf[17]; - - xaroot = get_xa_root (inode->i_sb); - if (IS_ERR (xaroot)) { - return xaroot; - } else if (!xaroot) { - if (flags == 0 || flags & XATTR_CREATE) { - xaroot = create_xa_root (inode->i_sb); - if (IS_ERR (xaroot)) - return xaroot; - } - if (!xaroot) - return ERR_PTR (-ENODATA); - } - - /* ok, we have xaroot open */ - - snprintf (namebuf, sizeof (namebuf), "%X.%X", - le32_to_cpu (INODE_PKEY (inode)->k_objectid), - inode->i_generation); - xadir = lookup_one_len (namebuf, xaroot, strlen (namebuf)); - if (IS_ERR (xadir)) { - dput (xaroot); - return xadir; - } - - if (!xadir->d_inode) { - int err; - if (flags == 0 || flags & XATTR_CREATE) { - /* Although there is nothing else trying to create this directory, - * another directory with the same hash may be created, so we need - * to protect against that */ - err = xaroot->d_inode->i_op->mkdir (xaroot->d_inode, xadir, 0700); - if (err) { - dput (xaroot); - dput (xadir); - return ERR_PTR (err); - } - } - if (!xadir->d_inode) { - dput (xaroot); - dput (xadir); - return ERR_PTR (-ENODATA); - } - } - - dput (xaroot); - return xadir; + struct dentry *xaroot, *xadir; + char namebuf[17]; + + xaroot = get_xa_root(inode->i_sb); + if (IS_ERR(xaroot)) { + return xaroot; + } else if (!xaroot) { + if (flags == 0 || flags & XATTR_CREATE) { + xaroot = create_xa_root(inode->i_sb); + if (IS_ERR(xaroot)) + return xaroot; + } + if (!xaroot) + return ERR_PTR(-ENODATA); + } + + /* ok, we have xaroot open */ + + snprintf(namebuf, sizeof(namebuf), "%X.%X", + le32_to_cpu(INODE_PKEY(inode)->k_objectid), + inode->i_generation); + xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf)); + if (IS_ERR(xadir)) { + dput(xaroot); + return 
xadir; + } + + if (!xadir->d_inode) { + int err; + if (flags == 0 || flags & XATTR_CREATE) { + /* Although there is nothing else trying to create this directory, + * another directory with the same hash may be created, so we need + * to protect against that */ + err = + xaroot->d_inode->i_op->mkdir(xaroot->d_inode, xadir, + 0700); + if (err) { + dput(xaroot); + dput(xadir); + return ERR_PTR(err); + } + } + if (!xadir->d_inode) { + dput(xaroot); + dput(xadir); + return ERR_PTR(-ENODATA); + } + } + + dput(xaroot); + return xadir; } /* Returns a dentry corresponding to a specific extended attribute file * for the inode. If flags allow, the file is created. Otherwise, a * valid or negative dentry, or an error is returned. */ -static struct dentry * -get_xa_file_dentry (const struct inode *inode, const char *name, int flags) +static struct dentry *get_xa_file_dentry(const struct inode *inode, + const char *name, int flags) { - struct dentry *xadir, *xafile; - int err = 0; - - xadir = open_xa_dir (inode, flags); - if (IS_ERR (xadir)) { - return ERR_PTR (PTR_ERR (xadir)); - } else if (xadir && !xadir->d_inode) { - dput (xadir); - return ERR_PTR (-ENODATA); - } - - xafile = lookup_one_len (name, xadir, strlen (name)); - if (IS_ERR (xafile)) { - dput (xadir); - return ERR_PTR (PTR_ERR (xafile)); - } - - if (xafile->d_inode) { /* file exists */ - if (flags & XATTR_CREATE) { - err = -EEXIST; - dput (xafile); - goto out; - } - } else if (flags & XATTR_REPLACE || flags & FL_READONLY) { - goto out; - } else { - /* inode->i_sem is down, so nothing else can try to create - * the same xattr */ - err = xadir->d_inode->i_op->create (xadir->d_inode, xafile, - 0700|S_IFREG, NULL); - - if (err) { - dput (xafile); - goto out; - } - } - -out: - dput (xadir); - if (err) - xafile = ERR_PTR (err); - return xafile; -} + struct dentry *xadir, *xafile; + int err = 0; + + xadir = open_xa_dir(inode, flags); + if (IS_ERR(xadir)) { + return ERR_PTR(PTR_ERR(xadir)); + } else if (xadir && 
!xadir->d_inode) { + dput(xadir); + return ERR_PTR(-ENODATA); + } + + xafile = lookup_one_len(name, xadir, strlen(name)); + if (IS_ERR(xafile)) { + dput(xadir); + return ERR_PTR(PTR_ERR(xafile)); + } + + if (xafile->d_inode) { /* file exists */ + if (flags & XATTR_CREATE) { + err = -EEXIST; + dput(xafile); + goto out; + } + } else if (flags & XATTR_REPLACE || flags & FL_READONLY) { + goto out; + } else { + /* inode->i_sem is down, so nothing else can try to create + * the same xattr */ + err = xadir->d_inode->i_op->create(xadir->d_inode, xafile, + 0700 | S_IFREG, NULL); + + if (err) { + dput(xafile); + goto out; + } + } + out: + dput(xadir); + if (err) + xafile = ERR_PTR(err); + return xafile; +} /* Opens a file pointer to the attribute associated with inode */ -static struct file * -open_xa_file (const struct inode *inode, const char *name, int flags) +static struct file *open_xa_file(const struct inode *inode, const char *name, + int flags) { - struct dentry *xafile; - struct file *fp; - - xafile = get_xa_file_dentry (inode, name, flags); - if (IS_ERR (xafile)) - return ERR_PTR (PTR_ERR (xafile)); - else if (!xafile->d_inode) { - dput (xafile); - return ERR_PTR (-ENODATA); - } + struct dentry *xafile; + struct file *fp; + + xafile = get_xa_file_dentry(inode, name, flags); + if (IS_ERR(xafile)) + return ERR_PTR(PTR_ERR(xafile)); + else if (!xafile->d_inode) { + dput(xafile); + return ERR_PTR(-ENODATA); + } - fp = dentry_open (xafile, NULL, O_RDWR); - /* dentry_open dputs the dentry if it fails */ + fp = dentry_open(xafile, NULL, O_RDWR); + /* dentry_open dputs the dentry if it fails */ - return fp; + return fp; } - /* * this is very similar to fs/reiserfs/dir.c:reiserfs_readdir, but * we need to drop the path before calling the filldir struct. That @@ -273,139 +272,146 @@ open_xa_file (const struct inode *inode, const char *name, int flags) * we're called with i_sem held, so there are no worries about the directory * changing underneath us. 
*/ -static int __xattr_readdir(struct file * filp, void * dirent, filldir_t filldir) +static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_dentry->d_inode; - struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ - INITIALIZE_PATH (path_to_entry); - struct buffer_head * bh; - int entry_num; - struct item_head * ih, tmp_ih; - int search_res; - char * local_buf; - loff_t next_pos; - char small_buf[32] ; /* avoid kmalloc if we can */ - struct reiserfs_de_head *deh; - int d_reclen; - char * d_name; - off_t d_off; - ino_t d_ino; - struct reiserfs_dir_entry de; - - - /* form key for search the next directory entry using f_pos field of - file structure */ - next_pos = max_reiserfs_offset(inode); - - while (1) { -research: - if (next_pos <= DOT_DOT_OFFSET) - break; - make_cpu_key (&pos_key, inode, next_pos, TYPE_DIRENTRY, 3); - - search_res = search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry, &de); - if (search_res == IO_ERROR) { - // FIXME: we could just skip part of directory which could - // not be read - pathrelse(&path_to_entry); - return -EIO; - } - - if (search_res == NAME_NOT_FOUND) - de.de_entry_num--; + struct inode *inode = filp->f_dentry->d_inode; + struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ + INITIALIZE_PATH(path_to_entry); + struct buffer_head *bh; + int entry_num; + struct item_head *ih, tmp_ih; + int search_res; + char *local_buf; + loff_t next_pos; + char small_buf[32]; /* avoid kmalloc if we can */ + struct reiserfs_de_head *deh; + int d_reclen; + char *d_name; + off_t d_off; + ino_t d_ino; + struct reiserfs_dir_entry de; + + /* form key for search the next directory entry using f_pos field of + file structure */ + next_pos = max_reiserfs_offset(inode); + + while (1) { + research: + if (next_pos <= DOT_DOT_OFFSET) + break; + make_cpu_key(&pos_key, inode, next_pos, TYPE_DIRENTRY, 3); + + 
search_res = + search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry, + &de); + if (search_res == IO_ERROR) { + // FIXME: we could just skip part of directory which could + // not be read + pathrelse(&path_to_entry); + return -EIO; + } - set_de_name_and_namelen(&de); - entry_num = de.de_entry_num; - deh = &(de.de_deh[entry_num]); + if (search_res == NAME_NOT_FOUND) + de.de_entry_num--; - bh = de.de_bh; - ih = de.de_ih; + set_de_name_and_namelen(&de); + entry_num = de.de_entry_num; + deh = &(de.de_deh[entry_num]); - if (!is_direntry_le_ih(ih)) { - reiserfs_warning(inode->i_sb, "not direntry %h", ih); - break; - } - copy_item_head(&tmp_ih, ih); + bh = de.de_bh; + ih = de.de_ih; - /* we must have found item, that is item of this directory, */ - RFALSE( COMP_SHORT_KEYS (&(ih->ih_key), &pos_key), - "vs-9000: found item %h does not match to dir we readdir %K", - ih, &pos_key); + if (!is_direntry_le_ih(ih)) { + reiserfs_warning(inode->i_sb, "not direntry %h", ih); + break; + } + copy_item_head(&tmp_ih, ih); - if (deh_offset(deh) <= DOT_DOT_OFFSET) { - break; - } + /* we must have found item, that is item of this directory, */ + RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key), + "vs-9000: found item %h does not match to dir we readdir %K", + ih, &pos_key); - /* look for the previous entry in the directory */ - next_pos = deh_offset (deh) - 1; + if (deh_offset(deh) <= DOT_DOT_OFFSET) { + break; + } - if (!de_visible (deh)) - /* it is hidden entry */ - continue; + /* look for the previous entry in the directory */ + next_pos = deh_offset(deh) - 1; - d_reclen = entry_length(bh, ih, entry_num); - d_name = B_I_DEH_ENTRY_FILE_NAME (bh, ih, deh); - d_off = deh_offset (deh); - d_ino = deh_objectid (deh); + if (!de_visible(deh)) + /* it is hidden entry */ + continue; - if (!d_name[d_reclen - 1]) - d_reclen = strlen (d_name); + d_reclen = entry_length(bh, ih, entry_num); + d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh); + d_off = deh_offset(deh); + d_ino = deh_objectid(deh); - if 
(d_reclen > REISERFS_MAX_NAME(inode->i_sb->s_blocksize)){ - /* too big to send back to VFS */ - continue ; - } + if (!d_name[d_reclen - 1]) + d_reclen = strlen(d_name); - /* Ignore the .reiserfs_priv entry */ - if (reiserfs_xattrs (inode->i_sb) && - !old_format_only(inode->i_sb) && - deh_objectid (deh) == le32_to_cpu (INODE_PKEY(REISERFS_SB(inode->i_sb)->priv_root->d_inode)->k_objectid)) - continue; + if (d_reclen > REISERFS_MAX_NAME(inode->i_sb->s_blocksize)) { + /* too big to send back to VFS */ + continue; + } - if (d_reclen <= 32) { - local_buf = small_buf ; - } else { - local_buf = reiserfs_kmalloc(d_reclen, GFP_NOFS, inode->i_sb) ; - if (!local_buf) { - pathrelse (&path_to_entry); - return -ENOMEM ; - } - if (item_moved (&tmp_ih, &path_to_entry)) { - reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ; - - /* sigh, must retry. Do this same offset again */ - next_pos = d_off; - goto research; - } - } + /* Ignore the .reiserfs_priv entry */ + if (reiserfs_xattrs(inode->i_sb) && + !old_format_only(inode->i_sb) && + deh_objectid(deh) == + le32_to_cpu(INODE_PKEY + (REISERFS_SB(inode->i_sb)->priv_root->d_inode)-> + k_objectid)) + continue; + + if (d_reclen <= 32) { + local_buf = small_buf; + } else { + local_buf = + reiserfs_kmalloc(d_reclen, GFP_NOFS, inode->i_sb); + if (!local_buf) { + pathrelse(&path_to_entry); + return -ENOMEM; + } + if (item_moved(&tmp_ih, &path_to_entry)) { + reiserfs_kfree(local_buf, d_reclen, + inode->i_sb); + + /* sigh, must retry. Do this same offset again */ + next_pos = d_off; + goto research; + } + } - // Note, that we copy name to user space via temporary - // buffer (local_buf) because filldir will block if - // user space buffer is swapped out. At that time - // entry can move to somewhere else - memcpy (local_buf, d_name, d_reclen); + // Note, that we copy name to user space via temporary + // buffer (local_buf) because filldir will block if + // user space buffer is swapped out. 
At that time + // entry can move to somewhere else + memcpy(local_buf, d_name, d_reclen); - /* the filldir function might need to start transactions, - * or do who knows what. Release the path now that we've - * copied all the important stuff out of the deh - */ - pathrelse (&path_to_entry); - - if (filldir (dirent, local_buf, d_reclen, d_off, d_ino, - DT_UNKNOWN) < 0) { - if (local_buf != small_buf) { - reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ; - } - goto end; - } - if (local_buf != small_buf) { - reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ; - } - } /* while */ + /* the filldir function might need to start transactions, + * or do who knows what. Release the path now that we've + * copied all the important stuff out of the deh + */ + pathrelse(&path_to_entry); + + if (filldir(dirent, local_buf, d_reclen, d_off, d_ino, + DT_UNKNOWN) < 0) { + if (local_buf != small_buf) { + reiserfs_kfree(local_buf, d_reclen, + inode->i_sb); + } + goto end; + } + if (local_buf != small_buf) { + reiserfs_kfree(local_buf, d_reclen, inode->i_sb); + } + } /* while */ -end: - pathrelse (&path_to_entry); - return 0; + end: + pathrelse(&path_to_entry); + return 0; } /* @@ -417,63 +423,59 @@ end: static int xattr_readdir(struct file *file, filldir_t filler, void *buf) { - struct inode *inode = file->f_dentry->d_inode; - int res = -ENOTDIR; - if (!file->f_op || !file->f_op->readdir) - goto out; - down(&inode->i_sem); + struct inode *inode = file->f_dentry->d_inode; + int res = -ENOTDIR; + if (!file->f_op || !file->f_op->readdir) + goto out; + down(&inode->i_sem); // down(&inode->i_zombie); - res = -ENOENT; - if (!IS_DEADDIR(inode)) { - lock_kernel(); - res = __xattr_readdir(file, buf, filler); - unlock_kernel(); - } + res = -ENOENT; + if (!IS_DEADDIR(inode)) { + lock_kernel(); + res = __xattr_readdir(file, buf, filler); + unlock_kernel(); + } // up(&inode->i_zombie); - up(&inode->i_sem); -out: - return res; + up(&inode->i_sem); + out: + return res; } - /* Internal operations on 
file data */ -static inline void -reiserfs_put_page(struct page *page) +static inline void reiserfs_put_page(struct page *page) { - kunmap(page); - page_cache_release(page); + kunmap(page); + page_cache_release(page); } -static struct page * -reiserfs_get_page(struct inode *dir, unsigned long n) +static struct page *reiserfs_get_page(struct inode *dir, unsigned long n) { - struct address_space *mapping = dir->i_mapping; - struct page *page; - /* We can deadlock if we try to free dentries, - and an unlink/rmdir has just occured - GFP_NOFS avoids this */ - mapping->flags = (mapping->flags & ~__GFP_BITS_MASK) | GFP_NOFS; - page = read_cache_page (mapping, n, - (filler_t*)mapping->a_ops->readpage, NULL); - if (!IS_ERR(page)) { - wait_on_page_locked(page); - kmap(page); - if (!PageUptodate(page)) - goto fail; - - if (PageError(page)) - goto fail; - } - return page; - -fail: - reiserfs_put_page(page); - return ERR_PTR(-EIO); + struct address_space *mapping = dir->i_mapping; + struct page *page; + /* We can deadlock if we try to free dentries, + and an unlink/rmdir has just occured - GFP_NOFS avoids this */ + mapping->flags = (mapping->flags & ~__GFP_BITS_MASK) | GFP_NOFS; + page = read_cache_page(mapping, n, + (filler_t *) mapping->a_ops->readpage, NULL); + if (!IS_ERR(page)) { + wait_on_page_locked(page); + kmap(page); + if (!PageUptodate(page)) + goto fail; + + if (PageError(page)) + goto fail; + } + return page; + + fail: + reiserfs_put_page(page); + return ERR_PTR(-EIO); } -static inline __u32 -xattr_hash (const char *msg, int len) +static inline __u32 xattr_hash(const char *msg, int len) { - return csum_partial (msg, len, 0); + return csum_partial(msg, len, 0); } /* Generic extended attribute operations that can be used by xa plugins */ @@ -482,294 +484,300 @@ xattr_hash (const char *msg, int len) * inode->i_sem: down */ int -reiserfs_xattr_set (struct inode *inode, const char *name, const void *buffer, - size_t buffer_size, int flags) +reiserfs_xattr_set(struct 
inode *inode, const char *name, const void *buffer, + size_t buffer_size, int flags) { - int err = 0; - struct file *fp; - struct page *page; - char *data; - struct address_space *mapping; - size_t file_pos = 0; - size_t buffer_pos = 0; - struct inode *xinode; - struct iattr newattrs; - __u32 xahash = 0; - - if (IS_RDONLY (inode)) - return -EROFS; - - if (IS_IMMUTABLE (inode) || IS_APPEND (inode)) - return -EPERM; - - if (get_inode_sd_version (inode) == STAT_DATA_V1) - return -EOPNOTSUPP; - - /* Empty xattrs are ok, they're just empty files, no hash */ - if (buffer && buffer_size) - xahash = xattr_hash (buffer, buffer_size); - -open_file: - fp = open_xa_file (inode, name, flags); - if (IS_ERR (fp)) { - err = PTR_ERR (fp); - goto out; - } - - xinode = fp->f_dentry->d_inode; - REISERFS_I(inode)->i_flags |= i_has_xattr_dir; - - /* we need to copy it off.. */ - if (xinode->i_nlink > 1) { - fput(fp); - err = reiserfs_xattr_del (inode, name); - if (err < 0) - goto out; - /* We just killed the old one, we're not replacing anymore */ - if (flags & XATTR_REPLACE) - flags &= ~XATTR_REPLACE; - goto open_file; - } - - /* Resize it so we're ok to write there */ - newattrs.ia_size = buffer_size; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; - down (&xinode->i_sem); - err = notify_change(fp->f_dentry, &newattrs); - if (err) - goto out_filp; - - mapping = xinode->i_mapping; - while (buffer_pos < buffer_size || buffer_pos == 0) { - size_t chunk; - size_t skip = 0; - size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1)); - if (buffer_size - buffer_pos > PAGE_CACHE_SIZE) - chunk = PAGE_CACHE_SIZE; - else - chunk = buffer_size - buffer_pos; - - page = reiserfs_get_page (xinode, file_pos >> PAGE_CACHE_SHIFT); - if (IS_ERR (page)) { - err = PTR_ERR (page); - goto out_filp; - } - - lock_page (page); - data = page_address (page); - - if (file_pos == 0) { - struct reiserfs_xattr_header *rxh; - skip = file_pos = sizeof (struct reiserfs_xattr_header); - if (chunk + skip > PAGE_CACHE_SIZE) 
- chunk = PAGE_CACHE_SIZE - skip; - rxh = (struct reiserfs_xattr_header *)data; - rxh->h_magic = cpu_to_le32 (REISERFS_XATTR_MAGIC); - rxh->h_hash = cpu_to_le32 (xahash); - } - - err = mapping->a_ops->prepare_write (fp, page, page_offset, - page_offset + chunk + skip); - if (!err) { - if (buffer) - memcpy (data + skip, buffer + buffer_pos, chunk); - err = mapping->a_ops->commit_write (fp, page, page_offset, - page_offset + chunk + skip); + int err = 0; + struct file *fp; + struct page *page; + char *data; + struct address_space *mapping; + size_t file_pos = 0; + size_t buffer_pos = 0; + struct inode *xinode; + struct iattr newattrs; + __u32 xahash = 0; + + if (IS_RDONLY(inode)) + return -EROFS; + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return -EPERM; + + if (get_inode_sd_version(inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + + /* Empty xattrs are ok, they're just empty files, no hash */ + if (buffer && buffer_size) + xahash = xattr_hash(buffer, buffer_size); + + open_file: + fp = open_xa_file(inode, name, flags); + if (IS_ERR(fp)) { + err = PTR_ERR(fp); + goto out; + } + + xinode = fp->f_dentry->d_inode; + REISERFS_I(inode)->i_flags |= i_has_xattr_dir; + + /* we need to copy it off.. 
*/ + if (xinode->i_nlink > 1) { + fput(fp); + err = reiserfs_xattr_del(inode, name); + if (err < 0) + goto out; + /* We just killed the old one, we're not replacing anymore */ + if (flags & XATTR_REPLACE) + flags &= ~XATTR_REPLACE; + goto open_file; + } + + /* Resize it so we're ok to write there */ + newattrs.ia_size = buffer_size; + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + down(&xinode->i_sem); + err = notify_change(fp->f_dentry, &newattrs); + if (err) + goto out_filp; + + mapping = xinode->i_mapping; + while (buffer_pos < buffer_size || buffer_pos == 0) { + size_t chunk; + size_t skip = 0; + size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1)); + if (buffer_size - buffer_pos > PAGE_CACHE_SIZE) + chunk = PAGE_CACHE_SIZE; + else + chunk = buffer_size - buffer_pos; + + page = reiserfs_get_page(xinode, file_pos >> PAGE_CACHE_SHIFT); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out_filp; + } + + lock_page(page); + data = page_address(page); + + if (file_pos == 0) { + struct reiserfs_xattr_header *rxh; + skip = file_pos = sizeof(struct reiserfs_xattr_header); + if (chunk + skip > PAGE_CACHE_SIZE) + chunk = PAGE_CACHE_SIZE - skip; + rxh = (struct reiserfs_xattr_header *)data; + rxh->h_magic = cpu_to_le32(REISERFS_XATTR_MAGIC); + rxh->h_hash = cpu_to_le32(xahash); + } + + err = mapping->a_ops->prepare_write(fp, page, page_offset, + page_offset + chunk + skip); + if (!err) { + if (buffer) + memcpy(data + skip, buffer + buffer_pos, chunk); + err = + mapping->a_ops->commit_write(fp, page, page_offset, + page_offset + chunk + + skip); + } + unlock_page(page); + reiserfs_put_page(page); + buffer_pos += chunk; + file_pos += chunk; + skip = 0; + if (err || buffer_size == 0 || !buffer) + break; + } + + /* We can't mark the inode dirty if it's not hashed. This is the case + * when we're inheriting the default ACL. If we dirty it, the inode + * gets marked dirty, but won't (ever) make it onto the dirty list until + * it's synced explicitly to clear I_DIRTY. 
This is bad. */ + if (!hlist_unhashed(&inode->i_hash)) { + inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); } - unlock_page (page); - reiserfs_put_page (page); - buffer_pos += chunk; - file_pos += chunk; - skip = 0; - if (err || buffer_size == 0 || !buffer) - break; - } - - /* We can't mark the inode dirty if it's not hashed. This is the case - * when we're inheriting the default ACL. If we dirty it, the inode - * gets marked dirty, but won't (ever) make it onto the dirty list until - * it's synced explicitly to clear I_DIRTY. This is bad. */ - if (!hlist_unhashed(&inode->i_hash)) { - inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty (inode); - } - -out_filp: - up (&xinode->i_sem); - fput(fp); - -out: - return err; + + out_filp: + up(&xinode->i_sem); + fput(fp); + + out: + return err; } /* * inode->i_sem: down */ int -reiserfs_xattr_get (const struct inode *inode, const char *name, void *buffer, - size_t buffer_size) +reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer, + size_t buffer_size) { - ssize_t err = 0; - struct file *fp; - size_t isize; - size_t file_pos = 0; - size_t buffer_pos = 0; - struct page *page; - struct inode *xinode; - __u32 hash = 0; - - if (name == NULL) - return -EINVAL; - - /* We can't have xattrs attached to v1 items since they don't have - * generation numbers */ - if (get_inode_sd_version (inode) == STAT_DATA_V1) - return -EOPNOTSUPP; - - fp = open_xa_file (inode, name, FL_READONLY); - if (IS_ERR (fp)) { - err = PTR_ERR (fp); - goto out; - } - - xinode = fp->f_dentry->d_inode; - isize = xinode->i_size; - REISERFS_I(inode)->i_flags |= i_has_xattr_dir; - - /* Just return the size needed */ - if (buffer == NULL) { - err = isize - sizeof (struct reiserfs_xattr_header); - goto out_dput; - } - - if (buffer_size < isize - sizeof (struct reiserfs_xattr_header)) { - err = -ERANGE; - goto out_dput; - } - - while (file_pos < isize) { - size_t chunk; - char *data; - size_t skip = 0; - if (isize - file_pos > 
PAGE_CACHE_SIZE) - chunk = PAGE_CACHE_SIZE; - else - chunk = isize - file_pos; - - page = reiserfs_get_page (xinode, file_pos >> PAGE_CACHE_SHIFT); - if (IS_ERR (page)) { - err = PTR_ERR (page); - goto out_dput; - } - - lock_page (page); - data = page_address (page); - if (file_pos == 0) { - struct reiserfs_xattr_header *rxh = - (struct reiserfs_xattr_header *)data; - skip = file_pos = sizeof (struct reiserfs_xattr_header); - chunk -= skip; - /* Magic doesn't match up.. */ - if (rxh->h_magic != cpu_to_le32 (REISERFS_XATTR_MAGIC)) { - unlock_page (page); - reiserfs_put_page (page); - reiserfs_warning (inode->i_sb, "Invalid magic for xattr (%s) " - "associated with %k", name, - INODE_PKEY (inode)); - err = -EIO; - goto out_dput; - } - hash = le32_to_cpu (rxh->h_hash); - } - memcpy (buffer + buffer_pos, data + skip, chunk); - unlock_page (page); - reiserfs_put_page (page); - file_pos += chunk; - buffer_pos += chunk; - skip = 0; - } - err = isize - sizeof (struct reiserfs_xattr_header); - - if (xattr_hash (buffer, isize - sizeof (struct reiserfs_xattr_header)) != hash) { - reiserfs_warning (inode->i_sb, "Invalid hash for xattr (%s) associated " - "with %k", name, INODE_PKEY (inode)); - err = -EIO; - } - -out_dput: - fput(fp); - -out: - return err; + ssize_t err = 0; + struct file *fp; + size_t isize; + size_t file_pos = 0; + size_t buffer_pos = 0; + struct page *page; + struct inode *xinode; + __u32 hash = 0; + + if (name == NULL) + return -EINVAL; + + /* We can't have xattrs attached to v1 items since they don't have + * generation numbers */ + if (get_inode_sd_version(inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + + fp = open_xa_file(inode, name, FL_READONLY); + if (IS_ERR(fp)) { + err = PTR_ERR(fp); + goto out; + } + + xinode = fp->f_dentry->d_inode; + isize = xinode->i_size; + REISERFS_I(inode)->i_flags |= i_has_xattr_dir; + + /* Just return the size needed */ + if (buffer == NULL) { + err = isize - sizeof(struct reiserfs_xattr_header); + goto out_dput; + } + + if 
(buffer_size < isize - sizeof(struct reiserfs_xattr_header)) { + err = -ERANGE; + goto out_dput; + } + + while (file_pos < isize) { + size_t chunk; + char *data; + size_t skip = 0; + if (isize - file_pos > PAGE_CACHE_SIZE) + chunk = PAGE_CACHE_SIZE; + else + chunk = isize - file_pos; + + page = reiserfs_get_page(xinode, file_pos >> PAGE_CACHE_SHIFT); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out_dput; + } + + lock_page(page); + data = page_address(page); + if (file_pos == 0) { + struct reiserfs_xattr_header *rxh = + (struct reiserfs_xattr_header *)data; + skip = file_pos = sizeof(struct reiserfs_xattr_header); + chunk -= skip; + /* Magic doesn't match up.. */ + if (rxh->h_magic != cpu_to_le32(REISERFS_XATTR_MAGIC)) { + unlock_page(page); + reiserfs_put_page(page); + reiserfs_warning(inode->i_sb, + "Invalid magic for xattr (%s) " + "associated with %k", name, + INODE_PKEY(inode)); + err = -EIO; + goto out_dput; + } + hash = le32_to_cpu(rxh->h_hash); + } + memcpy(buffer + buffer_pos, data + skip, chunk); + unlock_page(page); + reiserfs_put_page(page); + file_pos += chunk; + buffer_pos += chunk; + skip = 0; + } + err = isize - sizeof(struct reiserfs_xattr_header); + + if (xattr_hash(buffer, isize - sizeof(struct reiserfs_xattr_header)) != + hash) { + reiserfs_warning(inode->i_sb, + "Invalid hash for xattr (%s) associated " + "with %k", name, INODE_PKEY(inode)); + err = -EIO; + } + + out_dput: + fput(fp); + + out: + return err; } static int -__reiserfs_xattr_del (struct dentry *xadir, const char *name, int namelen) +__reiserfs_xattr_del(struct dentry *xadir, const char *name, int namelen) { - struct dentry *dentry; - struct inode *dir = xadir->d_inode; - int err = 0; - - dentry = lookup_one_len (name, xadir, namelen); - if (IS_ERR (dentry)) { - err = PTR_ERR (dentry); - goto out; - } else if (!dentry->d_inode) { - err = -ENODATA; - goto out_file; - } - - /* Skip directories.. 
*/ - if (S_ISDIR (dentry->d_inode->i_mode)) - goto out_file; - - if (!is_reiserfs_priv_object (dentry->d_inode)) { - reiserfs_warning (dir->i_sb, "OID %08x [%.*s/%.*s] doesn't have " - "priv flag set [parent is %sset].", - le32_to_cpu (INODE_PKEY (dentry->d_inode)->k_objectid), - xadir->d_name.len, xadir->d_name.name, namelen, name, - is_reiserfs_priv_object (xadir->d_inode) ? "" : "not "); - dput (dentry); - return -EIO; - } - - err = dir->i_op->unlink (dir, dentry); - if (!err) - d_delete (dentry); - -out_file: - dput (dentry); - -out: - return err; -} + struct dentry *dentry; + struct inode *dir = xadir->d_inode; + int err = 0; + + dentry = lookup_one_len(name, xadir, namelen); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + goto out; + } else if (!dentry->d_inode) { + err = -ENODATA; + goto out_file; + } + + /* Skip directories.. */ + if (S_ISDIR(dentry->d_inode->i_mode)) + goto out_file; + + if (!is_reiserfs_priv_object(dentry->d_inode)) { + reiserfs_warning(dir->i_sb, "OID %08x [%.*s/%.*s] doesn't have " + "priv flag set [parent is %sset].", + le32_to_cpu(INODE_PKEY(dentry->d_inode)-> + k_objectid), xadir->d_name.len, + xadir->d_name.name, namelen, name, + is_reiserfs_priv_object(xadir-> + d_inode) ? 
"" : + "not "); + dput(dentry); + return -EIO; + } + err = dir->i_op->unlink(dir, dentry); + if (!err) + d_delete(dentry); -int -reiserfs_xattr_del (struct inode *inode, const char *name) + out_file: + dput(dentry); + + out: + return err; +} + +int reiserfs_xattr_del(struct inode *inode, const char *name) { - struct dentry *dir; - int err; + struct dentry *dir; + int err; - if (IS_RDONLY (inode)) - return -EROFS; + if (IS_RDONLY(inode)) + return -EROFS; - dir = open_xa_dir (inode, FL_READONLY); - if (IS_ERR (dir)) { - err = PTR_ERR (dir); - goto out; - } + dir = open_xa_dir(inode, FL_READONLY); + if (IS_ERR(dir)) { + err = PTR_ERR(dir); + goto out; + } - err = __reiserfs_xattr_del (dir, name, strlen (name)); - dput (dir); + err = __reiserfs_xattr_del(dir, name, strlen(name)); + dput(dir); - if (!err) { - inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty (inode); - } + if (!err) { + inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + } -out: - return err; + out: + return err; } /* The following are side effects of other operations that aren't explicitly @@ -777,167 +785,163 @@ out: * or ownership changes, object deletions, etc. 
*/ static int -reiserfs_delete_xattrs_filler (void *buf, const char *name, int namelen, - loff_t offset, ino_t ino, unsigned int d_type) +reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen, + loff_t offset, ino_t ino, unsigned int d_type) { - struct dentry *xadir = (struct dentry *)buf; + struct dentry *xadir = (struct dentry *)buf; - return __reiserfs_xattr_del (xadir, name, namelen); + return __reiserfs_xattr_del(xadir, name, namelen); } /* This is called w/ inode->i_sem downed */ -int -reiserfs_delete_xattrs (struct inode *inode) +int reiserfs_delete_xattrs(struct inode *inode) { - struct file *fp; - struct dentry *dir, *root; - int err = 0; - - /* Skip out, an xattr has no xattrs associated with it */ - if (is_reiserfs_priv_object (inode) || - get_inode_sd_version (inode) == STAT_DATA_V1 || - !reiserfs_xattrs(inode->i_sb)) - { - return 0; - } - reiserfs_read_lock_xattrs (inode->i_sb); - dir = open_xa_dir (inode, FL_READONLY); - reiserfs_read_unlock_xattrs (inode->i_sb); - if (IS_ERR (dir)) { - err = PTR_ERR (dir); - goto out; - } else if (!dir->d_inode) { - dput (dir); - return 0; - } - - fp = dentry_open (dir, NULL, O_RDWR); - if (IS_ERR (fp)) { - err = PTR_ERR (fp); - /* dentry_open dputs the dentry if it fails */ - goto out; - } - - lock_kernel (); - err = xattr_readdir (fp, reiserfs_delete_xattrs_filler, dir); - if (err) { - unlock_kernel (); - goto out_dir; - } - - /* Leftovers besides . and .. -- that's not good. 
*/ - if (dir->d_inode->i_nlink <= 2) { - root = get_xa_root (inode->i_sb); - reiserfs_write_lock_xattrs (inode->i_sb); - err = vfs_rmdir (root->d_inode, dir); - reiserfs_write_unlock_xattrs (inode->i_sb); - dput (root); - } else { - reiserfs_warning (inode->i_sb, - "Couldn't remove all entries in directory"); - } - unlock_kernel (); - -out_dir: - fput(fp); - -out: - if (!err) - REISERFS_I(inode)->i_flags = REISERFS_I(inode)->i_flags & ~i_has_xattr_dir; - return err; + struct file *fp; + struct dentry *dir, *root; + int err = 0; + + /* Skip out, an xattr has no xattrs associated with it */ + if (is_reiserfs_priv_object(inode) || + get_inode_sd_version(inode) == STAT_DATA_V1 || + !reiserfs_xattrs(inode->i_sb)) { + return 0; + } + reiserfs_read_lock_xattrs(inode->i_sb); + dir = open_xa_dir(inode, FL_READONLY); + reiserfs_read_unlock_xattrs(inode->i_sb); + if (IS_ERR(dir)) { + err = PTR_ERR(dir); + goto out; + } else if (!dir->d_inode) { + dput(dir); + return 0; + } + + fp = dentry_open(dir, NULL, O_RDWR); + if (IS_ERR(fp)) { + err = PTR_ERR(fp); + /* dentry_open dputs the dentry if it fails */ + goto out; + } + + lock_kernel(); + err = xattr_readdir(fp, reiserfs_delete_xattrs_filler, dir); + if (err) { + unlock_kernel(); + goto out_dir; + } + + /* Leftovers besides . and .. -- that's not good. 
*/ + if (dir->d_inode->i_nlink <= 2) { + root = get_xa_root(inode->i_sb); + reiserfs_write_lock_xattrs(inode->i_sb); + err = vfs_rmdir(root->d_inode, dir); + reiserfs_write_unlock_xattrs(inode->i_sb); + dput(root); + } else { + reiserfs_warning(inode->i_sb, + "Couldn't remove all entries in directory"); + } + unlock_kernel(); + + out_dir: + fput(fp); + + out: + if (!err) + REISERFS_I(inode)->i_flags = + REISERFS_I(inode)->i_flags & ~i_has_xattr_dir; + return err; } struct reiserfs_chown_buf { - struct inode *inode; - struct dentry *xadir; - struct iattr *attrs; + struct inode *inode; + struct dentry *xadir; + struct iattr *attrs; }; /* XXX: If there is a better way to do this, I'd love to hear about it */ static int -reiserfs_chown_xattrs_filler (void *buf, const char *name, int namelen, - loff_t offset, ino_t ino, unsigned int d_type) +reiserfs_chown_xattrs_filler(void *buf, const char *name, int namelen, + loff_t offset, ino_t ino, unsigned int d_type) { - struct reiserfs_chown_buf *chown_buf = (struct reiserfs_chown_buf *)buf; - struct dentry *xafile, *xadir = chown_buf->xadir; - struct iattr *attrs = chown_buf->attrs; - int err = 0; - - xafile = lookup_one_len (name, xadir, namelen); - if (IS_ERR (xafile)) - return PTR_ERR (xafile); - else if (!xafile->d_inode) { - dput (xafile); - return -ENODATA; - } - - if (!S_ISDIR (xafile->d_inode->i_mode)) - err = notify_change (xafile, attrs); - dput (xafile); - - return err; + struct reiserfs_chown_buf *chown_buf = (struct reiserfs_chown_buf *)buf; + struct dentry *xafile, *xadir = chown_buf->xadir; + struct iattr *attrs = chown_buf->attrs; + int err = 0; + + xafile = lookup_one_len(name, xadir, namelen); + if (IS_ERR(xafile)) + return PTR_ERR(xafile); + else if (!xafile->d_inode) { + dput(xafile); + return -ENODATA; + } + + if (!S_ISDIR(xafile->d_inode->i_mode)) + err = notify_change(xafile, attrs); + dput(xafile); + + return err; } -int -reiserfs_chown_xattrs (struct inode *inode, struct iattr *attrs) +int 
reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs) { - struct file *fp; - struct dentry *dir; - int err = 0; - struct reiserfs_chown_buf buf; - unsigned int ia_valid = attrs->ia_valid; - - /* Skip out, an xattr has no xattrs associated with it */ - if (is_reiserfs_priv_object (inode) || - get_inode_sd_version (inode) == STAT_DATA_V1 || - !reiserfs_xattrs(inode->i_sb)) - { - return 0; - } - reiserfs_read_lock_xattrs (inode->i_sb); - dir = open_xa_dir (inode, FL_READONLY); - reiserfs_read_unlock_xattrs (inode->i_sb); - if (IS_ERR (dir)) { - if (PTR_ERR (dir) != -ENODATA) - err = PTR_ERR (dir); - goto out; - } else if (!dir->d_inode) { - dput (dir); - goto out; - } - - fp = dentry_open (dir, NULL, O_RDWR); - if (IS_ERR (fp)) { - err = PTR_ERR (fp); - /* dentry_open dputs the dentry if it fails */ - goto out; - } - - lock_kernel (); - - attrs->ia_valid &= (ATTR_UID | ATTR_GID | ATTR_CTIME); - buf.xadir = dir; - buf.attrs = attrs; - buf.inode = inode; - - err = xattr_readdir (fp, reiserfs_chown_xattrs_filler, &buf); - if (err) { - unlock_kernel (); - goto out_dir; - } - - err = notify_change (dir, attrs); - unlock_kernel (); - -out_dir: - fput(fp); - -out: - attrs->ia_valid = ia_valid; - return err; -} + struct file *fp; + struct dentry *dir; + int err = 0; + struct reiserfs_chown_buf buf; + unsigned int ia_valid = attrs->ia_valid; + + /* Skip out, an xattr has no xattrs associated with it */ + if (is_reiserfs_priv_object(inode) || + get_inode_sd_version(inode) == STAT_DATA_V1 || + !reiserfs_xattrs(inode->i_sb)) { + return 0; + } + reiserfs_read_lock_xattrs(inode->i_sb); + dir = open_xa_dir(inode, FL_READONLY); + reiserfs_read_unlock_xattrs(inode->i_sb); + if (IS_ERR(dir)) { + if (PTR_ERR(dir) != -ENODATA) + err = PTR_ERR(dir); + goto out; + } else if (!dir->d_inode) { + dput(dir); + goto out; + } + + fp = dentry_open(dir, NULL, O_RDWR); + if (IS_ERR(fp)) { + err = PTR_ERR(fp); + /* dentry_open dputs the dentry if it fails */ + goto out; + } + 
lock_kernel(); + + attrs->ia_valid &= (ATTR_UID | ATTR_GID | ATTR_CTIME); + buf.xadir = dir; + buf.attrs = attrs; + buf.inode = inode; + + err = xattr_readdir(fp, reiserfs_chown_xattrs_filler, &buf); + if (err) { + unlock_kernel(); + goto out_dir; + } + + err = notify_change(dir, attrs); + unlock_kernel(); + + out_dir: + fput(fp); + + out: + attrs->ia_valid = ia_valid; + return err; +} /* Actual operations that are exported to VFS-land */ @@ -946,61 +950,60 @@ out: * Preliminary locking: we down dentry->d_inode->i_sem */ ssize_t -reiserfs_getxattr (struct dentry *dentry, const char *name, void *buffer, - size_t size) +reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, + size_t size) { - struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix (name); - int err; - - if (!xah || !reiserfs_xattrs(dentry->d_sb) || - get_inode_sd_version (dentry->d_inode) == STAT_DATA_V1) - return -EOPNOTSUPP; - - reiserfs_read_lock_xattr_i (dentry->d_inode); - reiserfs_read_lock_xattrs (dentry->d_sb); - err = xah->get (dentry->d_inode, name, buffer, size); - reiserfs_read_unlock_xattrs (dentry->d_sb); - reiserfs_read_unlock_xattr_i (dentry->d_inode); - return err; + struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name); + int err; + + if (!xah || !reiserfs_xattrs(dentry->d_sb) || + get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + + reiserfs_read_lock_xattr_i(dentry->d_inode); + reiserfs_read_lock_xattrs(dentry->d_sb); + err = xah->get(dentry->d_inode, name, buffer, size); + reiserfs_read_unlock_xattrs(dentry->d_sb); + reiserfs_read_unlock_xattr_i(dentry->d_inode); + return err; } - /* * Inode operation setxattr() * * dentry->d_inode->i_sem down */ int -reiserfs_setxattr (struct dentry *dentry, const char *name, const void *value, - size_t size, int flags) +reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) { - struct reiserfs_xattr_handler *xah = 
find_xattr_handler_prefix (name); - int err; - int lock; - - if (!xah || !reiserfs_xattrs(dentry->d_sb) || - get_inode_sd_version (dentry->d_inode) == STAT_DATA_V1) - return -EOPNOTSUPP; - - if (IS_RDONLY (dentry->d_inode)) - return -EROFS; - - if (IS_IMMUTABLE (dentry->d_inode) || IS_APPEND (dentry->d_inode)) - return -EROFS; - - reiserfs_write_lock_xattr_i (dentry->d_inode); - lock = !has_xattr_dir (dentry->d_inode); - if (lock) - reiserfs_write_lock_xattrs (dentry->d_sb); - else - reiserfs_read_lock_xattrs (dentry->d_sb); - err = xah->set (dentry->d_inode, name, value, size, flags); - if (lock) - reiserfs_write_unlock_xattrs (dentry->d_sb); - else - reiserfs_read_unlock_xattrs (dentry->d_sb); - reiserfs_write_unlock_xattr_i (dentry->d_inode); - return err; + struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name); + int err; + int lock; + + if (!xah || !reiserfs_xattrs(dentry->d_sb) || + get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + + if (IS_RDONLY(dentry->d_inode)) + return -EROFS; + + if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) + return -EROFS; + + reiserfs_write_lock_xattr_i(dentry->d_inode); + lock = !has_xattr_dir(dentry->d_inode); + if (lock) + reiserfs_write_lock_xattrs(dentry->d_sb); + else + reiserfs_read_lock_xattrs(dentry->d_sb); + err = xah->set(dentry->d_inode, name, value, size, flags); + if (lock) + reiserfs_write_unlock_xattrs(dentry->d_sb); + else + reiserfs_read_unlock_xattrs(dentry->d_sb); + reiserfs_write_unlock_xattr_i(dentry->d_inode); + return err; } /* @@ -1008,344 +1011,343 @@ reiserfs_setxattr (struct dentry *dentry, const char *name, const void *value, * * dentry->d_inode->i_sem down */ -int -reiserfs_removexattr (struct dentry *dentry, const char *name) +int reiserfs_removexattr(struct dentry *dentry, const char *name) { - int err; - struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix (name); + int err; + struct reiserfs_xattr_handler *xah = 
find_xattr_handler_prefix(name); - if (!xah || !reiserfs_xattrs(dentry->d_sb) || - get_inode_sd_version (dentry->d_inode) == STAT_DATA_V1) - return -EOPNOTSUPP; + if (!xah || !reiserfs_xattrs(dentry->d_sb) || + get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) + return -EOPNOTSUPP; - if (IS_RDONLY (dentry->d_inode)) - return -EROFS; + if (IS_RDONLY(dentry->d_inode)) + return -EROFS; - if (IS_IMMUTABLE (dentry->d_inode) || IS_APPEND (dentry->d_inode)) - return -EPERM; + if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) + return -EPERM; - reiserfs_write_lock_xattr_i (dentry->d_inode); - reiserfs_read_lock_xattrs (dentry->d_sb); + reiserfs_write_lock_xattr_i(dentry->d_inode); + reiserfs_read_lock_xattrs(dentry->d_sb); - /* Deletion pre-operation */ - if (xah->del) { - err = xah->del (dentry->d_inode, name); - if (err) - goto out; - } + /* Deletion pre-operation */ + if (xah->del) { + err = xah->del(dentry->d_inode, name); + if (err) + goto out; + } - err = reiserfs_xattr_del (dentry->d_inode, name); + err = reiserfs_xattr_del(dentry->d_inode, name); - dentry->d_inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty (dentry->d_inode); + dentry->d_inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(dentry->d_inode); -out: - reiserfs_read_unlock_xattrs (dentry->d_sb); - reiserfs_write_unlock_xattr_i (dentry->d_inode); - return err; + out: + reiserfs_read_unlock_xattrs(dentry->d_sb); + reiserfs_write_unlock_xattr_i(dentry->d_inode); + return err; } - /* This is what filldir will use: * r_pos will always contain the amount of space required for the entire * list. If r_pos becomes larger than r_size, we need more space and we * return an error indicating this. 
If r_pos is less than r_size, then we've * filled the buffer successfully and we return success */ struct reiserfs_listxattr_buf { - int r_pos; - int r_size; - char *r_buf; - struct inode *r_inode; + int r_pos; + int r_size; + char *r_buf; + struct inode *r_inode; }; static int -reiserfs_listxattr_filler (void *buf, const char *name, int namelen, - loff_t offset, ino_t ino, unsigned int d_type) +reiserfs_listxattr_filler(void *buf, const char *name, int namelen, + loff_t offset, ino_t ino, unsigned int d_type) { - struct reiserfs_listxattr_buf *b = (struct reiserfs_listxattr_buf *)buf; - int len = 0; - if (name[0] != '.' || (namelen != 1 && (name[1] != '.' || namelen != 2))) { - struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix (name); - if (!xah) return 0; /* Unsupported xattr name, skip it */ - - /* We call ->list() twice because the operation isn't required to just - * return the name back - we want to make sure we have enough space */ - len += xah->list (b->r_inode, name, namelen, NULL); - - if (len) { - if (b->r_pos + len + 1 <= b->r_size) { - char *p = b->r_buf + b->r_pos; - p += xah->list (b->r_inode, name, namelen, p); - *p++ = '\0'; - } - b->r_pos += len + 1; - } - } - - return 0; + struct reiserfs_listxattr_buf *b = (struct reiserfs_listxattr_buf *)buf; + int len = 0; + if (name[0] != '.' + || (namelen != 1 && (name[1] != '.' 
|| namelen != 2))) { + struct reiserfs_xattr_handler *xah = + find_xattr_handler_prefix(name); + if (!xah) + return 0; /* Unsupported xattr name, skip it */ + + /* We call ->list() twice because the operation isn't required to just + * return the name back - we want to make sure we have enough space */ + len += xah->list(b->r_inode, name, namelen, NULL); + + if (len) { + if (b->r_pos + len + 1 <= b->r_size) { + char *p = b->r_buf + b->r_pos; + p += xah->list(b->r_inode, name, namelen, p); + *p++ = '\0'; + } + b->r_pos += len + 1; + } + } + + return 0; } + /* * Inode operation listxattr() * * Preliminary locking: we down dentry->d_inode->i_sem */ -ssize_t -reiserfs_listxattr (struct dentry *dentry, char *buffer, size_t size) +ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) { - struct file *fp; - struct dentry *dir; - int err = 0; - struct reiserfs_listxattr_buf buf; - - if (!dentry->d_inode) - return -EINVAL; - - if (!reiserfs_xattrs(dentry->d_sb) || - get_inode_sd_version (dentry->d_inode) == STAT_DATA_V1) - return -EOPNOTSUPP; - - reiserfs_read_lock_xattr_i (dentry->d_inode); - reiserfs_read_lock_xattrs (dentry->d_sb); - dir = open_xa_dir (dentry->d_inode, FL_READONLY); - reiserfs_read_unlock_xattrs (dentry->d_sb); - if (IS_ERR (dir)) { - err = PTR_ERR (dir); - if (err == -ENODATA) - err = 0; /* Not an error if there aren't any xattrs */ - goto out; - } - - fp = dentry_open (dir, NULL, O_RDWR); - if (IS_ERR (fp)) { - err = PTR_ERR (fp); - /* dentry_open dputs the dentry if it fails */ - goto out; - } - - buf.r_buf = buffer; - buf.r_size = buffer ? 
size : 0; - buf.r_pos = 0; - buf.r_inode = dentry->d_inode; - - REISERFS_I(dentry->d_inode)->i_flags |= i_has_xattr_dir; - - err = xattr_readdir (fp, reiserfs_listxattr_filler, &buf); - if (err) - goto out_dir; - - if (buf.r_pos > buf.r_size && buffer != NULL) - err = -ERANGE; - else - err = buf.r_pos; - -out_dir: - fput(fp); - -out: - reiserfs_read_unlock_xattr_i (dentry->d_inode); - return err; + struct file *fp; + struct dentry *dir; + int err = 0; + struct reiserfs_listxattr_buf buf; + + if (!dentry->d_inode) + return -EINVAL; + + if (!reiserfs_xattrs(dentry->d_sb) || + get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + + reiserfs_read_lock_xattr_i(dentry->d_inode); + reiserfs_read_lock_xattrs(dentry->d_sb); + dir = open_xa_dir(dentry->d_inode, FL_READONLY); + reiserfs_read_unlock_xattrs(dentry->d_sb); + if (IS_ERR(dir)) { + err = PTR_ERR(dir); + if (err == -ENODATA) + err = 0; /* Not an error if there aren't any xattrs */ + goto out; + } + + fp = dentry_open(dir, NULL, O_RDWR); + if (IS_ERR(fp)) { + err = PTR_ERR(fp); + /* dentry_open dputs the dentry if it fails */ + goto out; + } + + buf.r_buf = buffer; + buf.r_size = buffer ? 
size : 0; + buf.r_pos = 0; + buf.r_inode = dentry->d_inode; + + REISERFS_I(dentry->d_inode)->i_flags |= i_has_xattr_dir; + + err = xattr_readdir(fp, reiserfs_listxattr_filler, &buf); + if (err) + goto out_dir; + + if (buf.r_pos > buf.r_size && buffer != NULL) + err = -ERANGE; + else + err = buf.r_pos; + + out_dir: + fput(fp); + + out: + reiserfs_read_unlock_xattr_i(dentry->d_inode); + return err; } /* This is the implementation for the xattr plugin infrastructure */ -static struct list_head xattr_handlers = LIST_HEAD_INIT (xattr_handlers); +static struct list_head xattr_handlers = LIST_HEAD_INIT(xattr_handlers); static DEFINE_RWLOCK(handler_lock); -static struct reiserfs_xattr_handler * -find_xattr_handler_prefix (const char *prefix) +static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char + *prefix) { - struct reiserfs_xattr_handler *xah = NULL; - struct list_head *p; - - read_lock (&handler_lock); - list_for_each (p, &xattr_handlers) { - xah = list_entry (p, struct reiserfs_xattr_handler, handlers); - if (strncmp (xah->prefix, prefix, strlen (xah->prefix)) == 0) - break; - xah = NULL; - } - - read_unlock (&handler_lock); - return xah; + struct reiserfs_xattr_handler *xah = NULL; + struct list_head *p; + + read_lock(&handler_lock); + list_for_each(p, &xattr_handlers) { + xah = list_entry(p, struct reiserfs_xattr_handler, handlers); + if (strncmp(xah->prefix, prefix, strlen(xah->prefix)) == 0) + break; + xah = NULL; + } + + read_unlock(&handler_lock); + return xah; } -static void -__unregister_handlers (void) +static void __unregister_handlers(void) { - struct reiserfs_xattr_handler *xah; - struct list_head *p, *tmp; + struct reiserfs_xattr_handler *xah; + struct list_head *p, *tmp; - list_for_each_safe (p, tmp, &xattr_handlers) { - xah = list_entry (p, struct reiserfs_xattr_handler, handlers); - if (xah->exit) - xah->exit(); + list_for_each_safe(p, tmp, &xattr_handlers) { + xah = list_entry(p, struct reiserfs_xattr_handler, handlers); + if 
(xah->exit) + xah->exit(); - list_del_init (p); - } - INIT_LIST_HEAD (&xattr_handlers); + list_del_init(p); + } + INIT_LIST_HEAD(&xattr_handlers); } -int __init -reiserfs_xattr_register_handlers (void) +int __init reiserfs_xattr_register_handlers(void) { - int err = 0; - struct reiserfs_xattr_handler *xah; - struct list_head *p; + int err = 0; + struct reiserfs_xattr_handler *xah; + struct list_head *p; - write_lock (&handler_lock); + write_lock(&handler_lock); - /* If we're already initialized, nothing to do */ - if (!list_empty (&xattr_handlers)) { - write_unlock (&handler_lock); - return 0; - } + /* If we're already initialized, nothing to do */ + if (!list_empty(&xattr_handlers)) { + write_unlock(&handler_lock); + return 0; + } - /* Add the handlers */ - list_add_tail (&user_handler.handlers, &xattr_handlers); - list_add_tail (&trusted_handler.handlers, &xattr_handlers); + /* Add the handlers */ + list_add_tail(&user_handler.handlers, &xattr_handlers); + list_add_tail(&trusted_handler.handlers, &xattr_handlers); #ifdef CONFIG_REISERFS_FS_SECURITY - list_add_tail (&security_handler.handlers, &xattr_handlers); + list_add_tail(&security_handler.handlers, &xattr_handlers); #endif #ifdef CONFIG_REISERFS_FS_POSIX_ACL - list_add_tail (&posix_acl_access_handler.handlers, &xattr_handlers); - list_add_tail (&posix_acl_default_handler.handlers, &xattr_handlers); + list_add_tail(&posix_acl_access_handler.handlers, &xattr_handlers); + list_add_tail(&posix_acl_default_handler.handlers, &xattr_handlers); #endif - /* Run initializers, if available */ - list_for_each (p, &xattr_handlers) { - xah = list_entry (p, struct reiserfs_xattr_handler, handlers); - if (xah->init) { - err = xah->init (); - if (err) { - list_del_init (p); - break; - } - } - } - - /* Clean up other handlers, if any failed */ - if (err) - __unregister_handlers (); - - write_unlock (&handler_lock); - return err; + /* Run initializers, if available */ + list_for_each(p, &xattr_handlers) { + xah = list_entry(p, 
struct reiserfs_xattr_handler, handlers); + if (xah->init) { + err = xah->init(); + if (err) { + list_del_init(p); + break; + } + } + } + + /* Clean up other handlers, if any failed */ + if (err) + __unregister_handlers(); + + write_unlock(&handler_lock); + return err; } -void -reiserfs_xattr_unregister_handlers (void) +void reiserfs_xattr_unregister_handlers(void) { - write_lock (&handler_lock); - __unregister_handlers (); - write_unlock (&handler_lock); + write_lock(&handler_lock); + __unregister_handlers(); + write_unlock(&handler_lock); } /* This will catch lookups from the fs root to .reiserfs_priv */ static int -xattr_lookup_poison (struct dentry *dentry, struct qstr *q1, struct qstr *name) +xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) { - struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; - if (name->len == priv_root->d_name.len && - name->hash == priv_root->d_name.hash && - !memcmp (name->name, priv_root->d_name.name, name->len)) { - return -ENOENT; - } else if (q1->len == name->len && - !memcmp(q1->name, name->name, name->len)) - return 0; - return 1; + struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; + if (name->len == priv_root->d_name.len && + name->hash == priv_root->d_name.hash && + !memcmp(name->name, priv_root->d_name.name, name->len)) { + return -ENOENT; + } else if (q1->len == name->len && + !memcmp(q1->name, name->name, name->len)) + return 0; + return 1; } static struct dentry_operations xattr_lookup_poison_ops = { - .d_compare = xattr_lookup_poison, + .d_compare = xattr_lookup_poison, }; - /* We need to take a copy of the mount flags since things like * MS_RDONLY don't get set until *after* we're called. 
* mount_flags != mount_options */ -int -reiserfs_xattr_init (struct super_block *s, int mount_flags) +int reiserfs_xattr_init(struct super_block *s, int mount_flags) { - int err = 0; - - /* We need generation numbers to ensure that the oid mapping is correct - * v3.5 filesystems don't have them. */ - if (!old_format_only (s)) { - set_bit (REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); - } else if (reiserfs_xattrs_optional (s)) { - /* Old format filesystem, but optional xattrs have been enabled - * at mount time. Error out. */ - reiserfs_warning (s, "xattrs/ACLs not supported on pre v3.6 " - "format filesystem. Failing mount."); - err = -EOPNOTSUPP; - goto error; - } else { - /* Old format filesystem, but no optional xattrs have been enabled. This - * means we silently disable xattrs on the filesystem. */ - clear_bit (REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); - } - - /* If we don't have the privroot located yet - go find it */ - if (reiserfs_xattrs (s) && !REISERFS_SB(s)->priv_root) { - struct dentry *dentry; - dentry = lookup_one_len (PRIVROOT_NAME, s->s_root, - strlen (PRIVROOT_NAME)); - if (!IS_ERR (dentry)) { - if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) { - struct inode *inode = dentry->d_parent->d_inode; - down (&inode->i_sem); - err = inode->i_op->mkdir (inode, dentry, 0700); - up (&inode->i_sem); - if (err) { - dput (dentry); - dentry = NULL; - } - - if (dentry && dentry->d_inode) - reiserfs_warning (s, "Created %s on %s - reserved for " - "xattr storage.", PRIVROOT_NAME, - reiserfs_bdevname (inode->i_sb)); - } else if (!dentry->d_inode) { - dput (dentry); - dentry = NULL; - } - } else - err = PTR_ERR (dentry); - - if (!err && dentry) { - s->s_root->d_op = &xattr_lookup_poison_ops; - reiserfs_mark_inode_private (dentry->d_inode); - REISERFS_SB(s)->priv_root = dentry; - } else if (!(mount_flags & MS_RDONLY)) { /* xattrs are unavailable */ - /* If we're read-only it just means that the dir hasn't been - * created. 
Not an error -- just no xattrs on the fs. We'll - * check again if we go read-write */ - reiserfs_warning (s, "xattrs/ACLs enabled and couldn't " - "find/create .reiserfs_priv. Failing mount."); - err = -EOPNOTSUPP; - } - } - -error: - /* This is only nonzero if there was an error initializing the xattr - * directory or if there is a condition where we don't support them. */ - if (err) { - clear_bit (REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); - clear_bit (REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt)); - clear_bit (REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt)); - } - - /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ - s->s_flags = s->s_flags & ~MS_POSIXACL; - if (reiserfs_posixacl (s)) - s->s_flags |= MS_POSIXACL; - - return err; + int err = 0; + + /* We need generation numbers to ensure that the oid mapping is correct + * v3.5 filesystems don't have them. */ + if (!old_format_only(s)) { + set_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); + } else if (reiserfs_xattrs_optional(s)) { + /* Old format filesystem, but optional xattrs have been enabled + * at mount time. Error out. */ + reiserfs_warning(s, "xattrs/ACLs not supported on pre v3.6 " + "format filesystem. Failing mount."); + err = -EOPNOTSUPP; + goto error; + } else { + /* Old format filesystem, but no optional xattrs have been enabled. This + * means we silently disable xattrs on the filesystem. 
*/ + clear_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); + } + + /* If we don't have the privroot located yet - go find it */ + if (reiserfs_xattrs(s) && !REISERFS_SB(s)->priv_root) { + struct dentry *dentry; + dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, + strlen(PRIVROOT_NAME)); + if (!IS_ERR(dentry)) { + if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) { + struct inode *inode = dentry->d_parent->d_inode; + down(&inode->i_sem); + err = inode->i_op->mkdir(inode, dentry, 0700); + up(&inode->i_sem); + if (err) { + dput(dentry); + dentry = NULL; + } + + if (dentry && dentry->d_inode) + reiserfs_warning(s, + "Created %s on %s - reserved for " + "xattr storage.", + PRIVROOT_NAME, + reiserfs_bdevname + (inode->i_sb)); + } else if (!dentry->d_inode) { + dput(dentry); + dentry = NULL; + } + } else + err = PTR_ERR(dentry); + + if (!err && dentry) { + s->s_root->d_op = &xattr_lookup_poison_ops; + reiserfs_mark_inode_private(dentry->d_inode); + REISERFS_SB(s)->priv_root = dentry; + } else if (!(mount_flags & MS_RDONLY)) { /* xattrs are unavailable */ + /* If we're read-only it just means that the dir hasn't been + * created. Not an error -- just no xattrs on the fs. We'll + * check again if we go read-write */ + reiserfs_warning(s, "xattrs/ACLs enabled and couldn't " + "find/create .reiserfs_priv. Failing mount."); + err = -EOPNOTSUPP; + } + } + + error: + /* This is only nonzero if there was an error initializing the xattr + * directory or if there is a condition where we don't support them. */ + if (err) { + clear_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); + clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt)); + clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt)); + } + + /* The super_block MS_POSIXACL must mirror the (no)acl mount option. 
*/ + s->s_flags = s->s_flags & ~MS_POSIXACL; + if (reiserfs_posixacl(s)) + s->s_flags |= MS_POSIXACL; + + return err; } static int -__reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd, - int need_lock) +__reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd, + int need_lock) { - umode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; if (mask & MAY_WRITE) { /* @@ -1363,50 +1365,50 @@ __reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd, } /* We don't do permission checks on the internal objects. - * Permissions are determined by the "owning" object. */ - if (is_reiserfs_priv_object (inode)) + * Permissions are determined by the "owning" object. */ + if (is_reiserfs_priv_object(inode)) return 0; if (current->fsuid == inode->i_uid) { mode >>= 6; #ifdef CONFIG_REISERFS_FS_POSIX_ACL } else if (reiserfs_posixacl(inode->i_sb) && - get_inode_sd_version (inode) != STAT_DATA_V1) { - struct posix_acl *acl; + get_inode_sd_version(inode) != STAT_DATA_V1) { + struct posix_acl *acl; /* ACL can't contain additional permissions if the ACL_MASK entry is 0 */ if (!(mode & S_IRWXG)) goto check_groups; - if (need_lock) { - reiserfs_read_lock_xattr_i (inode); - reiserfs_read_lock_xattrs (inode->i_sb); + if (need_lock) { + reiserfs_read_lock_xattr_i(inode); + reiserfs_read_lock_xattrs(inode->i_sb); + } + acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); + if (need_lock) { + reiserfs_read_unlock_xattrs(inode->i_sb); + reiserfs_read_unlock_xattr_i(inode); } - acl = reiserfs_get_acl (inode, ACL_TYPE_ACCESS); - if (need_lock) { - reiserfs_read_unlock_xattrs (inode->i_sb); - reiserfs_read_unlock_xattr_i (inode); + if (IS_ERR(acl)) { + if (PTR_ERR(acl) == -ENODATA) + goto check_groups; + return PTR_ERR(acl); } - if (IS_ERR (acl)) { - if (PTR_ERR (acl) == -ENODATA) - goto check_groups; - return PTR_ERR (acl); - } - - if (acl) { - int err = posix_acl_permission (inode, acl, mask); - posix_acl_release (acl); - if (err == 
-EACCES) { - goto check_capabilities; - } - return err; + + if (acl) { + int err = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + if (err == -EACCES) { + goto check_capabilities; + } + return err; } else { goto check_groups; - } + } #endif } else { -check_groups: + check_groups: if (in_group_p(inode->i_gid)) mode >>= 3; } @@ -1414,10 +1416,10 @@ check_groups: /* * If the DACs are ok we don't need any capability check. */ - if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) + if (((mode & mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == mask)) return 0; -check_capabilities: + check_capabilities: /* * Read/write DACs are always overridable. * Executable DACs are overridable if at least one exec bit is set. @@ -1437,14 +1439,13 @@ check_capabilities: return -EACCES; } -int -reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd) +int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd) { - return __reiserfs_permission (inode, mask, nd, 1); + return __reiserfs_permission(inode, mask, nd, 1); } int -reiserfs_permission_locked (struct inode *inode, int mask, struct nameidata *nd) +reiserfs_permission_locked(struct inode *inode, int mask, struct nameidata *nd) { - return __reiserfs_permission (inode, mask, nd, 0); + return __reiserfs_permission(inode, mask, nd, 0); } diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index c312881c5f5..6703efa3c43 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -9,7 +9,8 @@ #include #include -static int reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl); +static int reiserfs_set_acl(struct inode *inode, int type, + struct posix_acl *acl); static int xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) @@ -34,14 +35,13 @@ xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) } else acl = NULL; - error = reiserfs_set_acl (inode, type, acl); + error = reiserfs_set_acl(inode, 
type, acl); -release_and_out: + release_and_out: posix_acl_release(acl); return error; } - static int xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) { @@ -51,7 +51,7 @@ xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) if (!reiserfs_posixacl(inode->i_sb)) return -EOPNOTSUPP; - acl = reiserfs_get_acl (inode, type); + acl = reiserfs_get_acl(inode, type); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -62,12 +62,10 @@ xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) return error; } - /* * Convert from filesystem to in-memory representation. */ -static struct posix_acl * -posix_acl_from_disk(const void *value, size_t size) +static struct posix_acl *posix_acl_from_disk(const void *value, size_t size) { const char *end = (char *)value + size; int n, count; @@ -76,8 +74,8 @@ posix_acl_from_disk(const void *value, size_t size) if (!value) return NULL; if (size < sizeof(reiserfs_acl_header)) - return ERR_PTR(-EINVAL); - if (((reiserfs_acl_header *)value)->a_version != + return ERR_PTR(-EINVAL); + if (((reiserfs_acl_header *) value)->a_version != cpu_to_le32(REISERFS_ACL_VERSION)) return ERR_PTR(-EINVAL); value = (char *)value + sizeof(reiserfs_acl_header); @@ -89,41 +87,39 @@ posix_acl_from_disk(const void *value, size_t size) acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); - for (n=0; n < count; n++) { - reiserfs_acl_entry *entry = - (reiserfs_acl_entry *)value; + for (n = 0; n < count; n++) { + reiserfs_acl_entry *entry = (reiserfs_acl_entry *) value; if ((char *)value + sizeof(reiserfs_acl_entry_short) > end) goto fail; - acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); + acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); - switch(acl->a_entries[n].e_tag) { - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - value = (char *)value + - sizeof(reiserfs_acl_entry_short); - 
acl->a_entries[n].e_id = ACL_UNDEFINED_ID; - break; - - case ACL_USER: - case ACL_GROUP: - value = (char *)value + sizeof(reiserfs_acl_entry); - if ((char *)value > end) - goto fail; - acl->a_entries[n].e_id = - le32_to_cpu(entry->e_id); - break; - - default: + switch (acl->a_entries[n].e_tag) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + value = (char *)value + + sizeof(reiserfs_acl_entry_short); + acl->a_entries[n].e_id = ACL_UNDEFINED_ID; + break; + + case ACL_USER: + case ACL_GROUP: + value = (char *)value + sizeof(reiserfs_acl_entry); + if ((char *)value > end) goto fail; + acl->a_entries[n].e_id = le32_to_cpu(entry->e_id); + break; + + default: + goto fail; } } if (value != end) goto fail; return acl; -fail: + fail: posix_acl_release(acl); return ERR_PTR(-EINVAL); } @@ -131,46 +127,46 @@ fail: /* * Convert from in-memory to filesystem representation. */ -static void * -posix_acl_to_disk(const struct posix_acl *acl, size_t *size) +static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size) { reiserfs_acl_header *ext_acl; char *e; int n; *size = reiserfs_acl_size(acl->a_count); - ext_acl = (reiserfs_acl_header *)kmalloc(sizeof(reiserfs_acl_header) + - acl->a_count * sizeof(reiserfs_acl_entry), GFP_NOFS); + ext_acl = (reiserfs_acl_header *) kmalloc(sizeof(reiserfs_acl_header) + + acl->a_count * + sizeof(reiserfs_acl_entry), + GFP_NOFS); if (!ext_acl) return ERR_PTR(-ENOMEM); ext_acl->a_version = cpu_to_le32(REISERFS_ACL_VERSION); e = (char *)ext_acl + sizeof(reiserfs_acl_header); - for (n=0; n < acl->a_count; n++) { - reiserfs_acl_entry *entry = (reiserfs_acl_entry *)e; - entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); + for (n = 0; n < acl->a_count; n++) { + reiserfs_acl_entry *entry = (reiserfs_acl_entry *) e; + entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); - switch(acl->a_entries[n].e_tag) { - case ACL_USER: - case ACL_GROUP: - entry->e_id = - 
cpu_to_le32(acl->a_entries[n].e_id); - e += sizeof(reiserfs_acl_entry); - break; - - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - e += sizeof(reiserfs_acl_entry_short); - break; - - default: - goto fail; + switch (acl->a_entries[n].e_tag) { + case ACL_USER: + case ACL_GROUP: + entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); + e += sizeof(reiserfs_acl_entry); + break; + + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + e += sizeof(reiserfs_acl_entry_short); + break; + + default: + goto fail; } } return (char *)ext_acl; -fail: + fail: kfree(ext_acl); return ERR_PTR(-EINVAL); } @@ -181,59 +177,58 @@ fail: * inode->i_sem: down * BKL held [before 2.5.x] */ -struct posix_acl * -reiserfs_get_acl(struct inode *inode, int type) +struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) { char *name, *value; struct posix_acl *acl, **p_acl; size_t size; int retval; - struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); - - switch (type) { - case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; - p_acl = &reiserfs_i->i_acl_access; - break; - case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &reiserfs_i->i_acl_default; - break; - default: - return ERR_PTR (-EINVAL); - } - - if (IS_ERR (*p_acl)) { - if (PTR_ERR (*p_acl) == -ENODATA) - return NULL; - } else if (*p_acl != NULL) - return posix_acl_dup (*p_acl); - - size = reiserfs_xattr_get (inode, name, NULL, 0); - if ((int)size < 0) { - if (size == -ENODATA || size == -ENOSYS) { - *p_acl = ERR_PTR (-ENODATA); - return NULL; - } - return ERR_PTR (size); - } + struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); + + switch (type) { + case ACL_TYPE_ACCESS: + name = POSIX_ACL_XATTR_ACCESS; + p_acl = &reiserfs_i->i_acl_access; + break; + case ACL_TYPE_DEFAULT: + name = POSIX_ACL_XATTR_DEFAULT; + p_acl = &reiserfs_i->i_acl_default; + break; + default: + return ERR_PTR(-EINVAL); + } + + if (IS_ERR(*p_acl)) { + if (PTR_ERR(*p_acl) == 
-ENODATA) + return NULL; + } else if (*p_acl != NULL) + return posix_acl_dup(*p_acl); + + size = reiserfs_xattr_get(inode, name, NULL, 0); + if ((int)size < 0) { + if (size == -ENODATA || size == -ENOSYS) { + *p_acl = ERR_PTR(-ENODATA); + return NULL; + } + return ERR_PTR(size); + } - value = kmalloc (size, GFP_NOFS); - if (!value) - return ERR_PTR (-ENOMEM); + value = kmalloc(size, GFP_NOFS); + if (!value) + return ERR_PTR(-ENOMEM); retval = reiserfs_xattr_get(inode, name, value, size); if (retval == -ENODATA || retval == -ENOSYS) { /* This shouldn't actually happen as it should have been caught above.. but just in case */ acl = NULL; - *p_acl = ERR_PTR (-ENODATA); - } else if (retval < 0) { + *p_acl = ERR_PTR(-ENODATA); + } else if (retval < 0) { acl = ERR_PTR(retval); } else { acl = posix_acl_from_disk(value, retval); - *p_acl = posix_acl_dup (acl); - } + *p_acl = posix_acl_dup(acl); + } kfree(value); return acl; @@ -248,72 +243,72 @@ reiserfs_get_acl(struct inode *inode, int type) static int reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) { - char *name; + char *name; void *value = NULL; struct posix_acl **p_acl; size_t size; int error; - struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); + struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; - switch (type) { - case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; - p_acl = &reiserfs_i->i_acl_access; - if (acl) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode (acl, &mode); - if (error < 0) - return error; - else { - inode->i_mode = mode; - if (error == 0) - acl = NULL; - } - } - break; - case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &reiserfs_i->i_acl_default; - if (!S_ISDIR (inode->i_mode)) - return acl ? 
-EACCES : 0; - break; - default: - return -EINVAL; - } - - if (acl) { - value = posix_acl_to_disk(acl, &size); - if (IS_ERR(value)) - return (int)PTR_ERR(value); - error = reiserfs_xattr_set(inode, name, value, size, 0); + switch (type) { + case ACL_TYPE_ACCESS: + name = POSIX_ACL_XATTR_ACCESS; + p_acl = &reiserfs_i->i_acl_access; + if (acl) { + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) + return error; + else { + inode->i_mode = mode; + if (error == 0) + acl = NULL; + } + } + break; + case ACL_TYPE_DEFAULT: + name = POSIX_ACL_XATTR_DEFAULT; + p_acl = &reiserfs_i->i_acl_default; + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + break; + default: + return -EINVAL; + } + + if (acl) { + value = posix_acl_to_disk(acl, &size); + if (IS_ERR(value)) + return (int)PTR_ERR(value); + error = reiserfs_xattr_set(inode, name, value, size, 0); } else { - error = reiserfs_xattr_del (inode, name); - if (error == -ENODATA) { - /* This may seem odd here, but it means that the ACL was set - * with a value representable with mode bits. If there was - * an ACL before, reiserfs_xattr_del already dirtied the inode. - */ - mark_inode_dirty (inode); - error = 0; - } - } + error = reiserfs_xattr_del(inode, name); + if (error == -ENODATA) { + /* This may seem odd here, but it means that the ACL was set + * with a value representable with mode bits. If there was + * an ACL before, reiserfs_xattr_del already dirtied the inode. 
+ */ + mark_inode_dirty(inode); + error = 0; + } + } if (value) kfree(value); - if (!error) { - /* Release the old one */ - if (!IS_ERR (*p_acl) && *p_acl) - posix_acl_release (*p_acl); + if (!error) { + /* Release the old one */ + if (!IS_ERR(*p_acl) && *p_acl) + posix_acl_release(*p_acl); - if (acl == NULL) - *p_acl = ERR_PTR (-ENODATA); - else - *p_acl = posix_acl_dup (acl); - } + if (acl == NULL) + *p_acl = ERR_PTR(-ENODATA); + else + *p_acl = posix_acl_dup(acl); + } return error; } @@ -321,192 +316,190 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) /* dir->i_sem: down, * inode is new and not released into the wild yet */ int -reiserfs_inherit_default_acl (struct inode *dir, struct dentry *dentry, struct inode *inode) +reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry, + struct inode *inode) { - struct posix_acl *acl; - int err = 0; - - /* ACLs only get applied to files and directories */ - if (S_ISLNK (inode->i_mode)) - return 0; - - /* ACLs can only be used on "new" objects, so if it's an old object - * there is nothing to inherit from */ - if (get_inode_sd_version (dir) == STAT_DATA_V1) - goto apply_umask; - - /* Don't apply ACLs to objects in the .reiserfs_priv tree.. 
This - * would be useless since permissions are ignored, and a pain because - * it introduces locking cycles */ - if (is_reiserfs_priv_object (dir)) { - reiserfs_mark_inode_private (inode); - goto apply_umask; - } - - acl = reiserfs_get_acl (dir, ACL_TYPE_DEFAULT); - if (IS_ERR (acl)) { - if (PTR_ERR (acl) == -ENODATA) - goto apply_umask; - return PTR_ERR (acl); - } - - if (acl) { - struct posix_acl *acl_copy; - mode_t mode = inode->i_mode; - int need_acl; - - /* Copy the default ACL to the default ACL of a new directory */ - if (S_ISDIR (inode->i_mode)) { - err = reiserfs_set_acl (inode, ACL_TYPE_DEFAULT, acl); - if (err) - goto cleanup; - } - - /* Now we reconcile the new ACL and the mode, - potentially modifying both */ - acl_copy = posix_acl_clone (acl, GFP_NOFS); - if (!acl_copy) { - err = -ENOMEM; - goto cleanup; - } - - - need_acl = posix_acl_create_masq (acl_copy, &mode); - if (need_acl >= 0) { - if (mode != inode->i_mode) { - inode->i_mode = mode; - } - - /* If we need an ACL.. */ - if (need_acl > 0) { - err = reiserfs_set_acl (inode, ACL_TYPE_ACCESS, acl_copy); - if (err) - goto cleanup_copy; - } - } -cleanup_copy: - posix_acl_release (acl_copy); -cleanup: - posix_acl_release (acl); - } else { -apply_umask: - /* no ACL, apply umask */ - inode->i_mode &= ~current->fs->umask; - } - - return err; + struct posix_acl *acl; + int err = 0; + + /* ACLs only get applied to files and directories */ + if (S_ISLNK(inode->i_mode)) + return 0; + + /* ACLs can only be used on "new" objects, so if it's an old object + * there is nothing to inherit from */ + if (get_inode_sd_version(dir) == STAT_DATA_V1) + goto apply_umask; + + /* Don't apply ACLs to objects in the .reiserfs_priv tree.. 
This + * would be useless since permissions are ignored, and a pain because + * it introduces locking cycles */ + if (is_reiserfs_priv_object(dir)) { + reiserfs_mark_inode_private(inode); + goto apply_umask; + } + + acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) { + if (PTR_ERR(acl) == -ENODATA) + goto apply_umask; + return PTR_ERR(acl); + } + + if (acl) { + struct posix_acl *acl_copy; + mode_t mode = inode->i_mode; + int need_acl; + + /* Copy the default ACL to the default ACL of a new directory */ + if (S_ISDIR(inode->i_mode)) { + err = reiserfs_set_acl(inode, ACL_TYPE_DEFAULT, acl); + if (err) + goto cleanup; + } + + /* Now we reconcile the new ACL and the mode, + potentially modifying both */ + acl_copy = posix_acl_clone(acl, GFP_NOFS); + if (!acl_copy) { + err = -ENOMEM; + goto cleanup; + } + + need_acl = posix_acl_create_masq(acl_copy, &mode); + if (need_acl >= 0) { + if (mode != inode->i_mode) { + inode->i_mode = mode; + } + + /* If we need an ACL.. */ + if (need_acl > 0) { + err = + reiserfs_set_acl(inode, ACL_TYPE_ACCESS, + acl_copy); + if (err) + goto cleanup_copy; + } + } + cleanup_copy: + posix_acl_release(acl_copy); + cleanup: + posix_acl_release(acl); + } else { + apply_umask: + /* no ACL, apply umask */ + inode->i_mode &= ~current->fs->umask; + } + + return err; } /* Looks up and caches the result of the default ACL. * We do this so that we don't need to carry the xattr_sem into * reiserfs_new_inode if we don't need to */ -int -reiserfs_cache_default_acl (struct inode *inode) +int reiserfs_cache_default_acl(struct inode *inode) { - int ret = 0; - if (reiserfs_posixacl (inode->i_sb) && - !is_reiserfs_priv_object (inode)) { - struct posix_acl *acl; - reiserfs_read_lock_xattr_i (inode); - reiserfs_read_lock_xattrs (inode->i_sb); - acl = reiserfs_get_acl (inode, ACL_TYPE_DEFAULT); - reiserfs_read_unlock_xattrs (inode->i_sb); - reiserfs_read_unlock_xattr_i (inode); - ret = acl ? 
1 : 0; - posix_acl_release (acl); - } - - return ret; + int ret = 0; + if (reiserfs_posixacl(inode->i_sb) && !is_reiserfs_priv_object(inode)) { + struct posix_acl *acl; + reiserfs_read_lock_xattr_i(inode); + reiserfs_read_lock_xattrs(inode->i_sb); + acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT); + reiserfs_read_unlock_xattrs(inode->i_sb); + reiserfs_read_unlock_xattr_i(inode); + ret = acl ? 1 : 0; + posix_acl_release(acl); + } + + return ret; } -int -reiserfs_acl_chmod (struct inode *inode) +int reiserfs_acl_chmod(struct inode *inode) { - struct posix_acl *acl, *clone; - int error; + struct posix_acl *acl, *clone; + int error; - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; - if (get_inode_sd_version (inode) == STAT_DATA_V1 || - !reiserfs_posixacl(inode->i_sb)) - { - return 0; + if (get_inode_sd_version(inode) == STAT_DATA_V1 || + !reiserfs_posixacl(inode->i_sb)) { + return 0; } - reiserfs_read_lock_xattrs (inode->i_sb); - acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); - reiserfs_read_unlock_xattrs (inode->i_sb); - if (!acl) - return 0; - if (IS_ERR(acl)) - return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_NOFS); - posix_acl_release(acl); - if (!clone) - return -ENOMEM; - error = posix_acl_chmod_masq(clone, inode->i_mode); - if (!error) { - int lock = !has_xattr_dir (inode); - reiserfs_write_lock_xattr_i (inode); - if (lock) - reiserfs_write_lock_xattrs (inode->i_sb); - else - reiserfs_read_lock_xattrs (inode->i_sb); - error = reiserfs_set_acl(inode, ACL_TYPE_ACCESS, clone); - if (lock) - reiserfs_write_unlock_xattrs (inode->i_sb); - else - reiserfs_read_unlock_xattrs (inode->i_sb); - reiserfs_write_unlock_xattr_i (inode); - } - posix_acl_release(clone); - return error; + reiserfs_read_lock_xattrs(inode->i_sb); + acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); + reiserfs_read_unlock_xattrs(inode->i_sb); + if (!acl) + return 0; + if (IS_ERR(acl)) + return PTR_ERR(acl); + clone = posix_acl_clone(acl, 
GFP_NOFS); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + error = posix_acl_chmod_masq(clone, inode->i_mode); + if (!error) { + int lock = !has_xattr_dir(inode); + reiserfs_write_lock_xattr_i(inode); + if (lock) + reiserfs_write_lock_xattrs(inode->i_sb); + else + reiserfs_read_lock_xattrs(inode->i_sb); + error = reiserfs_set_acl(inode, ACL_TYPE_ACCESS, clone); + if (lock) + reiserfs_write_unlock_xattrs(inode->i_sb); + else + reiserfs_read_unlock_xattrs(inode->i_sb); + reiserfs_write_unlock_xattr_i(inode); + } + posix_acl_release(clone); + return error; } static int posix_acl_access_get(struct inode *inode, const char *name, - void *buffer, size_t size) + void *buffer, size_t size) { - if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1) return -EINVAL; return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); } static int posix_acl_access_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) + const void *value, size_t size, int flags) { - if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1) return -EINVAL; return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); } -static int -posix_acl_access_del (struct inode *inode, const char *name) +static int posix_acl_access_del(struct inode *inode, const char *name) { - struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); - struct posix_acl **acl = &reiserfs_i->i_acl_access; - if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1) - return -EINVAL; - if (!IS_ERR (*acl) && *acl) { - posix_acl_release (*acl); - *acl = ERR_PTR (-ENODATA); - } - - return 0; + struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); + struct posix_acl **acl = &reiserfs_i->i_acl_access; + if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1) + return -EINVAL; + if (!IS_ERR(*acl) && *acl) { + posix_acl_release(*acl); + *acl = ERR_PTR(-ENODATA); + } + + return 0; } 
static int -posix_acl_access_list (struct inode *inode, const char *name, int namelen, char *out) +posix_acl_access_list(struct inode *inode, const char *name, int namelen, + char *out) { - int len = namelen; - if (!reiserfs_posixacl (inode->i_sb)) - return 0; - if (out) - memcpy (out, name, len); + int len = namelen; + if (!reiserfs_posixacl(inode->i_sb)) + return 0; + if (out) + memcpy(out, name, len); - return len; + return len; } struct reiserfs_xattr_handler posix_acl_access_handler = { @@ -518,48 +511,48 @@ struct reiserfs_xattr_handler posix_acl_access_handler = { }; static int -posix_acl_default_get (struct inode *inode, const char *name, - void *buffer, size_t size) +posix_acl_default_get(struct inode *inode, const char *name, + void *buffer, size_t size) { - if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) return -EINVAL; return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); } static int posix_acl_default_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) + const void *value, size_t size, int flags) { - if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) return -EINVAL; return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); } -static int -posix_acl_default_del (struct inode *inode, const char *name) +static int posix_acl_default_del(struct inode *inode, const char *name) { - struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); - struct posix_acl **acl = &reiserfs_i->i_acl_default; - if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1) - return -EINVAL; - if (!IS_ERR (*acl) && *acl) { - posix_acl_release (*acl); - *acl = ERR_PTR (-ENODATA); - } - - return 0; + struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); + struct posix_acl **acl = &reiserfs_i->i_acl_default; + if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) + return -EINVAL; + if (!IS_ERR(*acl) && 
*acl) { + posix_acl_release(*acl); + *acl = ERR_PTR(-ENODATA); + } + + return 0; } static int -posix_acl_default_list (struct inode *inode, const char *name, int namelen, char *out) +posix_acl_default_list(struct inode *inode, const char *name, int namelen, + char *out) { - int len = namelen; - if (!reiserfs_posixacl (inode->i_sb)) - return 0; - if (out) - memcpy (out, name, len); + int len = namelen; + if (!reiserfs_posixacl(inode->i_sb)) + return 0; + if (out) + memcpy(out, name, len); - return len; + return len; } struct reiserfs_xattr_handler posix_acl_default_handler = { diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index e044d511711..5e90a95ad60 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -9,57 +9,55 @@ #define XATTR_SECURITY_PREFIX "security." static int -security_get (struct inode *inode, const char *name, void *buffer, size_t size) +security_get(struct inode *inode, const char *name, void *buffer, size_t size) { - if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) - return -EINVAL; + if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) + return -EINVAL; - if (is_reiserfs_priv_object(inode)) - return -EPERM; + if (is_reiserfs_priv_object(inode)) + return -EPERM; - return reiserfs_xattr_get (inode, name, buffer, size); + return reiserfs_xattr_get(inode, name, buffer, size); } static int -security_set (struct inode *inode, const char *name, const void *buffer, - size_t size, int flags) +security_set(struct inode *inode, const char *name, const void *buffer, + size_t size, int flags) { - if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) - return -EINVAL; + if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) + return -EINVAL; - if (is_reiserfs_priv_object(inode)) - return -EPERM; + if (is_reiserfs_priv_object(inode)) + return -EPERM; - return reiserfs_xattr_set (inode, name, buffer, size, flags); + return reiserfs_xattr_set(inode, name, buffer, size, flags); } -static int -security_del (struct inode 
*inode, const char *name) +static int security_del(struct inode *inode, const char *name) { - if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) - return -EINVAL; + if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) + return -EINVAL; - if (is_reiserfs_priv_object(inode)) - return -EPERM; + if (is_reiserfs_priv_object(inode)) + return -EPERM; - return 0; + return 0; } static int -security_list (struct inode *inode, const char *name, int namelen, char *out) +security_list(struct inode *inode, const char *name, int namelen, char *out) { - int len = namelen; + int len = namelen; - if (is_reiserfs_priv_object(inode)) - return 0; + if (is_reiserfs_priv_object(inode)) + return 0; - if (out) - memcpy (out, name, len); + if (out) + memcpy(out, name, len); - return len; + return len; } - struct reiserfs_xattr_handler security_handler = { .prefix = XATTR_SECURITY_PREFIX, .get = security_get, diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c index 43762197fb0..2501f7e66ab 100644 --- a/fs/reiserfs/xattr_trusted.c +++ b/fs/reiserfs/xattr_trusted.c @@ -9,69 +9,67 @@ #define XATTR_TRUSTED_PREFIX "trusted." 
static int -trusted_get (struct inode *inode, const char *name, void *buffer, size_t size) +trusted_get(struct inode *inode, const char *name, void *buffer, size_t size) { - if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) - return -EINVAL; + if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) + return -EINVAL; - if (!reiserfs_xattrs (inode->i_sb)) - return -EOPNOTSUPP; + if (!reiserfs_xattrs(inode->i_sb)) + return -EOPNOTSUPP; - if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) - return -EPERM; + if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) + return -EPERM; - return reiserfs_xattr_get (inode, name, buffer, size); + return reiserfs_xattr_get(inode, name, buffer, size); } static int -trusted_set (struct inode *inode, const char *name, const void *buffer, - size_t size, int flags) +trusted_set(struct inode *inode, const char *name, const void *buffer, + size_t size, int flags) { - if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) - return -EINVAL; + if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) + return -EINVAL; - if (!reiserfs_xattrs (inode->i_sb)) - return -EOPNOTSUPP; + if (!reiserfs_xattrs(inode->i_sb)) + return -EOPNOTSUPP; - if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) - return -EPERM; + if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) + return -EPERM; - return reiserfs_xattr_set (inode, name, buffer, size, flags); + return reiserfs_xattr_set(inode, name, buffer, size, flags); } -static int -trusted_del (struct inode *inode, const char *name) +static int trusted_del(struct inode *inode, const char *name) { - if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) - return -EINVAL; + if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) + return -EINVAL; - if (!reiserfs_xattrs (inode->i_sb)) - return -EOPNOTSUPP; + if (!reiserfs_xattrs(inode->i_sb)) + return -EOPNOTSUPP; - if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) - return -EPERM; + if (!(capable(CAP_SYS_ADMIN) || 
is_reiserfs_priv_object(inode))) + return -EPERM; - return 0; + return 0; } static int -trusted_list (struct inode *inode, const char *name, int namelen, char *out) +trusted_list(struct inode *inode, const char *name, int namelen, char *out) { - int len = namelen; + int len = namelen; - if (!reiserfs_xattrs (inode->i_sb)) - return 0; + if (!reiserfs_xattrs(inode->i_sb)) + return 0; - if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) - return 0; + if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) + return 0; - if (out) - memcpy (out, name, len); + if (out) + memcpy(out, name, len); - return len; + return len; } - struct reiserfs_xattr_handler trusted_handler = { .prefix = XATTR_TRUSTED_PREFIX, .get = trusted_get, diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index 0772806466a..51458048ca6 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c @@ -13,81 +13,80 @@ #define XATTR_USER_PREFIX "user." static int -user_get (struct inode *inode, const char *name, void *buffer, size_t size) +user_get(struct inode *inode, const char *name, void *buffer, size_t size) { - int error; + int error; - if (strlen(name) < sizeof(XATTR_USER_PREFIX)) - return -EINVAL; + if (strlen(name) < sizeof(XATTR_USER_PREFIX)) + return -EINVAL; - if (!reiserfs_xattrs_user (inode->i_sb)) - return -EOPNOTSUPP; + if (!reiserfs_xattrs_user(inode->i_sb)) + return -EOPNOTSUPP; - error = reiserfs_permission_locked (inode, MAY_READ, NULL); - if (error) - return error; + error = reiserfs_permission_locked(inode, MAY_READ, NULL); + if (error) + return error; - return reiserfs_xattr_get (inode, name, buffer, size); + return reiserfs_xattr_get(inode, name, buffer, size); } static int -user_set (struct inode *inode, const char *name, const void *buffer, - size_t size, int flags) +user_set(struct inode *inode, const char *name, const void *buffer, + size_t size, int flags) { - int error; + int error; - if (strlen(name) < sizeof(XATTR_USER_PREFIX)) - 
return -EINVAL; + if (strlen(name) < sizeof(XATTR_USER_PREFIX)) + return -EINVAL; - if (!reiserfs_xattrs_user (inode->i_sb)) - return -EOPNOTSUPP; + if (!reiserfs_xattrs_user(inode->i_sb)) + return -EOPNOTSUPP; - if (!S_ISREG (inode->i_mode) && - (!S_ISDIR (inode->i_mode) || inode->i_mode & S_ISVTX)) - return -EPERM; + if (!S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; - error = reiserfs_permission_locked (inode, MAY_WRITE, NULL); - if (error) - return error; + error = reiserfs_permission_locked(inode, MAY_WRITE, NULL); + if (error) + return error; - return reiserfs_xattr_set (inode, name, buffer, size, flags); + return reiserfs_xattr_set(inode, name, buffer, size, flags); } -static int -user_del (struct inode *inode, const char *name) +static int user_del(struct inode *inode, const char *name) { - int error; + int error; - if (strlen(name) < sizeof(XATTR_USER_PREFIX)) - return -EINVAL; + if (strlen(name) < sizeof(XATTR_USER_PREFIX)) + return -EINVAL; - if (!reiserfs_xattrs_user (inode->i_sb)) - return -EOPNOTSUPP; + if (!reiserfs_xattrs_user(inode->i_sb)) + return -EOPNOTSUPP; - if (!S_ISREG (inode->i_mode) && - (!S_ISDIR (inode->i_mode) || inode->i_mode & S_ISVTX)) - return -EPERM; + if (!S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; - error = reiserfs_permission_locked (inode, MAY_WRITE, NULL); - if (error) - return error; + error = reiserfs_permission_locked(inode, MAY_WRITE, NULL); + if (error) + return error; - return 0; + return 0; } static int -user_list (struct inode *inode, const char *name, int namelen, char *out) +user_list(struct inode *inode, const char *name, int namelen, char *out) { - int len = namelen; - if (!reiserfs_xattrs_user (inode->i_sb)) - return 0; + int len = namelen; + if (!reiserfs_xattrs_user(inode->i_sb)) + return 0; - if (out) - memcpy (out, name, len); + if (out) + memcpy(out, name, len); - return len; + return len; } struct 
reiserfs_xattr_handler user_handler = { diff --git a/include/linux/reiserfs_acl.h b/include/linux/reiserfs_acl.h index 0760507a545..0a3605099c4 100644 --- a/include/linux/reiserfs_acl.h +++ b/include/linux/reiserfs_acl.h @@ -4,29 +4,29 @@ #define REISERFS_ACL_VERSION 0x0001 typedef struct { - __le16 e_tag; - __le16 e_perm; - __le32 e_id; + __le16 e_tag; + __le16 e_perm; + __le32 e_id; } reiserfs_acl_entry; typedef struct { - __le16 e_tag; - __le16 e_perm; + __le16 e_tag; + __le16 e_perm; } reiserfs_acl_entry_short; typedef struct { - __le32 a_version; + __le32 a_version; } reiserfs_acl_header; static inline size_t reiserfs_acl_size(int count) { if (count <= 4) { return sizeof(reiserfs_acl_header) + - count * sizeof(reiserfs_acl_entry_short); + count * sizeof(reiserfs_acl_entry_short); } else { return sizeof(reiserfs_acl_header) + - 4 * sizeof(reiserfs_acl_entry_short) + - (count - 4) * sizeof(reiserfs_acl_entry); + 4 * sizeof(reiserfs_acl_entry_short) + + (count - 4) * sizeof(reiserfs_acl_entry); } } @@ -46,14 +46,14 @@ static inline int reiserfs_acl_count(size_t size) } } - #ifdef CONFIG_REISERFS_FS_POSIX_ACL -struct posix_acl * reiserfs_get_acl(struct inode *inode, int type); -int reiserfs_acl_chmod (struct inode *inode); -int reiserfs_inherit_default_acl (struct inode *dir, struct dentry *dentry, struct inode *inode); -int reiserfs_cache_default_acl (struct inode *dir); -extern int reiserfs_xattr_posix_acl_init (void) __init; -extern int reiserfs_xattr_posix_acl_exit (void); +struct posix_acl *reiserfs_get_acl(struct inode *inode, int type); +int reiserfs_acl_chmod(struct inode *inode); +int reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry, + struct inode *inode); +int reiserfs_cache_default_acl(struct inode *dir); +extern int reiserfs_xattr_posix_acl_init(void) __init; +extern int reiserfs_xattr_posix_acl_exit(void); extern struct reiserfs_xattr_handler posix_acl_default_handler; extern struct reiserfs_xattr_handler 
posix_acl_access_handler; #else @@ -61,28 +61,26 @@ extern struct reiserfs_xattr_handler posix_acl_access_handler; #define reiserfs_get_acl NULL #define reiserfs_cache_default_acl(inode) 0 -static inline int -reiserfs_xattr_posix_acl_init (void) +static inline int reiserfs_xattr_posix_acl_init(void) { - return 0; + return 0; } -static inline int -reiserfs_xattr_posix_acl_exit (void) +static inline int reiserfs_xattr_posix_acl_exit(void) { - return 0; + return 0; } -static inline int -reiserfs_acl_chmod (struct inode *inode) +static inline int reiserfs_acl_chmod(struct inode *inode) { - return 0; + return 0; } static inline int -reiserfs_inherit_default_acl (const struct inode *dir, struct dentry *dentry, struct inode *inode) +reiserfs_inherit_default_acl(const struct inode *dir, struct dentry *dentry, + struct inode *inode) { - return 0; + return 0; } #endif diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 4c7c5689ad9..17e458e17e2 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -3,11 +3,10 @@ */ /* this file has an amazingly stupid - name, yura please fix it to be - reiserfs.h, and merge all the rest - of our .h files that are in this - directory into it. */ - + name, yura please fix it to be + reiserfs.h, and merge all the rest + of our .h files that are in this + directory into it. */ #ifndef _LINUX_REISER_FS_H #define _LINUX_REISER_FS_H @@ -74,9 +73,9 @@ /* debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug ** messages. */ -#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ +#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ -void reiserfs_warning (struct super_block *s, const char * fmt, ...); +void reiserfs_warning(struct super_block *s, const char *fmt, ...); /* assertions handling */ /** always check a condition and panic if it's false. 
*/ @@ -105,82 +104,78 @@ if( !( cond ) ) \ * Structure of super block on disk, a version of which in RAM is often accessed as REISERFS_SB(s)->s_rs * the version in RAM is part of a larger structure containing fields never written to disk. */ -#define UNSET_HASH 0 // read_super will guess about, what hash names - // in directories were sorted with +#define UNSET_HASH 0 // read_super will guess about, what hash names + // in directories were sorted with #define TEA_HASH 1 #define YURA_HASH 2 #define R5_HASH 3 #define DEFAULT_HASH R5_HASH - struct journal_params { - __le32 jp_journal_1st_block; /* where does journal start from on its - * device */ - __le32 jp_journal_dev; /* journal device st_rdev */ - __le32 jp_journal_size; /* size of the journal */ - __le32 jp_journal_trans_max; /* max number of blocks in a transaction. */ - __le32 jp_journal_magic; /* random value made on fs creation (this - * was sb_journal_block_count) */ - __le32 jp_journal_max_batch; /* max number of blocks to batch into a - * trans */ - __le32 jp_journal_max_commit_age; /* in seconds, how old can an async - * commit be */ - __le32 jp_journal_max_trans_age; /* in seconds, how old can a transaction - * be */ + __le32 jp_journal_1st_block; /* where does journal start from on its + * device */ + __le32 jp_journal_dev; /* journal device st_rdev */ + __le32 jp_journal_size; /* size of the journal */ + __le32 jp_journal_trans_max; /* max number of blocks in a transaction. 
*/ + __le32 jp_journal_magic; /* random value made on fs creation (this + * was sb_journal_block_count) */ + __le32 jp_journal_max_batch; /* max number of blocks to batch into a + * trans */ + __le32 jp_journal_max_commit_age; /* in seconds, how old can an async + * commit be */ + __le32 jp_journal_max_trans_age; /* in seconds, how old can a transaction + * be */ }; /* this is the super from 3.5.X, where X >= 10 */ -struct reiserfs_super_block_v1 -{ - __le32 s_block_count; /* blocks count */ - __le32 s_free_blocks; /* free blocks count */ - __le32 s_root_block; /* root block number */ - struct journal_params s_journal; - __le16 s_blocksize; /* block size */ - __le16 s_oid_maxsize; /* max size of object id array, see - * get_objectid() commentary */ - __le16 s_oid_cursize; /* current size of object id array */ - __le16 s_umount_state; /* this is set to 1 when filesystem was - * umounted, to 2 - when not */ - char s_magic[10]; /* reiserfs magic string indicates that - * file system is reiserfs: - * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */ - __le16 s_fs_state; /* it is set to used by fsck to mark which - * phase of rebuilding is done */ - __le32 s_hash_function_code; /* indicate, what hash function is being use - * to sort names in a directory*/ - __le16 s_tree_height; /* height of disk tree */ - __le16 s_bmap_nr; /* amount of bitmap blocks needed to address - * each block of file system */ - __le16 s_version; /* this field is only reliable on filesystem - * with non-standard journal */ - __le16 s_reserved_for_journal; /* size in blocks of journal area on main - * device, we need to keep after - * making fs with non-standard journal */ +struct reiserfs_super_block_v1 { + __le32 s_block_count; /* blocks count */ + __le32 s_free_blocks; /* free blocks count */ + __le32 s_root_block; /* root block number */ + struct journal_params s_journal; + __le16 s_blocksize; /* block size */ + __le16 s_oid_maxsize; /* max size of object id array, see + * get_objectid() commentary 
*/ + __le16 s_oid_cursize; /* current size of object id array */ + __le16 s_umount_state; /* this is set to 1 when filesystem was + * umounted, to 2 - when not */ + char s_magic[10]; /* reiserfs magic string indicates that + * file system is reiserfs: + * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */ + __le16 s_fs_state; /* it is set to used by fsck to mark which + * phase of rebuilding is done */ + __le32 s_hash_function_code; /* indicate, what hash function is being use + * to sort names in a directory*/ + __le16 s_tree_height; /* height of disk tree */ + __le16 s_bmap_nr; /* amount of bitmap blocks needed to address + * each block of file system */ + __le16 s_version; /* this field is only reliable on filesystem + * with non-standard journal */ + __le16 s_reserved_for_journal; /* size in blocks of journal area on main + * device, we need to keep after + * making fs with non-standard journal */ } __attribute__ ((__packed__)); #define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) /* this is the on disk super block */ -struct reiserfs_super_block -{ - struct reiserfs_super_block_v1 s_v1; - __le32 s_inode_generation; - __le32 s_flags; /* Right now used only by inode-attributes, if enabled */ - unsigned char s_uuid[16]; /* filesystem unique identifier */ - unsigned char s_label[16]; /* filesystem volume label */ - char s_unused[88] ; /* zero filled by mkreiserfs and - * reiserfs_convert_objectid_map_v1() - * so any additions must be updated - * there as well. */ -} __attribute__ ((__packed__)); +struct reiserfs_super_block { + struct reiserfs_super_block_v1 s_v1; + __le32 s_inode_generation; + __le32 s_flags; /* Right now used only by inode-attributes, if enabled */ + unsigned char s_uuid[16]; /* filesystem unique identifier */ + unsigned char s_label[16]; /* filesystem volume label */ + char s_unused[88]; /* zero filled by mkreiserfs and + * reiserfs_convert_objectid_map_v1() + * so any additions must be updated + * there as well. 
*/ +} __attribute__ ((__packed__)); #define SB_SIZE (sizeof(struct reiserfs_super_block)) #define REISERFS_VERSION_1 0 #define REISERFS_VERSION_2 2 - // on-disk super block fields converted to cpu form #define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs) #define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1)) @@ -210,13 +205,12 @@ struct reiserfs_super_block #define PUT_SB_TREE_HEIGHT(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0) #define PUT_SB_REISERFS_STATE(s, val) \ - do { SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state = cpu_to_le16(val); } while (0) + do { SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state = cpu_to_le16(val); } while (0) #define PUT_SB_VERSION(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0) #define PUT_SB_BMAP_NR(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0) - #define SB_ONDISK_JP(s) (&SB_V1_DISK_SUPER_BLOCK(s)->s_journal) #define SB_ONDISK_JOURNAL_SIZE(s) \ le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_size)) @@ -231,21 +225,19 @@ struct reiserfs_super_block block >= SB_JOURNAL_1st_RESERVED_BLOCK(s) \ && block < SB_JOURNAL_1st_RESERVED_BLOCK(s) + \ ((!is_reiserfs_jr(SB_DISK_SUPER_BLOCK(s)) ? \ - SB_ONDISK_JOURNAL_SIZE(s) + 1 : SB_ONDISK_RESERVED_FOR_JOURNAL(s))) - - + SB_ONDISK_JOURNAL_SIZE(s) + 1 : SB_ONDISK_RESERVED_FOR_JOURNAL(s))) /* used by gcc */ #define REISERFS_SUPER_MAGIC 0x52654973 /* used by file system utilities that - look at the superblock, etc. */ + look at the superblock, etc. 
*/ #define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" -int is_reiserfs_3_5 (struct reiserfs_super_block * rs); -int is_reiserfs_3_6 (struct reiserfs_super_block * rs); -int is_reiserfs_jr (struct reiserfs_super_block * rs); +int is_reiserfs_3_5(struct reiserfs_super_block *rs); +int is_reiserfs_3_6(struct reiserfs_super_block *rs); +int is_reiserfs_jr(struct reiserfs_super_block *rs); /* ReiserFS leaves the first 64k unused, so that partition labels have enough space. If someone wants to write a fancy bootloader that @@ -272,8 +264,8 @@ typedef __u32 b_blocknr_t; typedef __le32 unp_t; struct unfm_nodeinfo { - unp_t unfm_nodenum; - unsigned short unfm_freespace; + unp_t unfm_nodenum; + unsigned short unfm_freespace; }; /* there are two formats of keys: 3.5 and 3.6 @@ -285,7 +277,6 @@ struct unfm_nodeinfo { #define STAT_DATA_V1 0 #define STAT_DATA_V2 1 - static inline struct reiserfs_inode_info *REISERFS_I(const struct inode *inode) { return container_of(inode, struct reiserfs_inode_info, vfs_inode); @@ -343,15 +334,13 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) file would fit into one DIRECT item. Primary intention for this one is to increase performance by decreasing seeking. 
-*/ +*/ #define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \ (\ (!(n_tail_size)) || \ (((n_file_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) ) \ ) - - /* * values for s_umount_state field */ @@ -364,9 +353,9 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) #define TYPE_STAT_DATA 0 #define TYPE_INDIRECT 1 #define TYPE_DIRECT 2 -#define TYPE_DIRENTRY 3 -#define TYPE_MAXTYPE 3 -#define TYPE_ANY 15 // FIXME: comment is required +#define TYPE_DIRENTRY 3 +#define TYPE_MAXTYPE 3 +#define TYPE_ANY 15 // FIXME: comment is required /***************************************************************************/ /* KEY & ITEM HEAD */ @@ -376,60 +365,62 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) // directories use this key as well as old files // struct offset_v1 { - __le32 k_offset; - __le32 k_uniqueness; + __le32 k_offset; + __le32 k_uniqueness; } __attribute__ ((__packed__)); struct offset_v2 { __le64 v; } __attribute__ ((__packed__)); -static inline __u16 offset_v2_k_type( const struct offset_v2 *v2 ) +static inline __u16 offset_v2_k_type(const struct offset_v2 *v2) { __u8 type = le64_to_cpu(v2->v) >> 60; - return (type <= TYPE_MAXTYPE)?type:TYPE_ANY; + return (type <= TYPE_MAXTYPE) ? 
type : TYPE_ANY; } - -static inline void set_offset_v2_k_type( struct offset_v2 *v2, int type ) + +static inline void set_offset_v2_k_type(struct offset_v2 *v2, int type) { - v2->v = (v2->v & cpu_to_le64(~0ULL>>4)) | cpu_to_le64((__u64)type<<60); + v2->v = + (v2->v & cpu_to_le64(~0ULL >> 4)) | cpu_to_le64((__u64) type << 60); } - -static inline loff_t offset_v2_k_offset( const struct offset_v2 *v2 ) + +static inline loff_t offset_v2_k_offset(const struct offset_v2 *v2) { - return le64_to_cpu(v2->v) & (~0ULL>>4); + return le64_to_cpu(v2->v) & (~0ULL >> 4); } -static inline void set_offset_v2_k_offset( struct offset_v2 *v2, loff_t offset ){ - offset &= (~0ULL>>4); - v2->v = (v2->v & cpu_to_le64(15ULL<<60)) | cpu_to_le64(offset); +static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset) +{ + offset &= (~0ULL >> 4); + v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset); } /* Key of an item determines its location in the S+tree, and is composed of 4 components */ struct reiserfs_key { - __le32 k_dir_id; /* packing locality: by default parent - directory object id */ - __le32 k_objectid; /* object identifier */ - union { - struct offset_v1 k_offset_v1; - struct offset_v2 k_offset_v2; - } __attribute__ ((__packed__)) u; + __le32 k_dir_id; /* packing locality: by default parent + directory object id */ + __le32 k_objectid; /* object identifier */ + union { + struct offset_v1 k_offset_v1; + struct offset_v2 k_offset_v2; + } __attribute__ ((__packed__)) u; } __attribute__ ((__packed__)); struct in_core_key { - __u32 k_dir_id; /* packing locality: by default parent - directory object id */ - __u32 k_objectid; /* object identifier */ - __u64 k_offset; - __u8 k_type; + __u32 k_dir_id; /* packing locality: by default parent + directory object id */ + __u32 k_objectid; /* object identifier */ + __u64 k_offset; + __u8 k_type; }; struct cpu_key { - struct in_core_key on_disk_key; - int version; - int key_length; /* 3 in all cases but 
direct2indirect and - indirect2direct conversion */ + struct in_core_key on_disk_key; + int version; + int key_length; /* 3 in all cases but direct2indirect and + indirect2direct conversion */ }; /* Our function for comparing keys can compare keys of different @@ -475,8 +466,7 @@ struct cpu_key { indirect items) and specifies the location of the item itself within the block. */ -struct item_head -{ +struct item_head { /* Everything in the tree is found by searching for it based on * its key.*/ struct reiserfs_key ih_key; @@ -492,13 +482,13 @@ struct item_head number of directory entries in the directory item. */ __le16 ih_entry_count; } __attribute__ ((__packed__)) u; - __le16 ih_item_len; /* total size of the item body */ - __le16 ih_item_location; /* an offset to the item body - * within the block */ - __le16 ih_version; /* 0 for all old items, 2 for new - ones. Highest bit is set by fsck - temporary, cleaned after all - done */ + __le16 ih_item_len; /* total size of the item body */ + __le16 ih_item_location; /* an offset to the item body + * within the block */ + __le16 ih_version; /* 0 for all old items, 2 for new + ones. Highest bit is set by fsck + temporary, cleaned after all + done */ } __attribute__ ((__packed__)); /* size of item header */ #define IH_SIZE (sizeof(struct item_head)) @@ -515,7 +505,6 @@ struct item_head #define put_ih_location(ih, val) do { (ih)->ih_item_location = cpu_to_le16(val); } while (0) #define put_ih_item_len(ih, val) do { (ih)->ih_item_len = cpu_to_le16(val); } while (0) - #define unreachable_item(ih) (ih_version(ih) & (1 << 15)) #define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 
0 : ih_free_space (ih)) @@ -537,40 +526,48 @@ struct item_head #define V1_INDIRECT_UNIQUENESS 0xfffffffe #define V1_DIRECT_UNIQUENESS 0xffffffff #define V1_DIRENTRY_UNIQUENESS 500 -#define V1_ANY_UNIQUENESS 555 // FIXME: comment is required +#define V1_ANY_UNIQUENESS 555 // FIXME: comment is required // // here are conversion routines // -static inline int uniqueness2type (__u32 uniqueness) CONSTF; -static inline int uniqueness2type (__u32 uniqueness) +static inline int uniqueness2type(__u32 uniqueness) CONSTF; +static inline int uniqueness2type(__u32 uniqueness) { - switch ((int)uniqueness) { - case V1_SD_UNIQUENESS: return TYPE_STAT_DATA; - case V1_INDIRECT_UNIQUENESS: return TYPE_INDIRECT; - case V1_DIRECT_UNIQUENESS: return TYPE_DIRECT; - case V1_DIRENTRY_UNIQUENESS: return TYPE_DIRENTRY; - default: - reiserfs_warning (NULL, "vs-500: unknown uniqueness %d", - uniqueness); + switch ((int)uniqueness) { + case V1_SD_UNIQUENESS: + return TYPE_STAT_DATA; + case V1_INDIRECT_UNIQUENESS: + return TYPE_INDIRECT; + case V1_DIRECT_UNIQUENESS: + return TYPE_DIRECT; + case V1_DIRENTRY_UNIQUENESS: + return TYPE_DIRENTRY; + default: + reiserfs_warning(NULL, "vs-500: unknown uniqueness %d", + uniqueness); case V1_ANY_UNIQUENESS: - return TYPE_ANY; - } + return TYPE_ANY; + } } -static inline __u32 type2uniqueness (int type) CONSTF; -static inline __u32 type2uniqueness (int type) +static inline __u32 type2uniqueness(int type) CONSTF; +static inline __u32 type2uniqueness(int type) { - switch (type) { - case TYPE_STAT_DATA: return V1_SD_UNIQUENESS; - case TYPE_INDIRECT: return V1_INDIRECT_UNIQUENESS; - case TYPE_DIRECT: return V1_DIRECT_UNIQUENESS; - case TYPE_DIRENTRY: return V1_DIRENTRY_UNIQUENESS; - default: - reiserfs_warning (NULL, "vs-501: unknown type %d", type); + switch (type) { + case TYPE_STAT_DATA: + return V1_SD_UNIQUENESS; + case TYPE_INDIRECT: + return V1_INDIRECT_UNIQUENESS; + case TYPE_DIRECT: + return V1_DIRECT_UNIQUENESS; + case TYPE_DIRENTRY: + return 
V1_DIRENTRY_UNIQUENESS; + default: + reiserfs_warning(NULL, "vs-501: unknown type %d", type); case TYPE_ANY: - return V1_ANY_UNIQUENESS; - } + return V1_ANY_UNIQUENESS; + } } // @@ -578,57 +575,56 @@ static inline __u32 type2uniqueness (int type) // there is no way to get version of object from key, so, provide // version to these defines // -static inline loff_t le_key_k_offset (int version, const struct reiserfs_key * key) +static inline loff_t le_key_k_offset(int version, + const struct reiserfs_key *key) { - return (version == KEY_FORMAT_3_5) ? - le32_to_cpu( key->u.k_offset_v1.k_offset ) : - offset_v2_k_offset( &(key->u.k_offset_v2) ); + return (version == KEY_FORMAT_3_5) ? + le32_to_cpu(key->u.k_offset_v1.k_offset) : + offset_v2_k_offset(&(key->u.k_offset_v2)); } -static inline loff_t le_ih_k_offset (const struct item_head * ih) +static inline loff_t le_ih_k_offset(const struct item_head *ih) { - return le_key_k_offset (ih_version (ih), &(ih->ih_key)); + return le_key_k_offset(ih_version(ih), &(ih->ih_key)); } -static inline loff_t le_key_k_type (int version, const struct reiserfs_key * key) +static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key) { - return (version == KEY_FORMAT_3_5) ? - uniqueness2type( le32_to_cpu( key->u.k_offset_v1.k_uniqueness)) : - offset_v2_k_type( &(key->u.k_offset_v2) ); + return (version == KEY_FORMAT_3_5) ? + uniqueness2type(le32_to_cpu(key->u.k_offset_v1.k_uniqueness)) : + offset_v2_k_type(&(key->u.k_offset_v2)); } -static inline loff_t le_ih_k_type (const struct item_head * ih) +static inline loff_t le_ih_k_type(const struct item_head *ih) { - return le_key_k_type (ih_version (ih), &(ih->ih_key)); + return le_key_k_type(ih_version(ih), &(ih->ih_key)); } - -static inline void set_le_key_k_offset (int version, struct reiserfs_key * key, loff_t offset) +static inline void set_le_key_k_offset(int version, struct reiserfs_key *key, + loff_t offset) { - (version == KEY_FORMAT_3_5) ? 
- (void)(key->u.k_offset_v1.k_offset = cpu_to_le32 (offset)) : /* jdm check */ - (void)(set_offset_v2_k_offset( &(key->u.k_offset_v2), offset )); + (version == KEY_FORMAT_3_5) ? (void)(key->u.k_offset_v1.k_offset = cpu_to_le32(offset)) : /* jdm check */ + (void)(set_offset_v2_k_offset(&(key->u.k_offset_v2), offset)); } - -static inline void set_le_ih_k_offset (struct item_head * ih, loff_t offset) +static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset) { - set_le_key_k_offset (ih_version (ih), &(ih->ih_key), offset); + set_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset); } - -static inline void set_le_key_k_type (int version, struct reiserfs_key * key, int type) +static inline void set_le_key_k_type(int version, struct reiserfs_key *key, + int type) { - (version == KEY_FORMAT_3_5) ? - (void)(key->u.k_offset_v1.k_uniqueness = cpu_to_le32(type2uniqueness(type))): - (void)(set_offset_v2_k_type( &(key->u.k_offset_v2), type )); + (version == KEY_FORMAT_3_5) ? + (void)(key->u.k_offset_v1.k_uniqueness = + cpu_to_le32(type2uniqueness(type))) + : (void)(set_offset_v2_k_type(&(key->u.k_offset_v2), type)); } -static inline void set_le_ih_k_type (struct item_head * ih, int type) +static inline void set_le_ih_k_type(struct item_head *ih, int type) { - set_le_key_k_type (ih_version (ih), &(ih->ih_key), type); + set_le_key_k_type(ih_version(ih), &(ih->ih_key), type); } - #define is_direntry_le_key(version,key) (le_key_k_type (version, key) == TYPE_DIRENTRY) #define is_direct_le_key(version,key) (le_key_k_type (version, key) == TYPE_DIRECT) #define is_indirect_le_key(version,key) (le_key_k_type (version, key) == TYPE_INDIRECT) @@ -642,34 +638,32 @@ static inline void set_le_ih_k_type (struct item_head * ih, int type) #define is_indirect_le_ih(ih) is_indirect_le_key (ih_version(ih), &((ih)->ih_key)) #define is_statdata_le_ih(ih) is_statdata_le_key (ih_version (ih), &((ih)->ih_key)) - - // // key is pointer to cpu key, result is cpu // -static inline 
loff_t cpu_key_k_offset (const struct cpu_key * key) +static inline loff_t cpu_key_k_offset(const struct cpu_key *key) { - return key->on_disk_key.k_offset; + return key->on_disk_key.k_offset; } -static inline loff_t cpu_key_k_type (const struct cpu_key * key) +static inline loff_t cpu_key_k_type(const struct cpu_key *key) { - return key->on_disk_key.k_type; + return key->on_disk_key.k_type; } -static inline void set_cpu_key_k_offset (struct cpu_key * key, loff_t offset) +static inline void set_cpu_key_k_offset(struct cpu_key *key, loff_t offset) { key->on_disk_key.k_offset = offset; } -static inline void set_cpu_key_k_type (struct cpu_key * key, int type) +static inline void set_cpu_key_k_type(struct cpu_key *key, int type) { key->on_disk_key.k_type = type; } -static inline void cpu_key_k_offset_dec (struct cpu_key * key) +static inline void cpu_key_k_offset_dec(struct cpu_key *key) { - key->on_disk_key.k_offset --; + key->on_disk_key.k_offset--; } #define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY) @@ -677,34 +671,25 @@ static inline void cpu_key_k_offset_dec (struct cpu_key * key) #define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT) #define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA) - /* are these used ? */ #define is_direntry_cpu_ih(ih) (is_direntry_cpu_key (&((ih)->ih_key))) #define is_direct_cpu_ih(ih) (is_direct_cpu_key (&((ih)->ih_key))) #define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key))) #define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key))) - - - - #define I_K_KEY_IN_ITEM(p_s_ih, p_s_key, n_blocksize) \ ( ! 
COMP_SHORT_KEYS(p_s_ih, p_s_key) && \ I_OFF_BYTE_IN_ITEM(p_s_ih, k_offset (p_s_key), n_blocksize) ) -/* maximal length of item */ +/* maximal length of item */ #define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE) #define MIN_ITEM_LEN 1 - /* object identifier for root dir */ #define REISERFS_ROOT_OBJECTID 2 #define REISERFS_ROOT_PARENT_OBJECTID 1 extern struct reiserfs_key root_key; - - - /* * Picture represents a leaf of the S+tree * ______________________________________________________ @@ -716,13 +701,13 @@ extern struct reiserfs_key root_key; /* Header of a disk block. More precisely, header of a formatted leaf or internal node, and not the header of an unformatted node. */ -struct block_head { - __le16 blk_level; /* Level of a block in the tree. */ - __le16 blk_nr_item; /* Number of keys/items in a block. */ - __le16 blk_free_space; /* Block free space in bytes. */ - __le16 blk_reserved; - /* dump this in v4/planA */ - struct reiserfs_key blk_right_delim_key; /* kept only for compatibility */ +struct block_head { + __le16 blk_level; /* Level of a block in the tree. */ + __le16 blk_nr_item; /* Number of keys/items in a block. */ + __le16 blk_free_space; /* Block free space in bytes. */ + __le16 blk_reserved; + /* dump this in v4/planA */ + struct reiserfs_key blk_right_delim_key; /* kept only for compatibility */ }; #define BLKH_SIZE (sizeof(struct block_head)) @@ -741,12 +726,12 @@ struct block_head { * values for blk_level field of the struct block_head */ -#define FREE_LEVEL 0 /* when node gets removed from the tree its - blk_level is set to FREE_LEVEL. It is then - used to see whether the node is still in the - tree */ +#define FREE_LEVEL 0 /* when node gets removed from the tree its + blk_level is set to FREE_LEVEL. It is then + used to see whether the node is still in the + tree */ -#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level.*/ +#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. 
*/ /* Given the buffer head of a formatted node, resolve to the block head of that node. */ #define B_BLK_HEAD(p_s_bh) ((struct block_head *)((p_s_bh)->b_data)) @@ -759,7 +744,6 @@ struct block_head { #define PUT_B_LEVEL(p_s_bh,val) do { set_blkh_level(B_BLK_HEAD(p_s_bh),val); } while (0) #define PUT_B_FREE_SPACE(p_s_bh,val) do { set_blkh_free_space(B_BLK_HEAD(p_s_bh),val); } while (0) - /* Get right delimiting key. -- little endian */ #define B_PRIGHT_DELIM_KEY(p_s_bh) (&(blk_right_delim_key(B_BLK_HEAD(p_s_bh)) @@ -770,41 +754,36 @@ struct block_head { #define B_IS_KEYS_LEVEL(p_s_bh) (B_LEVEL(p_s_bh) > DISK_LEAF_NODE_LEVEL \ && B_LEVEL(p_s_bh) <= MAX_HEIGHT) - - - /***************************************************************************/ /* STAT DATA */ /***************************************************************************/ - // // old stat data is 32 bytes long. We are going to distinguish new one by // different size // -struct stat_data_v1 -{ - __le16 sd_mode; /* file type, permissions */ - __le16 sd_nlink; /* number of hard links */ - __le16 sd_uid; /* owner */ - __le16 sd_gid; /* group */ - __le32 sd_size; /* file size */ - __le32 sd_atime; /* time of last access */ - __le32 sd_mtime; /* time file was last modified */ - __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ - union { - __le32 sd_rdev; - __le32 sd_blocks; /* number of blocks file uses */ - } __attribute__ ((__packed__)) u; - __le32 sd_first_direct_byte; /* first byte of file which is stored - in a direct item: except that if it - equals 1 it is a symlink and if it - equals ~(__u32)0 there is no - direct item. The existence of this - field really grates on me. Let's - replace it with a macro based on - sd_size and our tail suppression - policy. Someday. 
-Hans */ +struct stat_data_v1 { + __le16 sd_mode; /* file type, permissions */ + __le16 sd_nlink; /* number of hard links */ + __le16 sd_uid; /* owner */ + __le16 sd_gid; /* group */ + __le32 sd_size; /* file size */ + __le32 sd_atime; /* time of last access */ + __le32 sd_mtime; /* time file was last modified */ + __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ + union { + __le32 sd_rdev; + __le32 sd_blocks; /* number of blocks file uses */ + } __attribute__ ((__packed__)) u; + __le32 sd_first_direct_byte; /* first byte of file which is stored + in a direct item: except that if it + equals 1 it is a symlink and if it + equals ~(__u32)0 there is no + direct item. The existence of this + field really grates on me. Let's + replace it with a macro based on + sd_size and our tail suppression + policy. Someday. -Hans */ } __attribute__ ((__packed__)); #define SD_V1_SIZE (sizeof(struct stat_data_v1)) @@ -862,29 +841,29 @@ struct stat_data_v1 /* Stat Data on disk (reiserfs version of UFS disk inode minus the address blocks) */ struct stat_data { - __le16 sd_mode; /* file type, permissions */ - __le16 sd_attrs; /* persistent inode flags */ - __le32 sd_nlink; /* number of hard links */ - __le64 sd_size; /* file size */ - __le32 sd_uid; /* owner */ - __le32 sd_gid; /* group */ - __le32 sd_atime; /* time of last access */ - __le32 sd_mtime; /* time file was last modified */ - __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ - __le32 sd_blocks; - union { - __le32 sd_rdev; - __le32 sd_generation; - //__le32 sd_first_direct_byte; - /* first byte of file which is stored in a - direct item: except that if it equals 1 - it is a symlink and if it equals - ~(__u32)0 there is no direct item. The - existence of this field really grates - on me. Let's replace it with a macro - based on sd_size and our tail - suppression policy? 
*/ - } __attribute__ ((__packed__)) u; + __le16 sd_mode; /* file type, permissions */ + __le16 sd_attrs; /* persistent inode flags */ + __le32 sd_nlink; /* number of hard links */ + __le64 sd_size; /* file size */ + __le32 sd_uid; /* owner */ + __le32 sd_gid; /* group */ + __le32 sd_atime; /* time of last access */ + __le32 sd_mtime; /* time file was last modified */ + __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ + __le32 sd_blocks; + union { + __le32 sd_rdev; + __le32 sd_generation; + //__le32 sd_first_direct_byte; + /* first byte of file which is stored in a + direct item: except that if it equals 1 + it is a symlink and if it equals + ~(__u32)0 there is no direct item. The + existence of this field really grates + on me. Let's replace it with a macro + based on sd_size and our tail + suppression policy? */ + } __attribute__ ((__packed__)) u; } __attribute__ ((__packed__)); // // this is 44 bytes long @@ -919,7 +898,6 @@ struct stat_data { #define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs)) #define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v)) - /***************************************************************************/ /* DIRECTORY STRUCTURE */ /***************************************************************************/ @@ -954,17 +932,14 @@ struct stat_data { /* NOT IMPLEMENTED: Directory will someday contain stat data of object */ - - -struct reiserfs_de_head -{ - __le32 deh_offset; /* third component of the directory entry key */ - __le32 deh_dir_id; /* objectid of the parent directory of the object, that is referenced - by directory entry */ - __le32 deh_objectid; /* objectid of the object, that is referenced by directory entry */ - __le16 deh_location; /* offset of name in the whole item */ - __le16 deh_state; /* whether 1) entry contains stat data (for future), and 2) whether - entry is hidden (unlinked) */ +struct reiserfs_de_head { + __le32 deh_offset; /* third component of 
the directory entry key */ + __le32 deh_dir_id; /* objectid of the parent directory of the object, that is referenced + by directory entry */ + __le32 deh_objectid; /* objectid of the object, that is referenced by directory entry */ + __le16 deh_location; /* offset of name in the whole item */ + __le16 deh_state; /* whether 1) entry contains stat data (for future), and 2) whether + entry is hidden (unlinked) */ } __attribute__ ((__packed__)); #define DEH_SIZE sizeof(struct reiserfs_de_head) #define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset)) @@ -986,7 +961,7 @@ struct reiserfs_de_head /* old format directories have this size when empty */ #define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3) -#define DEH_Statdata 0 /* not used now */ +#define DEH_Statdata 0 /* not used now */ #define DEH_Visible 2 /* 64 bit systems (and the S/390) need to be aligned explicitly -jdm */ @@ -1023,10 +998,10 @@ struct reiserfs_de_head #define de_visible(deh) test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) #define de_hidden(deh) !test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) -extern void make_empty_dir_item_v1 (char * body, __le32 dirid, __le32 objid, - __le32 par_dirid, __le32 par_objid); -extern void make_empty_dir_item (char * body, __le32 dirid, __le32 objid, - __le32 par_dirid, __le32 par_objid); +extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, + __le32 par_dirid, __le32 par_objid); +extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, + __le32 par_dirid, __le32 par_objid); /* array of the entry headers */ /* get item body */ @@ -1043,53 +1018,48 @@ extern void make_empty_dir_item (char * body, __le32 dirid, __le32 objid, #define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \ ((i) ? 
(deh_location((deh)-1) - deh_location((deh))) : (ih_item_len((ih)) - deh_location((deh)))) */ -static inline int entry_length (const struct buffer_head * bh, - const struct item_head * ih, int pos_in_item) +static inline int entry_length(const struct buffer_head *bh, + const struct item_head *ih, int pos_in_item) { - struct reiserfs_de_head * deh; + struct reiserfs_de_head *deh; - deh = B_I_DEH (bh, ih) + pos_in_item; - if (pos_in_item) - return deh_location(deh-1) - deh_location(deh); + deh = B_I_DEH(bh, ih) + pos_in_item; + if (pos_in_item) + return deh_location(deh - 1) - deh_location(deh); - return ih_item_len(ih) - deh_location(deh); + return ih_item_len(ih) - deh_location(deh); } - - /* number of entries in the directory item, depends on ENTRY_COUNT being at the start of directory dynamic data. */ #define I_ENTRY_COUNT(ih) (ih_entry_count((ih))) - /* name by bh, ih and entry_num */ #define B_I_E_NAME(bh,ih,entry_num) ((char *)(bh->b_data + ih_location(ih) + deh_location(B_I_DEH(bh,ih)+(entry_num)))) // two entries per block (at least) #define REISERFS_MAX_NAME(block_size) 255 - /* this structure is used for operations on directory entries. It is not a disk structure. 
*/ /* When reiserfs_find_entry or search_by_entry_key find directory entry, they return filled reiserfs_dir_entry structure */ -struct reiserfs_dir_entry -{ - struct buffer_head * de_bh; - int de_item_num; - struct item_head * de_ih; - int de_entry_num; - struct reiserfs_de_head * de_deh; - int de_entrylen; - int de_namelen; - char * de_name; - char * de_gen_number_bit_string; - - __u32 de_dir_id; - __u32 de_objectid; - - struct cpu_key de_entry_key; +struct reiserfs_dir_entry { + struct buffer_head *de_bh; + int de_item_num; + struct item_head *de_ih; + int de_entry_num; + struct reiserfs_de_head *de_deh; + int de_entrylen; + int de_namelen; + char *de_name; + char *de_gen_number_bit_string; + + __u32 de_dir_id; + __u32 de_objectid; + + struct cpu_key de_entry_key; }; - + /* these defines are useful when a particular member of a reiserfs_dir_entry is needed */ /* pointer to file name, stored in entry */ @@ -1099,8 +1069,6 @@ struct reiserfs_dir_entry #define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \ (I_DEH_N_ENTRY_LENGTH (ih, deh, entry_num) - (de_with_sd (deh) ? SD_SIZE : 0)) - - /* hash value occupies bits from 7 up to 30 */ #define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL) /* generation number occupies 7 bits starting from 0 up to 6 */ @@ -1109,7 +1077,6 @@ struct reiserfs_dir_entry #define SET_GENERATION_NUMBER(offset,gen_number) (GET_HASH_VALUE(offset)|(gen_number)) - /* * Picture represents an internal node of the reiserfs tree * ______________________________________________________ @@ -1125,9 +1092,9 @@ struct reiserfs_dir_entry /* Disk child pointer: The pointer from an internal node of the tree to a node that is on disk. */ struct disk_child { - __le32 dc_block_number; /* Disk child's block number. */ - __le16 dc_size; /* Disk child's used space. */ - __le16 dc_reserved; + __le32 dc_block_number; /* Disk child's block number. */ + __le16 dc_size; /* Disk child's used space. 
*/ + __le16 dc_reserved; }; #define DC_SIZE (sizeof(struct disk_child)) @@ -1144,7 +1111,7 @@ struct disk_child { #define B_N_CHILD_NUM(p_s_bh,n_pos) (dc_block_number(B_N_CHILD(p_s_bh,n_pos))) #define PUT_B_N_CHILD_NUM(p_s_bh,n_pos, val) (put_dc_block_number(B_N_CHILD(p_s_bh,n_pos), val )) - /* maximal value of field child_size in structure disk_child */ + /* maximal value of field child_size in structure disk_child */ /* child size is the combined size of all items and their headers */ #define MAX_CHILD_SIZE(bh) ((int)( (bh)->b_size - BLKH_SIZE )) @@ -1159,7 +1126,6 @@ struct disk_child { /* PATH STRUCTURES AND DEFINES */ /***************************************************************************/ - /* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the key. It uses reiserfs_bread to try to find buffers in the cache given their block number. If it does not find them in the cache it reads them from disk. For each node search_by_key finds using @@ -1168,20 +1134,18 @@ struct disk_child { is looking through a leaf node bin_search will find the position of the item which has key either equal to given key, or which is the maximal key less than the given key. */ -struct path_element { - struct buffer_head * pe_buffer; /* Pointer to the buffer at the path in the tree. */ - int pe_position; /* Position in the tree node which is placed in the */ - /* buffer above. */ +struct path_element { + struct buffer_head *pe_buffer; /* Pointer to the buffer at the path in the tree. */ + int pe_position; /* Position in the tree node which is placed in the */ + /* buffer above. */ }; -#define MAX_HEIGHT 5 /* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */ -#define EXTENDED_MAX_HEIGHT 7 /* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ -#define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to at least 2. 
*/ - -#define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ -#define MAX_FEB_SIZE 6 /* this MUST be MAX_HEIGHT + 1. See about FEB below */ - +#define MAX_HEIGHT 5 /* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */ +#define EXTENDED_MAX_HEIGHT 7 /* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ +#define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to at least 2. */ +#define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ +#define MAX_FEB_SIZE 6 /* this MUST be MAX_HEIGHT + 1. See about FEB below */ /* We need to keep track of who the ancestors of nodes are. When we perform a search we record which nodes were visited while @@ -1200,14 +1164,14 @@ excessive effort to avoid disturbing the precious VFS code.:-( The gods only know how we are going to SMP the code that uses them. znodes are the way! */ -#define PATH_READA 0x1 /* do read ahead */ -#define PATH_READA_BACK 0x2 /* read backwards */ +#define PATH_READA 0x1 /* do read ahead */ +#define PATH_READA_BACK 0x2 /* read backwards */ -struct path { - int path_length; /* Length of the array above. */ - int reada; - struct path_element path_elements[EXTENDED_MAX_HEIGHT]; /* Array of the path elements. */ - int pos_in_item; +struct path { + int path_length; /* Length of the array above. */ + int reada; + struct path_element path_elements[EXTENDED_MAX_HEIGHT]; /* Array of the path elements. */ + int pos_in_item; }; #define pos_in_item(path) ((path)->pos_in_item) @@ -1224,25 +1188,23 @@ struct path var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} /* Get position in the element at the path by path and path position. 
*/ #define PATH_OFFSET_POSITION(p_s_path,n_offset) (PATH_OFFSET_PELEMENT(p_s_path,n_offset)->pe_position) - #define PATH_PLAST_BUFFER(p_s_path) (PATH_OFFSET_PBUFFER((p_s_path), (p_s_path)->path_length)) /* you know, to the person who didn't - write this the macro name does not - at first suggest what it does. - Maybe POSITION_FROM_PATH_END? Or - maybe we should just focus on - dumping paths... -Hans */ + write this the macro name does not + at first suggest what it does. + Maybe POSITION_FROM_PATH_END? Or + maybe we should just focus on + dumping paths... -Hans */ #define PATH_LAST_POSITION(p_s_path) (PATH_OFFSET_POSITION((p_s_path), (p_s_path)->path_length)) - #define PATH_PITEM_HEAD(p_s_path) B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_path),PATH_LAST_POSITION(p_s_path)) /* in do_balance leaf has h == 0 in contrast with path structure, where root has level == 0. That is why we need these defines */ #define PATH_H_PBUFFER(p_s_path, h) PATH_OFFSET_PBUFFER (p_s_path, p_s_path->path_length - (h)) /* tb->S[h] */ -#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1) /* tb->F[h] or tb->S[0]->b_parent */ -#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h)) -#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) /* tb->S[h]->b_item_order */ +#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1) /* tb->F[h] or tb->S[0]->b_parent */ +#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h)) +#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) /* tb->S[h]->b_item_order */ #define PATH_H_PATH_OFFSET(p_s_path, n_h) ((p_s_path)->path_length - (n_h)) @@ -1253,7 +1215,6 @@ struct path var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} #define item_moved(ih,path) comp_items(ih, path) #define path_changed(ih,path) comp_items (ih, path) - /***************************************************************************/ /* MISC */ 
/***************************************************************************/ @@ -1272,30 +1233,26 @@ struct path var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} // reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset #define U32_MAX (~(__u32)0) -static inline loff_t max_reiserfs_offset (struct inode * inode) +static inline loff_t max_reiserfs_offset(struct inode *inode) { - if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5) - return (loff_t)U32_MAX; + if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5) + return (loff_t) U32_MAX; - return (loff_t)((~(__u64)0) >> 4); + return (loff_t) ((~(__u64) 0) >> 4); } - /*#define MAX_KEY_UNIQUENESS MAX_UL_INT*/ #define MAX_KEY_OBJECTID MAX_UL_INT - #define MAX_B_NUM MAX_UL_INT #define MAX_FC_NUM MAX_US_INT - /* the purpose is to detect overflow of an unsigned short */ #define REISERFS_LINK_MAX (MAX_US_INT - 1000) - /* The following defines are used in reiserfs_insert_item and reiserfs_append_item */ -#define REISERFS_KERNEL_MEM 0 /* reiserfs kernel memory mode */ -#define REISERFS_USER_MEM 1 /* reiserfs user memory mode */ +#define REISERFS_KERNEL_MEM 0 /* reiserfs kernel memory mode */ +#define REISERFS_USER_MEM 1 /* reiserfs user memory mode */ #define fs_generation(s) (REISERFS_SB(s)->s_generation_counter) #define get_generation(s) atomic_read (&fs_generation(s)) @@ -1303,7 +1260,6 @@ static inline loff_t max_reiserfs_offset (struct inode * inode) #define __fs_changed(gen,s) (gen != get_generation (s)) #define fs_changed(gen,s) ({cond_resched(); __fs_changed(gen, s);}) - /***************************************************************************/ /* FIXATE NODES */ /***************************************************************************/ @@ -1324,38 +1280,34 @@ static inline loff_t max_reiserfs_offset (struct inode * inode) calculating what we can shift to neighbors and how many nodes we have to have if we do not any shiftings, if we shift to left/right neighbor or to both. 
*/ -struct virtual_item -{ - int vi_index; // index in the array of item operations - unsigned short vi_type; // left/right mergeability - unsigned short vi_item_len; /* length of item that it will have after balancing */ - struct item_head * vi_ih; - const char * vi_item; // body of item (old or new) - const void * vi_new_data; // 0 always but paste mode - void * vi_uarea; // item specific area +struct virtual_item { + int vi_index; // index in the array of item operations + unsigned short vi_type; // left/right mergeability + unsigned short vi_item_len; /* length of item that it will have after balancing */ + struct item_head *vi_ih; + const char *vi_item; // body of item (old or new) + const void *vi_new_data; // 0 always but paste mode + void *vi_uarea; // item specific area }; - -struct virtual_node -{ - char * vn_free_ptr; /* this is a pointer to the free space in the buffer */ - unsigned short vn_nr_item; /* number of items in virtual node */ - short vn_size; /* size of node , that node would have if it has unlimited size and no balancing is performed */ - short vn_mode; /* mode of balancing (paste, insert, delete, cut) */ - short vn_affected_item_num; - short vn_pos_in_item; - struct item_head * vn_ins_ih; /* item header of inserted item, 0 for other modes */ - const void * vn_data; - struct virtual_item * vn_vi; /* array of items (including a new one, excluding item to be deleted) */ +struct virtual_node { + char *vn_free_ptr; /* this is a pointer to the free space in the buffer */ + unsigned short vn_nr_item; /* number of items in virtual node */ + short vn_size; /* size of node , that node would have if it has unlimited size and no balancing is performed */ + short vn_mode; /* mode of balancing (paste, insert, delete, cut) */ + short vn_affected_item_num; + short vn_pos_in_item; + struct item_head *vn_ins_ih; /* item header of inserted item, 0 for other modes */ + const void *vn_data; + struct virtual_item *vn_vi; /* array of items (including a new one, 
excluding item to be deleted) */ }; /* used by directory items when creating virtual nodes */ struct direntry_uarea { - int flags; - __u16 entry_count; - __u16 entry_sizes[1]; -} __attribute__ ((__packed__)) ; - + int flags; + __u16 entry_count; + __u16 entry_sizes[1]; +} __attribute__ ((__packed__)); /***************************************************************************/ /* TREE BALANCE */ @@ -1378,73 +1330,72 @@ struct direntry_uarea { #define MAX_AMOUNT_NEEDED 2 /* someday somebody will prefix every field in this struct with tb_ */ -struct tree_balance -{ - int tb_mode; - int need_balance_dirty; - struct super_block * tb_sb; - struct reiserfs_transaction_handle *transaction_handle ; - struct path * tb_path; - struct buffer_head * L[MAX_HEIGHT]; /* array of left neighbors of nodes in the path */ - struct buffer_head * R[MAX_HEIGHT]; /* array of right neighbors of nodes in the path*/ - struct buffer_head * FL[MAX_HEIGHT]; /* array of fathers of the left neighbors */ - struct buffer_head * FR[MAX_HEIGHT]; /* array of fathers of the right neighbors */ - struct buffer_head * CFL[MAX_HEIGHT]; /* array of common parents of center node and its left neighbor */ - struct buffer_head * CFR[MAX_HEIGHT]; /* array of common parents of center node and its right neighbor */ - - struct buffer_head * FEB[MAX_FEB_SIZE]; /* array of empty buffers. Number of buffers in array equals - cur_blknum. */ - struct buffer_head * used[MAX_FEB_SIZE]; - struct buffer_head * thrown[MAX_FEB_SIZE]; - int lnum[MAX_HEIGHT]; /* array of number of items which must be - shifted to the left in order to balance the - current node; for leaves includes item that - will be partially shifted; for internal - nodes, it is the number of child pointers - rather than items. It includes the new item - being created. The code sometimes subtracts - one to get the number of wholly shifted - items for other purposes. 
*/ - int rnum[MAX_HEIGHT]; /* substitute right for left in comment above */ - int lkey[MAX_HEIGHT]; /* array indexed by height h mapping the key delimiting L[h] and - S[h] to its item number within the node CFL[h] */ - int rkey[MAX_HEIGHT]; /* substitute r for l in comment above */ - int insert_size[MAX_HEIGHT]; /* the number of bytes by we are trying to add or remove from - S[h]. A negative value means removing. */ - int blknum[MAX_HEIGHT]; /* number of nodes that will replace node S[h] after - balancing on the level h of the tree. If 0 then S is - being deleted, if 1 then S is remaining and no new nodes - are being created, if 2 or 3 then 1 or 2 new nodes is - being created */ - - /* fields that are used only for balancing leaves of the tree */ - int cur_blknum; /* number of empty blocks having been already allocated */ - int s0num; /* number of items that fall into left most node when S[0] splits */ - int s1num; /* number of items that fall into first new node when S[0] splits */ - int s2num; /* number of items that fall into second new node when S[0] splits */ - int lbytes; /* number of bytes which can flow to the left neighbor from the left */ - /* most liquid item that cannot be shifted from S[0] entirely */ - /* if -1 then nothing will be partially shifted */ - int rbytes; /* number of bytes which will flow to the right neighbor from the right */ - /* most liquid item that cannot be shifted from S[0] entirely */ - /* if -1 then nothing will be partially shifted */ - int s1bytes; /* number of bytes which flow to the first new node when S[0] splits */ - /* note: if S[0] splits into 3 nodes, then items do not need to be cut */ - int s2bytes; - struct buffer_head * buf_to_free[MAX_FREE_BLOCK]; /* buffers which are to be freed after do_balance finishes by unfix_nodes */ - char * vn_buf; /* kmalloced memory. 
Used to create +struct tree_balance { + int tb_mode; + int need_balance_dirty; + struct super_block *tb_sb; + struct reiserfs_transaction_handle *transaction_handle; + struct path *tb_path; + struct buffer_head *L[MAX_HEIGHT]; /* array of left neighbors of nodes in the path */ + struct buffer_head *R[MAX_HEIGHT]; /* array of right neighbors of nodes in the path */ + struct buffer_head *FL[MAX_HEIGHT]; /* array of fathers of the left neighbors */ + struct buffer_head *FR[MAX_HEIGHT]; /* array of fathers of the right neighbors */ + struct buffer_head *CFL[MAX_HEIGHT]; /* array of common parents of center node and its left neighbor */ + struct buffer_head *CFR[MAX_HEIGHT]; /* array of common parents of center node and its right neighbor */ + + struct buffer_head *FEB[MAX_FEB_SIZE]; /* array of empty buffers. Number of buffers in array equals + cur_blknum. */ + struct buffer_head *used[MAX_FEB_SIZE]; + struct buffer_head *thrown[MAX_FEB_SIZE]; + int lnum[MAX_HEIGHT]; /* array of number of items which must be + shifted to the left in order to balance the + current node; for leaves includes item that + will be partially shifted; for internal + nodes, it is the number of child pointers + rather than items. It includes the new item + being created. The code sometimes subtracts + one to get the number of wholly shifted + items for other purposes. */ + int rnum[MAX_HEIGHT]; /* substitute right for left in comment above */ + int lkey[MAX_HEIGHT]; /* array indexed by height h mapping the key delimiting L[h] and + S[h] to its item number within the node CFL[h] */ + int rkey[MAX_HEIGHT]; /* substitute r for l in comment above */ + int insert_size[MAX_HEIGHT]; /* the number of bytes by we are trying to add or remove from + S[h]. A negative value means removing. */ + int blknum[MAX_HEIGHT]; /* number of nodes that will replace node S[h] after + balancing on the level h of the tree. 
If 0 then S is + being deleted, if 1 then S is remaining and no new nodes + are being created, if 2 or 3 then 1 or 2 new nodes is + being created */ + + /* fields that are used only for balancing leaves of the tree */ + int cur_blknum; /* number of empty blocks having been already allocated */ + int s0num; /* number of items that fall into left most node when S[0] splits */ + int s1num; /* number of items that fall into first new node when S[0] splits */ + int s2num; /* number of items that fall into second new node when S[0] splits */ + int lbytes; /* number of bytes which can flow to the left neighbor from the left */ + /* most liquid item that cannot be shifted from S[0] entirely */ + /* if -1 then nothing will be partially shifted */ + int rbytes; /* number of bytes which will flow to the right neighbor from the right */ + /* most liquid item that cannot be shifted from S[0] entirely */ + /* if -1 then nothing will be partially shifted */ + int s1bytes; /* number of bytes which flow to the first new node when S[0] splits */ + /* note: if S[0] splits into 3 nodes, then items do not need to be cut */ + int s2bytes; + struct buffer_head *buf_to_free[MAX_FREE_BLOCK]; /* buffers which are to be freed after do_balance finishes by unfix_nodes */ + char *vn_buf; /* kmalloced memory. 
Used to create virtual node and keep map of dirtied bitmap blocks */ - int vn_buf_size; /* size of the vn_buf */ - struct virtual_node * tb_vn; /* VN starts after bitmap of bitmap blocks */ + int vn_buf_size; /* size of the vn_buf */ + struct virtual_node *tb_vn; /* VN starts after bitmap of bitmap blocks */ - int fs_gen; /* saved value of `reiserfs_generation' counter - see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */ + int fs_gen; /* saved value of `reiserfs_generation' counter + see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */ #ifdef DISPLACE_NEW_PACKING_LOCALITIES - struct in_core_key key; /* key pointer, to pass to block allocator or - another low-level subsystem */ + struct in_core_key key; /* key pointer, to pass to block allocator or + another low-level subsystem */ #endif -} ; +}; /* These are modes of balancing */ @@ -1479,13 +1430,12 @@ struct tree_balance /* used in do_balance for passing parent of node information that has been gotten from tb struct */ struct buffer_info { - struct tree_balance * tb; - struct buffer_head * bi_bh; - struct buffer_head * bi_parent; - int bi_position; + struct tree_balance *tb; + struct buffer_head *bi_bh; + struct buffer_head *bi_parent; + int bi_position; }; - /* there are 4 types of items: stat data, directory item, indirect, direct. +-------------------+------------+--------------+------------+ | | k_offset | k_uniqueness | mergeable? 
| @@ -1503,24 +1453,24 @@ struct buffer_info { */ struct item_operations { - int (*bytes_number) (struct item_head * ih, int block_size); - void (*decrement_key) (struct cpu_key *); - int (*is_left_mergeable) (struct reiserfs_key * ih, unsigned long bsize); - void (*print_item) (struct item_head *, char * item); - void (*check_item) (struct item_head *, char * item); - - int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi, - int is_affected, int insert_size); - int (*check_left) (struct virtual_item * vi, int free, - int start_skip, int end_skip); - int (*check_right) (struct virtual_item * vi, int free); - int (*part_size) (struct virtual_item * vi, int from, int to); - int (*unit_num) (struct virtual_item * vi); - void (*print_vi) (struct virtual_item * vi); + int (*bytes_number) (struct item_head * ih, int block_size); + void (*decrement_key) (struct cpu_key *); + int (*is_left_mergeable) (struct reiserfs_key * ih, + unsigned long bsize); + void (*print_item) (struct item_head *, char *item); + void (*check_item) (struct item_head *, char *item); + + int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi, + int is_affected, int insert_size); + int (*check_left) (struct virtual_item * vi, int free, + int start_skip, int end_skip); + int (*check_right) (struct virtual_item * vi, int free); + int (*part_size) (struct virtual_item * vi, int from, int to); + int (*unit_num) (struct virtual_item * vi); + void (*print_vi) (struct virtual_item * vi); }; - -extern struct item_operations * item_ops [TYPE_ANY + 1]; +extern struct item_operations *item_ops[TYPE_ANY + 1]; #define op_bytes_number(ih,bsize) item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize) #define op_is_left_mergeable(key,bsize) item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize) @@ -1533,8 +1483,6 @@ extern struct item_operations * item_ops [TYPE_ANY + 1]; #define op_unit_num(vi) item_ops[(vi)->vi_index]->unit_num (vi) #define op_print_vi(vi) 
item_ops[(vi)->vi_index]->print_vi (vi) - - #define COMP_SHORT_KEYS comp_short_keys /* number of blocks pointed to by the indirect item */ @@ -1545,8 +1493,7 @@ extern struct item_operations * item_ops [TYPE_ANY + 1]; /* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */ - -/* get the item header */ +/* get the item header */ #define B_N_PITEM_HEAD(bh,item_num) ( (struct item_head * )((bh)->b_data + BLKH_SIZE) + (item_num) ) /* get key */ @@ -1577,9 +1524,9 @@ extern struct item_operations * item_ops [TYPE_ANY + 1]; #define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0) struct reiserfs_iget_args { - __u32 objectid ; - __u32 dirid ; -} ; + __u32 objectid; + __u32 dirid; +}; /***************************************************************************/ /* FUNCTION DECLARATIONS */ @@ -1595,11 +1542,11 @@ struct reiserfs_iget_args { /* first block written in a commit. */ struct reiserfs_journal_desc { - __le32 j_trans_id ; /* id of commit */ - __le32 j_len ; /* length of commit. len +1 is the commit block */ - __le32 j_mount_id ; /* mount id of this trans*/ - __le32 j_realblock[1] ; /* real locations for each block */ -} ; + __le32 j_trans_id; /* id of commit */ + __le32 j_len; /* length of commit. 
len +1 is the commit block */ + __le32 j_mount_id; /* mount id of this trans */ + __le32 j_realblock[1]; /* real locations for each block */ +}; #define get_desc_trans_id(d) le32_to_cpu((d)->j_trans_id) #define get_desc_trans_len(d) le32_to_cpu((d)->j_len) @@ -1611,10 +1558,10 @@ struct reiserfs_journal_desc { /* last block written in a commit */ struct reiserfs_journal_commit { - __le32 j_trans_id ; /* must match j_trans_id from the desc block */ - __le32 j_len ; /* ditto */ - __le32 j_realblock[1] ; /* real locations for each block */ -} ; + __le32 j_trans_id; /* must match j_trans_id from the desc block */ + __le32 j_len; /* ditto */ + __le32 j_realblock[1]; /* real locations for each block */ +}; #define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id) #define get_commit_trans_len(c) le32_to_cpu((c)->j_len) @@ -1628,19 +1575,19 @@ struct reiserfs_journal_commit { ** and this transaction does not need to be replayed. */ struct reiserfs_journal_header { - __le32 j_last_flush_trans_id ; /* id of last fully flushed transaction */ - __le32 j_first_unflushed_offset ; /* offset in the log of where to start replay after a crash */ - __le32 j_mount_id ; - /* 12 */ struct journal_params jh_journal; -} ; + __le32 j_last_flush_trans_id; /* id of last fully flushed transaction */ + __le32 j_first_unflushed_offset; /* offset in the log of where to start replay after a crash */ + __le32 j_mount_id; + /* 12 */ struct journal_params jh_journal; +}; /* biggest tunable defines are right here */ -#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ -#define JOURNAL_TRANS_MAX_DEFAULT 1024 /* biggest possible single transaction, don't change for now (8/3/99) */ +#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ +#define JOURNAL_TRANS_MAX_DEFAULT 1024 /* biggest possible single transaction, don't change for now (8/3/99) */ #define JOURNAL_TRANS_MIN_DEFAULT 256 -#define JOURNAL_MAX_BATCH_DEFAULT 900 /* max blocks to batch into one transaction, 
don't make this any bigger than 900 */ +#define JOURNAL_MAX_BATCH_DEFAULT 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */ #define JOURNAL_MIN_RATIO 2 -#define JOURNAL_MAX_COMMIT_AGE 30 +#define JOURNAL_MAX_COMMIT_AGE 30 #define JOURNAL_MAX_TRANS_AGE 30 #define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9) #ifdef CONFIG_QUOTA @@ -1664,10 +1611,10 @@ struct reiserfs_journal_header { ** the current number of nodes is > max, the node is freed, otherwise, ** it is put on a free list for faster use later. */ -#define REISERFS_MIN_BITMAP_NODES 10 -#define REISERFS_MAX_BITMAP_NODES 100 +#define REISERFS_MIN_BITMAP_NODES 10 +#define REISERFS_MAX_BITMAP_NODES 100 -#define JBH_HASH_SHIFT 13 /* these are based on journal hash size of 8192 */ +#define JBH_HASH_SHIFT 13 /* these are based on journal hash size of 8192 */ #define JBH_HASH_MASK 8191 #define _jhashfn(sb,block) \ @@ -1681,14 +1628,14 @@ struct reiserfs_journal_header { #define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) enum reiserfs_bh_state_bits { - BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */ - BH_JDirty_wait, - BH_JNew, /* disk block was taken off free list before - * being in a finished transaction, or - * written to disk. Can be reused immed. */ - BH_JPrepared, - BH_JRestore_dirty, - BH_JTest, // debugging only will go away + BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */ + BH_JDirty_wait, + BH_JNew, /* disk block was taken off free list before + * being in a finished transaction, or + * written to disk. Can be reused immed. */ + BH_JPrepared, + BH_JRestore_dirty, + BH_JTest, // debugging only will go away }; BUFFER_FNS(JDirty, journaled); @@ -1708,175 +1655,192 @@ TAS_BUFFER_FNS(JTest, journal_test); ** transaction handle which is passed around for all journal calls */ struct reiserfs_transaction_handle { - struct super_block *t_super ; /* super for this FS when journal_begin was - called. 
saves calls to reiserfs_get_super - also used by nested transactions to make - sure they are nesting on the right FS - _must_ be first in the handle - */ - int t_refcount; - int t_blocks_logged ; /* number of blocks this writer has logged */ - int t_blocks_allocated ; /* number of blocks this writer allocated */ - unsigned long t_trans_id ; /* sanity check, equals the current trans id */ - void *t_handle_save ; /* save existing current->journal_info */ - unsigned displace_new_blocks:1; /* if new block allocation occurres, that block - should be displaced from others */ - struct list_head t_list; -} ; + struct super_block *t_super; /* super for this FS when journal_begin was + called. saves calls to reiserfs_get_super + also used by nested transactions to make + sure they are nesting on the right FS + _must_ be first in the handle + */ + int t_refcount; + int t_blocks_logged; /* number of blocks this writer has logged */ + int t_blocks_allocated; /* number of blocks this writer allocated */ + unsigned long t_trans_id; /* sanity check, equals the current trans id */ + void *t_handle_save; /* save existing current->journal_info */ + unsigned displace_new_blocks:1; /* if new block allocation occurres, that block + should be displaced from others */ + struct list_head t_list; +}; /* used to keep track of ordered and tail writes, attached to the buffer * head through b_journal_head. 
*/ struct reiserfs_jh { - struct reiserfs_journal_list *jl; - struct buffer_head *bh; - struct list_head list; + struct reiserfs_journal_list *jl; + struct buffer_head *bh; + struct list_head list; }; void reiserfs_free_jh(struct buffer_head *bh); int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh); int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh); -int journal_mark_dirty(struct reiserfs_transaction_handle *, struct super_block *, struct buffer_head *bh) ; - -static inline int -reiserfs_file_data_log(struct inode *inode) { - if (reiserfs_data_log(inode->i_sb) || - (REISERFS_I(inode)->i_flags & i_data_log)) - return 1 ; - return 0 ; +int journal_mark_dirty(struct reiserfs_transaction_handle *, + struct super_block *, struct buffer_head *bh); + +static inline int reiserfs_file_data_log(struct inode *inode) +{ + if (reiserfs_data_log(inode->i_sb) || + (REISERFS_I(inode)->i_flags & i_data_log)) + return 1; + return 0; } -static inline int reiserfs_transaction_running(struct super_block *s) { - struct reiserfs_transaction_handle *th = current->journal_info ; - if (th && th->t_super == s) - return 1 ; - if (th && th->t_super == NULL) - BUG(); - return 0 ; +static inline int reiserfs_transaction_running(struct super_block *s) +{ + struct reiserfs_transaction_handle *th = current->journal_info; + if (th && th->t_super == s) + return 1; + if (th && th->t_super == NULL) + BUG(); + return 0; } int reiserfs_async_progress_wait(struct super_block *s); -struct reiserfs_transaction_handle * -reiserfs_persistent_transaction(struct super_block *, int count); +struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct + super_block + *, + int count); int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *); int reiserfs_commit_page(struct inode *inode, struct page *page, - unsigned from, unsigned to); + unsigned from, unsigned to); int reiserfs_flush_old_commits(struct super_block *); -int 
reiserfs_commit_for_inode(struct inode *) ; -int reiserfs_inode_needs_commit(struct inode *) ; -void reiserfs_update_inode_transaction(struct inode *) ; -void reiserfs_wait_on_write_block(struct super_block *s) ; -void reiserfs_block_writes(struct reiserfs_transaction_handle *th) ; -void reiserfs_allow_writes(struct super_block *s) ; -void reiserfs_check_lock_depth(struct super_block *s, char *caller) ; -int reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh, int wait) ; -void reiserfs_restore_prepared_buffer(struct super_block *, struct buffer_head *bh) ; -int journal_init(struct super_block *, const char * j_dev_name, int old_format, unsigned int) ; -int journal_release(struct reiserfs_transaction_handle*, struct super_block *) ; -int journal_release_error(struct reiserfs_transaction_handle*, struct super_block *) ; -int journal_end(struct reiserfs_transaction_handle *, struct super_block *, unsigned long) ; -int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *, unsigned long) ; -int journal_mark_freed(struct reiserfs_transaction_handle *, struct super_block *, b_blocknr_t blocknr) ; -int journal_transaction_should_end(struct reiserfs_transaction_handle *, int) ; -int reiserfs_in_journal(struct super_block *p_s_sb, int bmap_nr, int bit_nr, int searchall, b_blocknr_t *next) ; -int journal_begin(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ; -int journal_join_abort(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ; -void reiserfs_journal_abort (struct super_block *sb, int errno); -void reiserfs_abort (struct super_block *sb, int errno, const char *fmt, ...); -int reiserfs_allocate_list_bitmaps(struct super_block *s, struct reiserfs_list_bitmap *, int) ; - -void add_save_link (struct reiserfs_transaction_handle * th, - struct inode * inode, int truncate); -int remove_save_link (struct inode * inode, int truncate); +int 
reiserfs_commit_for_inode(struct inode *); +int reiserfs_inode_needs_commit(struct inode *); +void reiserfs_update_inode_transaction(struct inode *); +void reiserfs_wait_on_write_block(struct super_block *s); +void reiserfs_block_writes(struct reiserfs_transaction_handle *th); +void reiserfs_allow_writes(struct super_block *s); +void reiserfs_check_lock_depth(struct super_block *s, char *caller); +int reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh, + int wait); +void reiserfs_restore_prepared_buffer(struct super_block *, + struct buffer_head *bh); +int journal_init(struct super_block *, const char *j_dev_name, int old_format, + unsigned int); +int journal_release(struct reiserfs_transaction_handle *, struct super_block *); +int journal_release_error(struct reiserfs_transaction_handle *, + struct super_block *); +int journal_end(struct reiserfs_transaction_handle *, struct super_block *, + unsigned long); +int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *, + unsigned long); +int journal_mark_freed(struct reiserfs_transaction_handle *, + struct super_block *, b_blocknr_t blocknr); +int journal_transaction_should_end(struct reiserfs_transaction_handle *, int); +int reiserfs_in_journal(struct super_block *p_s_sb, int bmap_nr, int bit_nr, + int searchall, b_blocknr_t * next); +int journal_begin(struct reiserfs_transaction_handle *, + struct super_block *p_s_sb, unsigned long); +int journal_join_abort(struct reiserfs_transaction_handle *, + struct super_block *p_s_sb, unsigned long); +void reiserfs_journal_abort(struct super_block *sb, int errno); +void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); +int reiserfs_allocate_list_bitmaps(struct super_block *s, + struct reiserfs_list_bitmap *, int); + +void add_save_link(struct reiserfs_transaction_handle *th, + struct inode *inode, int truncate); +int remove_save_link(struct inode *inode, int truncate); /* objectid.c */ -__u32 
reiserfs_get_unused_objectid (struct reiserfs_transaction_handle *th); -void reiserfs_release_objectid (struct reiserfs_transaction_handle *th, __u32 objectid_to_release); -int reiserfs_convert_objectid_map_v1(struct super_block *) ; +__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th); +void reiserfs_release_objectid(struct reiserfs_transaction_handle *th, + __u32 objectid_to_release); +int reiserfs_convert_objectid_map_v1(struct super_block *); /* stree.c */ int B_IS_IN_TREE(const struct buffer_head *); -extern void copy_item_head(struct item_head * p_v_to, - const struct item_head * p_v_from); +extern void copy_item_head(struct item_head *p_v_to, + const struct item_head *p_v_from); // first key is in cpu form, second - le -extern int comp_short_keys (const struct reiserfs_key * le_key, - const struct cpu_key * cpu_key); -extern void le_key2cpu_key (struct cpu_key * to, const struct reiserfs_key * from); +extern int comp_short_keys(const struct reiserfs_key *le_key, + const struct cpu_key *cpu_key); +extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from); // both are in le form -extern int comp_le_keys (const struct reiserfs_key *, const struct reiserfs_key *); -extern int comp_short_le_keys (const struct reiserfs_key *, const struct reiserfs_key *); +extern int comp_le_keys(const struct reiserfs_key *, + const struct reiserfs_key *); +extern int comp_short_le_keys(const struct reiserfs_key *, + const struct reiserfs_key *); // // get key version from on disk key - kludge // -static inline int le_key_version (const struct reiserfs_key * key) +static inline int le_key_version(const struct reiserfs_key *key) { - int type; - - type = offset_v2_k_type( &(key->u.k_offset_v2)); - if (type != TYPE_DIRECT && type != TYPE_INDIRECT && type != TYPE_DIRENTRY) - return KEY_FORMAT_3_5; - - return KEY_FORMAT_3_6; - -} + int type; + type = offset_v2_k_type(&(key->u.k_offset_v2)); + if (type != TYPE_DIRECT && type != TYPE_INDIRECT 
+ && type != TYPE_DIRENTRY) + return KEY_FORMAT_3_5; + + return KEY_FORMAT_3_6; -static inline void copy_key (struct reiserfs_key *to, const struct reiserfs_key *from) -{ - memcpy (to, from, KEY_SIZE); } +static inline void copy_key(struct reiserfs_key *to, + const struct reiserfs_key *from) +{ + memcpy(to, from, KEY_SIZE); +} -int comp_items (const struct item_head * stored_ih, const struct path * p_s_path); -const struct reiserfs_key * get_rkey (const struct path * p_s_chk_path, - const struct super_block * p_s_sb); -int search_by_key (struct super_block *, const struct cpu_key *, - struct path *, int); +int comp_items(const struct item_head *stored_ih, const struct path *p_s_path); +const struct reiserfs_key *get_rkey(const struct path *p_s_chk_path, + const struct super_block *p_s_sb); +int search_by_key(struct super_block *, const struct cpu_key *, + struct path *, int); #define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL) -int search_for_position_by_key (struct super_block * p_s_sb, - const struct cpu_key * p_s_cpu_key, - struct path * p_s_search_path); -extern void decrement_bcount (struct buffer_head * p_s_bh); -void decrement_counters_in_path (struct path * p_s_search_path); -void pathrelse (struct path * p_s_search_path); -int reiserfs_check_path(struct path *p) ; -void pathrelse_and_restore (struct super_block *s, struct path * p_s_search_path); - -int reiserfs_insert_item (struct reiserfs_transaction_handle *th, - struct path * path, - const struct cpu_key * key, - struct item_head * ih, - struct inode *inode, const char * body); - -int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, - struct path * path, - const struct cpu_key * key, - struct inode *inode, - const char * body, int paste_size); - -int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th, - struct path * path, - struct cpu_key * key, - struct inode * inode, - struct page *page, - loff_t new_file_size); - -int reiserfs_delete_item 
(struct reiserfs_transaction_handle *th, - struct path * path, - const struct cpu_key * key, - struct inode * inode, - struct buffer_head * p_s_un_bh); - -void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th, - struct inode *inode, struct reiserfs_key * key); -int reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * p_s_inode); -int reiserfs_do_truncate (struct reiserfs_transaction_handle *th, - struct inode * p_s_inode, struct page *, - int update_timestamps); +int search_for_position_by_key(struct super_block *p_s_sb, + const struct cpu_key *p_s_cpu_key, + struct path *p_s_search_path); +extern void decrement_bcount(struct buffer_head *p_s_bh); +void decrement_counters_in_path(struct path *p_s_search_path); +void pathrelse(struct path *p_s_search_path); +int reiserfs_check_path(struct path *p); +void pathrelse_and_restore(struct super_block *s, struct path *p_s_search_path); + +int reiserfs_insert_item(struct reiserfs_transaction_handle *th, + struct path *path, + const struct cpu_key *key, + struct item_head *ih, + struct inode *inode, const char *body); + +int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, + struct path *path, + const struct cpu_key *key, + struct inode *inode, + const char *body, int paste_size); + +int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, + struct path *path, + struct cpu_key *key, + struct inode *inode, + struct page *page, loff_t new_file_size); + +int reiserfs_delete_item(struct reiserfs_transaction_handle *th, + struct path *path, + const struct cpu_key *key, + struct inode *inode, struct buffer_head *p_s_un_bh); + +void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, + struct inode *inode, struct reiserfs_key *key); +int reiserfs_delete_object(struct reiserfs_transaction_handle *th, + struct inode *p_s_inode); +int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, + struct inode *p_s_inode, struct page *, + int 
update_timestamps); #define i_block_size(inode) ((inode)->i_sb->s_blocksize) #define file_size(inode) ((inode)->i_size) @@ -1885,66 +1849,67 @@ int reiserfs_do_truncate (struct reiserfs_transaction_handle *th, #define tail_has_to_be_packed(inode) (have_large_tails ((inode)->i_sb)?\ !STORE_TAIL_IN_UNFM_S1(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):have_small_tails ((inode)->i_sb)?!STORE_TAIL_IN_UNFM_S2(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):0 ) -void padd_item (char * item, int total_length, int length); +void padd_item(char *item, int total_length, int length); /* inode.c */ /* args for the create parameter of reiserfs_get_block */ -#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ -#define GET_BLOCK_CREATE 1 /* add anything you need to find block */ -#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ -#define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ -#define GET_BLOCK_NO_ISEM 8 /* i_sem is not held, don't preallocate */ -#define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */ - -int restart_transaction(struct reiserfs_transaction_handle *th, struct inode *inode, struct path *path); -void reiserfs_read_locked_inode(struct inode * inode, struct reiserfs_iget_args *args) ; -int reiserfs_find_actor(struct inode * inode, void *p) ; -int reiserfs_init_locked_inode(struct inode * inode, void *p) ; -void reiserfs_delete_inode (struct inode * inode); -int reiserfs_write_inode (struct inode * inode, int) ; -int reiserfs_get_block (struct inode * inode, sector_t block, struct buffer_head * bh_result, int create); -struct dentry *reiserfs_get_dentry(struct super_block *, void *) ; -struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 *data, - int len, int fhtype, - int (*acceptable)(void *contect, struct dentry *de), - void *context) ; -int reiserfs_encode_fh( struct dentry *dentry, __u32 *data, int *lenp, - int connectable ); - -int 
reiserfs_truncate_file(struct inode *, int update_timestamps) ; -void make_cpu_key (struct cpu_key * cpu_key, struct inode * inode, loff_t offset, - int type, int key_length); -void make_le_item_head (struct item_head * ih, const struct cpu_key * key, - int version, - loff_t offset, int type, int length, int entry_count); -struct inode * reiserfs_iget (struct super_block * s, - const struct cpu_key * key); - - -int reiserfs_new_inode (struct reiserfs_transaction_handle *th, - struct inode * dir, int mode, - const char * symname, loff_t i_size, - struct dentry *dentry, struct inode *inode); - -void reiserfs_update_sd_size (struct reiserfs_transaction_handle *th, - struct inode * inode, loff_t size); +#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ +#define GET_BLOCK_CREATE 1 /* add anything you need to find block */ +#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ +#define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ +#define GET_BLOCK_NO_ISEM 8 /* i_sem is not held, don't preallocate */ +#define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */ + +int restart_transaction(struct reiserfs_transaction_handle *th, + struct inode *inode, struct path *path); +void reiserfs_read_locked_inode(struct inode *inode, + struct reiserfs_iget_args *args); +int reiserfs_find_actor(struct inode *inode, void *p); +int reiserfs_init_locked_inode(struct inode *inode, void *p); +void reiserfs_delete_inode(struct inode *inode); +int reiserfs_write_inode(struct inode *inode, int); +int reiserfs_get_block(struct inode *inode, sector_t block, + struct buffer_head *bh_result, int create); +struct dentry *reiserfs_get_dentry(struct super_block *, void *); +struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, + int len, int fhtype, + int (*acceptable) (void *contect, + struct dentry * de), + void *context); +int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, + int 
connectable); + +int reiserfs_truncate_file(struct inode *, int update_timestamps); +void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset, + int type, int key_length); +void make_le_item_head(struct item_head *ih, const struct cpu_key *key, + int version, + loff_t offset, int type, int length, int entry_count); +struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key); + +int reiserfs_new_inode(struct reiserfs_transaction_handle *th, + struct inode *dir, int mode, + const char *symname, loff_t i_size, + struct dentry *dentry, struct inode *inode); + +void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, + struct inode *inode, loff_t size); static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th, - struct inode *inode) + struct inode *inode) { - reiserfs_update_sd_size(th, inode, inode->i_size) ; + reiserfs_update_sd_size(th, inode, inode->i_size); } -void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode ); -void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs ); +void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); +void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs); int reiserfs_setattr(struct dentry *dentry, struct iattr *attr); /* namei.c */ -void set_de_name_and_namelen (struct reiserfs_dir_entry * de); -int search_by_entry_key (struct super_block * sb, const struct cpu_key * key, - struct path * path, - struct reiserfs_dir_entry * de); -struct dentry *reiserfs_get_parent(struct dentry *) ; +void set_de_name_and_namelen(struct reiserfs_dir_entry *de); +int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, + struct path *path, struct reiserfs_dir_entry *de); +struct dentry *reiserfs_get_parent(struct dentry *); /* procfs.c */ #if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO ) @@ -1953,15 +1918,15 @@ struct dentry *reiserfs_get_parent(struct dentry *) ; #undef REISERFS_PROC_INFO #endif -int 
reiserfs_proc_info_init( struct super_block *sb ); -int reiserfs_proc_info_done( struct super_block *sb ); -struct proc_dir_entry *reiserfs_proc_register_global( char *name, - read_proc_t *func ); -void reiserfs_proc_unregister_global( const char *name ); -int reiserfs_proc_info_global_init( void ); -int reiserfs_proc_info_global_done( void ); -int reiserfs_global_version_in_proc( char *buffer, char **start, off_t offset, - int count, int *eof, void *data ); +int reiserfs_proc_info_init(struct super_block *sb); +int reiserfs_proc_info_done(struct super_block *sb); +struct proc_dir_entry *reiserfs_proc_register_global(char *name, + read_proc_t * func); +void reiserfs_proc_unregister_global(const char *name); +int reiserfs_proc_info_global_init(void); +int reiserfs_proc_info_global_done(void); +int reiserfs_global_version_in_proc(char *buffer, char **start, off_t offset, + int count, int *eof, void *data); #if defined( REISERFS_PROC_INFO ) @@ -1993,123 +1958,132 @@ extern struct inode_operations reiserfs_special_inode_operations; extern struct file_operations reiserfs_dir_operations; /* tail_conversion.c */ -int direct2indirect (struct reiserfs_transaction_handle *, struct inode *, struct path *, struct buffer_head *, loff_t); -int indirect2direct (struct reiserfs_transaction_handle *, struct inode *, struct page *, struct path *, const struct cpu_key *, loff_t, char *); -void reiserfs_unmap_buffer(struct buffer_head *) ; - +int direct2indirect(struct reiserfs_transaction_handle *, struct inode *, + struct path *, struct buffer_head *, loff_t); +int indirect2direct(struct reiserfs_transaction_handle *, struct inode *, + struct page *, struct path *, const struct cpu_key *, + loff_t, char *); +void reiserfs_unmap_buffer(struct buffer_head *); /* file.c */ extern struct inode_operations reiserfs_file_inode_operations; extern struct file_operations reiserfs_file_operations; -extern struct address_space_operations reiserfs_address_space_operations ; +extern struct 
address_space_operations reiserfs_address_space_operations; /* fix_nodes.c */ #ifdef CONFIG_REISERFS_CHECK -void * reiserfs_kmalloc (size_t size, int flags, struct super_block * s); -void reiserfs_kfree (const void * vp, size_t size, struct super_block * s); +void *reiserfs_kmalloc(size_t size, int flags, struct super_block *s); +void reiserfs_kfree(const void *vp, size_t size, struct super_block *s); #else static inline void *reiserfs_kmalloc(size_t size, int flags, - struct super_block *s) + struct super_block *s) { return kmalloc(size, flags); } static inline void reiserfs_kfree(const void *vp, size_t size, - struct super_block *s) + struct super_block *s) { kfree(vp); } #endif -int fix_nodes (int n_op_mode, struct tree_balance * p_s_tb, - struct item_head * p_s_ins_ih, const void *); -void unfix_nodes (struct tree_balance *); - +int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, + struct item_head *p_s_ins_ih, const void *); +void unfix_nodes(struct tree_balance *); /* prints.c */ -void reiserfs_panic (struct super_block * s, const char * fmt, ...) __attribute__ ( ( noreturn ) ); -void reiserfs_info (struct super_block *s, const char * fmt, ...); -void reiserfs_debug (struct super_block *s, int level, const char * fmt, ...); -void print_indirect_item (struct buffer_head * bh, int item_num); -void store_print_tb (struct tree_balance * tb); -void print_cur_tb (char * mes); -void print_de (struct reiserfs_dir_entry * de); -void print_bi (struct buffer_info * bi, char * mes); -#define PRINT_LEAF_ITEMS 1 /* print all items */ -#define PRINT_DIRECTORY_ITEMS 2 /* print directory items */ -#define PRINT_DIRECT_ITEMS 4 /* print contents of direct items */ -void print_block (struct buffer_head * bh, ...); -void print_bmap (struct super_block * s, int silent); -void print_bmap_block (int i, char * data, int size, int silent); +void reiserfs_panic(struct super_block *s, const char *fmt, ...) 
+ __attribute__ ((noreturn)); +void reiserfs_info(struct super_block *s, const char *fmt, ...); +void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...); +void print_indirect_item(struct buffer_head *bh, int item_num); +void store_print_tb(struct tree_balance *tb); +void print_cur_tb(char *mes); +void print_de(struct reiserfs_dir_entry *de); +void print_bi(struct buffer_info *bi, char *mes); +#define PRINT_LEAF_ITEMS 1 /* print all items */ +#define PRINT_DIRECTORY_ITEMS 2 /* print directory items */ +#define PRINT_DIRECT_ITEMS 4 /* print contents of direct items */ +void print_block(struct buffer_head *bh, ...); +void print_bmap(struct super_block *s, int silent); +void print_bmap_block(int i, char *data, int size, int silent); /*void print_super_block (struct super_block * s, char * mes);*/ -void print_objectid_map (struct super_block * s); -void print_block_head (struct buffer_head * bh, char * mes); -void check_leaf (struct buffer_head * bh); -void check_internal (struct buffer_head * bh); -void print_statistics (struct super_block * s); -char * reiserfs_hashname(int code); +void print_objectid_map(struct super_block *s); +void print_block_head(struct buffer_head *bh, char *mes); +void check_leaf(struct buffer_head *bh); +void check_internal(struct buffer_head *bh); +void print_statistics(struct super_block *s); +char *reiserfs_hashname(int code); /* lbalance.c */ -int leaf_move_items (int shift_mode, struct tree_balance * tb, int mov_num, int mov_bytes, struct buffer_head * Snew); -int leaf_shift_left (struct tree_balance * tb, int shift_num, int shift_bytes); -int leaf_shift_right (struct tree_balance * tb, int shift_num, int shift_bytes); -void leaf_delete_items (struct buffer_info * cur_bi, int last_first, int first, int del_num, int del_bytes); -void leaf_insert_into_buf (struct buffer_info * bi, int before, - struct item_head * inserted_item_ih, const char * inserted_item_body, int zeros_number); -void leaf_paste_in_buffer (struct 
buffer_info * bi, int pasted_item_num, - int pos_in_item, int paste_size, const char * body, int zeros_number); -void leaf_cut_from_buffer (struct buffer_info * bi, int cut_item_num, int pos_in_item, - int cut_size); -void leaf_paste_entries (struct buffer_head * bh, int item_num, int before, - int new_entry_count, struct reiserfs_de_head * new_dehs, const char * records, int paste_size); +int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, + int mov_bytes, struct buffer_head *Snew); +int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes); +int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes); +void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first, + int del_num, int del_bytes); +void leaf_insert_into_buf(struct buffer_info *bi, int before, + struct item_head *inserted_item_ih, + const char *inserted_item_body, int zeros_number); +void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num, + int pos_in_item, int paste_size, const char *body, + int zeros_number); +void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, + int pos_in_item, int cut_size); +void leaf_paste_entries(struct buffer_head *bh, int item_num, int before, + int new_entry_count, struct reiserfs_de_head *new_dehs, + const char *records, int paste_size); /* ibalance.c */ -int balance_internal (struct tree_balance * , int, int, struct item_head * , - struct buffer_head **); +int balance_internal(struct tree_balance *, int, int, struct item_head *, + struct buffer_head **); /* do_balance.c */ -void do_balance_mark_leaf_dirty (struct tree_balance * tb, - struct buffer_head * bh, int flag); +void do_balance_mark_leaf_dirty(struct tree_balance *tb, + struct buffer_head *bh, int flag); #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty -void do_balance (struct tree_balance * tb, struct item_head * ih, - const char * 
body, int flag); -void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh); +void do_balance(struct tree_balance *tb, struct item_head *ih, + const char *body, int flag); +void reiserfs_invalidate_buffer(struct tree_balance *tb, + struct buffer_head *bh); -int get_left_neighbor_position (struct tree_balance * tb, int h); -int get_right_neighbor_position (struct tree_balance * tb, int h); -void replace_key (struct tree_balance * tb, struct buffer_head *, int, struct buffer_head *, int); -void make_empty_node (struct buffer_info *); -struct buffer_head * get_FEB (struct tree_balance *); +int get_left_neighbor_position(struct tree_balance *tb, int h); +int get_right_neighbor_position(struct tree_balance *tb, int h); +void replace_key(struct tree_balance *tb, struct buffer_head *, int, + struct buffer_head *, int); +void make_empty_node(struct buffer_info *); +struct buffer_head *get_FEB(struct tree_balance *); /* bitmap.c */ /* structure contains hints for block allocator, and it is a container for * arguments, such as node, search path, transaction_handle, etc. */ - struct __reiserfs_blocknr_hint { - struct inode * inode; /* inode passed to allocator, if we allocate unf. nodes */ - long block; /* file offset, in blocks */ - struct in_core_key key; - struct path * path; /* search path, used by allocator to deternine search_start by - * various ways */ - struct reiserfs_transaction_handle * th; /* transaction handle is needed to log super blocks and - * bitmap blocks changes */ - b_blocknr_t beg, end; - b_blocknr_t search_start; /* a field used to transfer search start value (block number) +struct __reiserfs_blocknr_hint { + struct inode *inode; /* inode passed to allocator, if we allocate unf. 
nodes */ + long block; /* file offset, in blocks */ + struct in_core_key key; + struct path *path; /* search path, used by allocator to deternine search_start by + * various ways */ + struct reiserfs_transaction_handle *th; /* transaction handle is needed to log super blocks and + * bitmap blocks changes */ + b_blocknr_t beg, end; + b_blocknr_t search_start; /* a field used to transfer search start value (block number) * between different block allocator procedures * (determine_search_start() and others) */ - int prealloc_size; /* is set in determine_prealloc_size() function, used by underlayed - * function that do actual allocation */ + int prealloc_size; /* is set in determine_prealloc_size() function, used by underlayed + * function that do actual allocation */ - unsigned formatted_node:1; /* the allocator uses different polices for getting disk space for + unsigned formatted_node:1; /* the allocator uses different polices for getting disk space for * formatted/unformatted blocks with/without preallocation */ - unsigned preallocate:1; + unsigned preallocate:1; }; typedef struct __reiserfs_blocknr_hint reiserfs_blocknr_hint_t; -int reiserfs_parse_alloc_options (struct super_block *, char *); -void reiserfs_init_alloc_options (struct super_block *s); +int reiserfs_parse_alloc_options(struct super_block *, char *); +void reiserfs_init_alloc_options(struct super_block *s); /* * given a directory, this will tell you what packing locality @@ -2118,68 +2092,72 @@ void reiserfs_init_alloc_options (struct super_block *s); */ __le32 reiserfs_choose_packing(struct inode *dir); -int is_reusable (struct super_block * s, b_blocknr_t block, int bit_value); -void reiserfs_free_block (struct reiserfs_transaction_handle *th, struct inode *, b_blocknr_t, int for_unformatted); -int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t * , int, int); -extern inline int reiserfs_new_form_blocknrs (struct tree_balance * tb, - b_blocknr_t *new_blocknrs, int amount_needed) 
+int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value); +void reiserfs_free_block(struct reiserfs_transaction_handle *th, struct inode *, + b_blocknr_t, int for_unformatted); +int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t *, int, + int); +extern inline int reiserfs_new_form_blocknrs(struct tree_balance *tb, + b_blocknr_t * new_blocknrs, + int amount_needed) { - reiserfs_blocknr_hint_t hint = { - .th = tb->transaction_handle, - .path = tb->tb_path, - .inode = NULL, - .key = tb->key, - .block = 0, - .formatted_node = 1 - }; - return reiserfs_allocate_blocknrs(&hint, new_blocknrs, amount_needed, 0); + reiserfs_blocknr_hint_t hint = { + .th = tb->transaction_handle, + .path = tb->tb_path, + .inode = NULL, + .key = tb->key, + .block = 0, + .formatted_node = 1 + }; + return reiserfs_allocate_blocknrs(&hint, new_blocknrs, amount_needed, + 0); } -extern inline int reiserfs_new_unf_blocknrs (struct reiserfs_transaction_handle *th, - struct inode *inode, - b_blocknr_t *new_blocknrs, - struct path * path, long block) +extern inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle + *th, struct inode *inode, + b_blocknr_t * new_blocknrs, + struct path *path, long block) { - reiserfs_blocknr_hint_t hint = { - .th = th, - .path = path, - .inode = inode, - .block = block, - .formatted_node = 0, - .preallocate = 0 - }; - return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); + reiserfs_blocknr_hint_t hint = { + .th = th, + .path = path, + .inode = inode, + .block = block, + .formatted_node = 0, + .preallocate = 0 + }; + return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); } #ifdef REISERFS_PREALLOCATE -extern inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle *th, - struct inode * inode, - b_blocknr_t *new_blocknrs, - struct path * path, long block) +extern inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle + *th, struct inode *inode, + b_blocknr_t * new_blocknrs, 
+ struct path *path, long block) { - reiserfs_blocknr_hint_t hint = { - .th = th, - .path = path, - .inode = inode, - .block = block, - .formatted_node = 0, - .preallocate = 1 - }; - return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); + reiserfs_blocknr_hint_t hint = { + .th = th, + .path = path, + .inode = inode, + .block = block, + .formatted_node = 0, + .preallocate = 1 + }; + return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); } -void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th, - struct inode * inode); -void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th); +void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th, + struct inode *inode); +void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th); #endif -void reiserfs_claim_blocks_to_be_allocated( struct super_block *sb, int blocks); -void reiserfs_release_claimed_blocks( struct super_block *sb, int blocks); +void reiserfs_claim_blocks_to_be_allocated(struct super_block *sb, int blocks); +void reiserfs_release_claimed_blocks(struct super_block *sb, int blocks); int reiserfs_can_fit_pages(struct super_block *sb); /* hashes.c */ -__u32 keyed_hash (const signed char *msg, int len); -__u32 yura_hash (const signed char *msg, int len); -__u32 r5_hash (const signed char *msg, int len); +__u32 keyed_hash(const signed char *msg, int len); +__u32 yura_hash(const signed char *msg, int len); +__u32 r5_hash(const signed char *msg, int len); /* the ext2 bit routines adjust for big or little endian as ** appropriate for the arch, so in our laziness we use them rather @@ -2199,11 +2177,10 @@ __u32 r5_hash (const signed char *msg, int len); absolutely safe */ #define SPARE_SPACE 500 - /* prototypes from ioctl.c */ -int reiserfs_ioctl (struct inode * inode, struct file * filp, - unsigned int cmd, unsigned long arg); - +int reiserfs_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); + /* ioctl's 
command */ #define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) /* define following flags to be the same as in ext2, so that chattr(1), @@ -2218,10 +2195,8 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, would evolve into real per-fs locks */ #define reiserfs_write_lock( sb ) lock_kernel() #define reiserfs_write_unlock( sb ) unlock_kernel() - + /* xattr stuff */ #define REISERFS_XATTR_DIR_SEM(s) (REISERFS_SB(s)->xattr_dir_sem) -#endif /* _LINUX_REISER_FS_H */ - - +#endif /* _LINUX_REISER_FS_H */ diff --git a/include/linux/reiserfs_fs_i.h b/include/linux/reiserfs_fs_i.h index e321eb050d6..149be8d9a0c 100644 --- a/include/linux/reiserfs_fs_i.h +++ b/include/linux/reiserfs_fs_i.h @@ -10,54 +10,53 @@ typedef enum { /** this says what format of key do all items (but stat data) of an object have. If this is set, that format is 3.6 otherwise - 3.5 */ - i_item_key_version_mask = 0x0001, + i_item_key_version_mask = 0x0001, /** If this is unset, object has 3.5 stat data, otherwise, it has 3.6 stat data with 64bit size, 32bit nlink etc. */ - i_stat_data_version_mask = 0x0002, + i_stat_data_version_mask = 0x0002, /** file might need tail packing on close */ - i_pack_on_close_mask = 0x0004, + i_pack_on_close_mask = 0x0004, /** don't pack tail of file */ - i_nopack_mask = 0x0008, + i_nopack_mask = 0x0008, /** If those is set, "safe link" was created for this file during truncate or unlink. Safe link is used to avoid leakage of disk space on crash with some files open, but unlinked. */ - i_link_saved_unlink_mask = 0x0010, - i_link_saved_truncate_mask = 0x0020, - i_has_xattr_dir = 0x0040, - i_data_log = 0x0080, + i_link_saved_unlink_mask = 0x0010, + i_link_saved_truncate_mask = 0x0020, + i_has_xattr_dir = 0x0040, + i_data_log = 0x0080, } reiserfs_inode_flags; - struct reiserfs_inode_info { - __u32 i_key [4];/* key is still 4 32 bit integers */ + __u32 i_key[4]; /* key is still 4 32 bit integers */ /** transient inode flags that are never stored on disk. 
Bitmasks for this field are defined above. */ - __u32 i_flags; + __u32 i_flags; - __u32 i_first_direct_byte; // offset of first byte stored in direct item. + __u32 i_first_direct_byte; // offset of first byte stored in direct item. - /* copy of persistent inode flags read from sd_attrs. */ - __u32 i_attrs; + /* copy of persistent inode flags read from sd_attrs. */ + __u32 i_attrs; - int i_prealloc_block; /* first unused block of a sequence of unused blocks */ - int i_prealloc_count; /* length of that sequence */ - struct list_head i_prealloc_list; /* per-transaction list of inodes which - * have preallocated blocks */ + int i_prealloc_block; /* first unused block of a sequence of unused blocks */ + int i_prealloc_count; /* length of that sequence */ + struct list_head i_prealloc_list; /* per-transaction list of inodes which + * have preallocated blocks */ - unsigned new_packing_locality:1; /* new_packig_locality is created; new blocks - * for the contents of this directory should be - * displaced */ + unsigned new_packing_locality:1; /* new_packig_locality is created; new blocks + * for the contents of this directory should be + * displaced */ - /* we use these for fsync or O_SYNC to decide which transaction - ** needs to be committed in order for this inode to be properly - ** flushed */ - unsigned long i_trans_id ; - struct reiserfs_journal_list *i_jl; + /* we use these for fsync or O_SYNC to decide which transaction + ** needs to be committed in order for this inode to be properly + ** flushed */ + unsigned long i_trans_id; + struct reiserfs_journal_list *i_jl; - struct posix_acl *i_acl_access; - struct posix_acl *i_acl_default; - struct rw_semaphore xattr_sem; - struct inode vfs_inode; + struct posix_acl *i_acl_access; + struct posix_acl *i_acl_default; + struct rw_semaphore xattr_sem; + struct inode vfs_inode; }; #endif diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 31c709d0fe1..3e68592e52e 100644 --- 
a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -10,7 +10,7 @@ #endif typedef enum { - reiserfs_attrs_cleared = 0x00000001, + reiserfs_attrs_cleared = 0x00000001, } reiserfs_super_block_flags; /* struct reiserfs_super_block accessors/mutators @@ -61,7 +61,7 @@ typedef enum { #define sb_umount_state(sbp) (le16_to_cpu((sbp)->s_v1.s_umount_state)) #define set_sb_umount_state(sbp,v) ((sbp)->s_v1.s_umount_state = cpu_to_le16(v)) #define sb_fs_state(sbp) (le16_to_cpu((sbp)->s_v1.s_fs_state)) -#define set_sb_fs_state(sbp,v) ((sbp)->s_v1.s_fs_state = cpu_to_le16(v)) +#define set_sb_fs_state(sbp,v) ((sbp)->s_v1.s_fs_state = cpu_to_le16(v)) #define sb_hash_function_code(sbp) \ (le32_to_cpu((sbp)->s_v1.s_hash_function_code)) #define set_sb_hash_function_code(sbp,v) \ @@ -103,10 +103,10 @@ typedef enum { /* don't mess with these for a while */ /* we have a node size define somewhere in reiserfs_fs.h. -Hans */ -#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ -#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ -#define JOURNAL_HASH_SIZE 8192 -#define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */ +#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ +#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ +#define JOURNAL_HASH_SIZE 8192 +#define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */ /* One of these for every block in every transaction ** Each one is in two hash tables. First, a hash of the current transaction, and after journal_end, a @@ -117,27 +117,27 @@ typedef enum { ** to a given transaction. 
*/ struct reiserfs_journal_cnode { - struct buffer_head *bh ; /* real buffer head */ - struct super_block *sb ; /* dev of real buffer head */ - __u32 blocknr ; /* block number of real buffer head, == 0 when buffer on disk */ - long state ; - struct reiserfs_journal_list *jlist ; /* journal list this cnode lives in */ - struct reiserfs_journal_cnode *next ; /* next in transaction list */ - struct reiserfs_journal_cnode *prev ; /* prev in transaction list */ - struct reiserfs_journal_cnode *hprev ; /* prev in hash list */ - struct reiserfs_journal_cnode *hnext ; /* next in hash list */ + struct buffer_head *bh; /* real buffer head */ + struct super_block *sb; /* dev of real buffer head */ + __u32 blocknr; /* block number of real buffer head, == 0 when buffer on disk */ + long state; + struct reiserfs_journal_list *jlist; /* journal list this cnode lives in */ + struct reiserfs_journal_cnode *next; /* next in transaction list */ + struct reiserfs_journal_cnode *prev; /* prev in transaction list */ + struct reiserfs_journal_cnode *hprev; /* prev in hash list */ + struct reiserfs_journal_cnode *hnext; /* next in hash list */ }; struct reiserfs_bitmap_node { - int id ; - char *data ; - struct list_head list ; -} ; + int id; + char *data; + struct list_head list; +}; struct reiserfs_list_bitmap { - struct reiserfs_journal_list *journal_list ; - struct reiserfs_bitmap_node **bitmaps ; -} ; + struct reiserfs_journal_list *journal_list; + struct reiserfs_bitmap_node **bitmaps; +}; /* ** one of these for each transaction. The most important part here is the j_realblock. 
@@ -146,273 +146,269 @@ struct reiserfs_list_bitmap { ** and to make sure every real block in a transaction is on disk before allowing the log area ** to be overwritten */ struct reiserfs_journal_list { - unsigned long j_start ; - unsigned long j_state; - unsigned long j_len ; - atomic_t j_nonzerolen ; - atomic_t j_commit_left ; - atomic_t j_older_commits_done ; /* all commits older than this on disk*/ - struct semaphore j_commit_lock; - unsigned long j_trans_id ; - time_t j_timestamp ; - struct reiserfs_list_bitmap *j_list_bitmap ; - struct buffer_head *j_commit_bh ; /* commit buffer head */ - struct reiserfs_journal_cnode *j_realblock ; - struct reiserfs_journal_cnode *j_freedlist ; /* list of buffers that were freed during this trans. free each of these on flush */ - /* time ordered list of all active transactions */ - struct list_head j_list; - - /* time ordered list of all transactions we haven't tried to flush yet */ - struct list_head j_working_list; - - /* list of tail conversion targets in need of flush before commit */ - struct list_head j_tail_bh_list; - /* list of data=ordered buffers in need of flush before commit */ - struct list_head j_bh_list; - int j_refcount; -} ; + unsigned long j_start; + unsigned long j_state; + unsigned long j_len; + atomic_t j_nonzerolen; + atomic_t j_commit_left; + atomic_t j_older_commits_done; /* all commits older than this on disk */ + struct semaphore j_commit_lock; + unsigned long j_trans_id; + time_t j_timestamp; + struct reiserfs_list_bitmap *j_list_bitmap; + struct buffer_head *j_commit_bh; /* commit buffer head */ + struct reiserfs_journal_cnode *j_realblock; + struct reiserfs_journal_cnode *j_freedlist; /* list of buffers that were freed during this trans. 
free each of these on flush */ + /* time ordered list of all active transactions */ + struct list_head j_list; + + /* time ordered list of all transactions we haven't tried to flush yet */ + struct list_head j_working_list; + + /* list of tail conversion targets in need of flush before commit */ + struct list_head j_tail_bh_list; + /* list of data=ordered buffers in need of flush before commit */ + struct list_head j_bh_list; + int j_refcount; +}; struct reiserfs_journal { - struct buffer_head ** j_ap_blocks ; /* journal blocks on disk */ - struct reiserfs_journal_cnode *j_last ; /* newest journal block */ - struct reiserfs_journal_cnode *j_first ; /* oldest journal block. start here for traverse */ - - struct file *j_dev_file; - struct block_device *j_dev_bd; - int j_1st_reserved_block; /* first block on s_dev of reserved area journal */ - - long j_state ; - unsigned long j_trans_id ; - unsigned long j_mount_id ; - unsigned long j_start ; /* start of current waiting commit (index into j_ap_blocks) */ - unsigned long j_len ; /* lenght of current waiting commit */ - unsigned long j_len_alloc ; /* number of buffers requested by journal_begin() */ - atomic_t j_wcount ; /* count of writers for current commit */ - unsigned long j_bcount ; /* batch count. allows turning X transactions into 1 */ - unsigned long j_first_unflushed_offset ; /* first unflushed transactions offset */ - unsigned long j_last_flush_trans_id ; /* last fully flushed journal timestamp */ - struct buffer_head *j_header_bh ; - - time_t j_trans_start_time ; /* time this transaction started */ - struct semaphore j_lock; - struct semaphore j_flush_sem; - wait_queue_head_t j_join_wait ; /* wait for current transaction to finish before starting new one */ - atomic_t j_jlock ; /* lock for j_join_wait */ - int j_list_bitmap_index ; /* number of next list bitmap to use */ - int j_must_wait ; /* no more journal begins allowed. 
MUST sleep on j_join_wait */ - int j_next_full_flush ; /* next journal_end will flush all journal list */ - int j_next_async_flush ; /* next journal_end will flush all async commits */ - - int j_cnode_used ; /* number of cnodes on the used list */ - int j_cnode_free ; /* number of cnodes on the free list */ - - unsigned int j_trans_max ; /* max number of blocks in a transaction. */ - unsigned int j_max_batch ; /* max number of blocks to batch into a trans */ - unsigned int j_max_commit_age ; /* in seconds, how old can an async commit be */ - unsigned int j_max_trans_age ; /* in seconds, how old can a transaction be */ - unsigned int j_default_max_commit_age ; /* the default for the max commit age */ - - struct reiserfs_journal_cnode *j_cnode_free_list ; - struct reiserfs_journal_cnode *j_cnode_free_orig ; /* orig pointer returned from vmalloc */ - - struct reiserfs_journal_list *j_current_jl; - int j_free_bitmap_nodes ; - int j_used_bitmap_nodes ; - - int j_num_lists; /* total number of active transactions */ - int j_num_work_lists; /* number that need attention from kreiserfsd */ - - /* debugging to make sure things are flushed in order */ - int j_last_flush_id; - - /* debugging to make sure things are committed in order */ - int j_last_commit_id; - - struct list_head j_bitmap_nodes ; - struct list_head j_dirty_buffers ; - spinlock_t j_dirty_buffers_lock ; /* protects j_dirty_buffers */ - - /* list of all active transactions */ - struct list_head j_journal_list; - /* lists that haven't been touched by writeback attempts */ - struct list_head j_working_list; - - struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS] ; /* array of bitmaps to record the deleted blocks */ - struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for real buffer heads in current trans */ - struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for all the real buffer heads in all - the transactions */ - struct list_head 
j_prealloc_list; /* list of inodes which have preallocated blocks */ - int j_persistent_trans; - unsigned long j_max_trans_size ; - unsigned long j_max_batch_size ; - - int j_errno; - - /* when flushing ordered buffers, throttle new ordered writers */ - struct work_struct j_work; - atomic_t j_async_throttle; + struct buffer_head **j_ap_blocks; /* journal blocks on disk */ + struct reiserfs_journal_cnode *j_last; /* newest journal block */ + struct reiserfs_journal_cnode *j_first; /* oldest journal block. start here for traverse */ + + struct file *j_dev_file; + struct block_device *j_dev_bd; + int j_1st_reserved_block; /* first block on s_dev of reserved area journal */ + + long j_state; + unsigned long j_trans_id; + unsigned long j_mount_id; + unsigned long j_start; /* start of current waiting commit (index into j_ap_blocks) */ + unsigned long j_len; /* lenght of current waiting commit */ + unsigned long j_len_alloc; /* number of buffers requested by journal_begin() */ + atomic_t j_wcount; /* count of writers for current commit */ + unsigned long j_bcount; /* batch count. allows turning X transactions into 1 */ + unsigned long j_first_unflushed_offset; /* first unflushed transactions offset */ + unsigned long j_last_flush_trans_id; /* last fully flushed journal timestamp */ + struct buffer_head *j_header_bh; + + time_t j_trans_start_time; /* time this transaction started */ + struct semaphore j_lock; + struct semaphore j_flush_sem; + wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */ + atomic_t j_jlock; /* lock for j_join_wait */ + int j_list_bitmap_index; /* number of next list bitmap to use */ + int j_must_wait; /* no more journal begins allowed. 
MUST sleep on j_join_wait */ + int j_next_full_flush; /* next journal_end will flush all journal list */ + int j_next_async_flush; /* next journal_end will flush all async commits */ + + int j_cnode_used; /* number of cnodes on the used list */ + int j_cnode_free; /* number of cnodes on the free list */ + + unsigned int j_trans_max; /* max number of blocks in a transaction. */ + unsigned int j_max_batch; /* max number of blocks to batch into a trans */ + unsigned int j_max_commit_age; /* in seconds, how old can an async commit be */ + unsigned int j_max_trans_age; /* in seconds, how old can a transaction be */ + unsigned int j_default_max_commit_age; /* the default for the max commit age */ + + struct reiserfs_journal_cnode *j_cnode_free_list; + struct reiserfs_journal_cnode *j_cnode_free_orig; /* orig pointer returned from vmalloc */ + + struct reiserfs_journal_list *j_current_jl; + int j_free_bitmap_nodes; + int j_used_bitmap_nodes; + + int j_num_lists; /* total number of active transactions */ + int j_num_work_lists; /* number that need attention from kreiserfsd */ + + /* debugging to make sure things are flushed in order */ + int j_last_flush_id; + + /* debugging to make sure things are committed in order */ + int j_last_commit_id; + + struct list_head j_bitmap_nodes; + struct list_head j_dirty_buffers; + spinlock_t j_dirty_buffers_lock; /* protects j_dirty_buffers */ + + /* list of all active transactions */ + struct list_head j_journal_list; + /* lists that haven't been touched by writeback attempts */ + struct list_head j_working_list; + + struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; /* array of bitmaps to record the deleted blocks */ + struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; /* hash table for real buffer heads in current trans */ + struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; /* hash table for all the real buffer heads in all + the transactions */ + struct list_head j_prealloc_list; /* list 
of inodes which have preallocated blocks */ + int j_persistent_trans; + unsigned long j_max_trans_size; + unsigned long j_max_batch_size; + + int j_errno; + + /* when flushing ordered buffers, throttle new ordered writers */ + struct work_struct j_work; + atomic_t j_async_throttle; }; enum journal_state_bits { - J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */ - J_WRITERS_QUEUED, /* set when log is full due to too many writers */ - J_ABORTED, /* set when log is aborted */ + J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */ + J_WRITERS_QUEUED, /* set when log is full due to too many writers */ + J_ABORTED, /* set when log is aborted */ }; +#define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. magic string to find desc blocks in the journal */ -#define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. magic string to find desc blocks in the journal */ +typedef __u32(*hashf_t) (const signed char *, int); -typedef __u32 (*hashf_t) (const signed char *, int); - -struct reiserfs_bitmap_info -{ - // FIXME: Won't work with block sizes > 8K - __u16 first_zero_hint; - __u16 free_count; - struct buffer_head *bh; /* the actual bitmap */ +struct reiserfs_bitmap_info { + // FIXME: Won't work with block sizes > 8K + __u16 first_zero_hint; + __u16 free_count; + struct buffer_head *bh; /* the actual bitmap */ }; struct proc_dir_entry; #if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO ) typedef unsigned long int stat_cnt_t; -typedef struct reiserfs_proc_info_data -{ - spinlock_t lock; - int exiting; - int max_hash_collisions; - - stat_cnt_t breads; - stat_cnt_t bread_miss; - stat_cnt_t search_by_key; - stat_cnt_t search_by_key_fs_changed; - stat_cnt_t search_by_key_restarted; - - stat_cnt_t insert_item_restarted; - stat_cnt_t paste_into_item_restarted; - stat_cnt_t cut_from_item_restarted; - stat_cnt_t delete_solid_item_restarted; - stat_cnt_t delete_item_restarted; - - stat_cnt_t leaked_oid; - stat_cnt_t leaves_removable; - - /* balances per level. 
Use explicit 5 as MAX_HEIGHT is not visible yet. */ - stat_cnt_t balance_at[ 5 ]; /* XXX */ - /* sbk == search_by_key */ - stat_cnt_t sbk_read_at[ 5 ]; /* XXX */ - stat_cnt_t sbk_fs_changed[ 5 ]; - stat_cnt_t sbk_restarted[ 5 ]; - stat_cnt_t items_at[ 5 ]; /* XXX */ - stat_cnt_t free_at[ 5 ]; /* XXX */ - stat_cnt_t can_node_be_removed[ 5 ]; /* XXX */ - long int lnum[ 5 ]; /* XXX */ - long int rnum[ 5 ]; /* XXX */ - long int lbytes[ 5 ]; /* XXX */ - long int rbytes[ 5 ]; /* XXX */ - stat_cnt_t get_neighbors[ 5 ]; - stat_cnt_t get_neighbors_restart[ 5 ]; - stat_cnt_t need_l_neighbor[ 5 ]; - stat_cnt_t need_r_neighbor[ 5 ]; - - stat_cnt_t free_block; - struct __scan_bitmap_stats { - stat_cnt_t call; - stat_cnt_t wait; - stat_cnt_t bmap; - stat_cnt_t retry; - stat_cnt_t in_journal_hint; - stat_cnt_t in_journal_nohint; - stat_cnt_t stolen; - } scan_bitmap; - struct __journal_stats { - stat_cnt_t in_journal; - stat_cnt_t in_journal_bitmap; - stat_cnt_t in_journal_reusable; - stat_cnt_t lock_journal; - stat_cnt_t lock_journal_wait; - stat_cnt_t journal_being; - stat_cnt_t journal_relock_writers; - stat_cnt_t journal_relock_wcount; - stat_cnt_t mark_dirty; - stat_cnt_t mark_dirty_already; - stat_cnt_t mark_dirty_notjournal; - stat_cnt_t restore_prepared; - stat_cnt_t prepare; - stat_cnt_t prepare_retry; - } journal; +typedef struct reiserfs_proc_info_data { + spinlock_t lock; + int exiting; + int max_hash_collisions; + + stat_cnt_t breads; + stat_cnt_t bread_miss; + stat_cnt_t search_by_key; + stat_cnt_t search_by_key_fs_changed; + stat_cnt_t search_by_key_restarted; + + stat_cnt_t insert_item_restarted; + stat_cnt_t paste_into_item_restarted; + stat_cnt_t cut_from_item_restarted; + stat_cnt_t delete_solid_item_restarted; + stat_cnt_t delete_item_restarted; + + stat_cnt_t leaked_oid; + stat_cnt_t leaves_removable; + + /* balances per level. Use explicit 5 as MAX_HEIGHT is not visible yet. 
*/ + stat_cnt_t balance_at[5]; /* XXX */ + /* sbk == search_by_key */ + stat_cnt_t sbk_read_at[5]; /* XXX */ + stat_cnt_t sbk_fs_changed[5]; + stat_cnt_t sbk_restarted[5]; + stat_cnt_t items_at[5]; /* XXX */ + stat_cnt_t free_at[5]; /* XXX */ + stat_cnt_t can_node_be_removed[5]; /* XXX */ + long int lnum[5]; /* XXX */ + long int rnum[5]; /* XXX */ + long int lbytes[5]; /* XXX */ + long int rbytes[5]; /* XXX */ + stat_cnt_t get_neighbors[5]; + stat_cnt_t get_neighbors_restart[5]; + stat_cnt_t need_l_neighbor[5]; + stat_cnt_t need_r_neighbor[5]; + + stat_cnt_t free_block; + struct __scan_bitmap_stats { + stat_cnt_t call; + stat_cnt_t wait; + stat_cnt_t bmap; + stat_cnt_t retry; + stat_cnt_t in_journal_hint; + stat_cnt_t in_journal_nohint; + stat_cnt_t stolen; + } scan_bitmap; + struct __journal_stats { + stat_cnt_t in_journal; + stat_cnt_t in_journal_bitmap; + stat_cnt_t in_journal_reusable; + stat_cnt_t lock_journal; + stat_cnt_t lock_journal_wait; + stat_cnt_t journal_being; + stat_cnt_t journal_relock_writers; + stat_cnt_t journal_relock_wcount; + stat_cnt_t mark_dirty; + stat_cnt_t mark_dirty_already; + stat_cnt_t mark_dirty_notjournal; + stat_cnt_t restore_prepared; + stat_cnt_t prepare; + stat_cnt_t prepare_retry; + } journal; } reiserfs_proc_info_data_t; #else -typedef struct reiserfs_proc_info_data -{} reiserfs_proc_info_data_t; +typedef struct reiserfs_proc_info_data { +} reiserfs_proc_info_data_t; #endif /* reiserfs union of in-core super block data */ -struct reiserfs_sb_info -{ - struct buffer_head * s_sbh; /* Buffer containing the super block */ - /* both the comment and the choice of - name are unclear for s_rs -Hans */ - struct reiserfs_super_block * s_rs; /* Pointer to the super block in the buffer */ - struct reiserfs_bitmap_info * s_ap_bitmap; - struct reiserfs_journal *s_journal ; /* pointer to journal information */ - unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ - - /* Comment? 
-Hans */ - void (*end_io_handler)(struct buffer_head *, int); - hashf_t s_hash_function; /* pointer to function which is used - to sort names in directory. Set on - mount */ - unsigned long s_mount_opt; /* reiserfs's mount options are set - here (currently - NOTAIL, NOLOG, - REPLAYONLY) */ - - struct { /* This is a structure that describes block allocator options */ - unsigned long bits; /* Bitfield for enable/disable kind of options */ - unsigned long large_file_size; /* size started from which we consider file to be a large one(in blocks) */ - int border; /* percentage of disk, border takes */ - int preallocmin; /* Minimal file size (in blocks) starting from which we do preallocations */ - int preallocsize; /* Number of blocks we try to prealloc when file - reaches preallocmin size (in blocks) or - prealloc_list is empty. */ - } s_alloc_options; - - /* Comment? -Hans */ - wait_queue_head_t s_wait; - /* To be obsoleted soon by per buffer seals.. -Hans */ - atomic_t s_generation_counter; // increased by one every time the - // tree gets re-balanced - unsigned long s_properties; /* File system properties. Currently holds - on-disk FS format */ - - /* session statistics */ - int s_kmallocs; - int s_disk_reads; - int s_disk_writes; - int s_fix_nodes; - int s_do_balance; - int s_unneeded_left_neighbor; - int s_good_search_by_key_reada; - int s_bmaps; - int s_bmaps_without_search; - int s_direct2indirect; - int s_indirect2direct; +struct reiserfs_sb_info { + struct buffer_head *s_sbh; /* Buffer containing the super block */ + /* both the comment and the choice of + name are unclear for s_rs -Hans */ + struct reiserfs_super_block *s_rs; /* Pointer to the super block in the buffer */ + struct reiserfs_bitmap_info *s_ap_bitmap; + struct reiserfs_journal *s_journal; /* pointer to journal information */ + unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ + + /* Comment? 
-Hans */ + void (*end_io_handler) (struct buffer_head *, int); + hashf_t s_hash_function; /* pointer to function which is used + to sort names in directory. Set on + mount */ + unsigned long s_mount_opt; /* reiserfs's mount options are set + here (currently - NOTAIL, NOLOG, + REPLAYONLY) */ + + struct { /* This is a structure that describes block allocator options */ + unsigned long bits; /* Bitfield for enable/disable kind of options */ + unsigned long large_file_size; /* size started from which we consider file to be a large one(in blocks) */ + int border; /* percentage of disk, border takes */ + int preallocmin; /* Minimal file size (in blocks) starting from which we do preallocations */ + int preallocsize; /* Number of blocks we try to prealloc when file + reaches preallocmin size (in blocks) or + prealloc_list is empty. */ + } s_alloc_options; + + /* Comment? -Hans */ + wait_queue_head_t s_wait; + /* To be obsoleted soon by per buffer seals.. -Hans */ + atomic_t s_generation_counter; // increased by one every time the + // tree gets re-balanced + unsigned long s_properties; /* File system properties. Currently holds + on-disk FS format */ + + /* session statistics */ + int s_kmallocs; + int s_disk_reads; + int s_disk_writes; + int s_fix_nodes; + int s_do_balance; + int s_unneeded_left_neighbor; + int s_good_search_by_key_reada; + int s_bmaps; + int s_bmaps_without_search; + int s_direct2indirect; + int s_indirect2direct; /* set up when it's ok for reiserfs_read_inode2() to read from disk inode with nlink==0. 
Currently this is only used during finish_unfinished() processing at mount time */ - int s_is_unlinked_ok; - reiserfs_proc_info_data_t s_proc_info_data; - struct proc_dir_entry *procdir; - int reserved_blocks; /* amount of blocks reserved for further allocations */ - spinlock_t bitmap_lock; /* this lock on now only used to protect reserved_blocks variable */ - struct dentry *priv_root; /* root of /.reiserfs_priv */ - struct dentry *xattr_root; /* root of /.reiserfs_priv/.xa */ - struct rw_semaphore xattr_dir_sem; - - int j_errno; + int s_is_unlinked_ok; + reiserfs_proc_info_data_t s_proc_info_data; + struct proc_dir_entry *procdir; + int reserved_blocks; /* amount of blocks reserved for further allocations */ + spinlock_t bitmap_lock; /* this lock on now only used to protect reserved_blocks variable */ + struct dentry *priv_root; /* root of /.reiserfs_priv */ + struct dentry *xattr_root; /* root of /.reiserfs_priv/.xa */ + struct rw_semaphore xattr_dir_sem; + + int j_errno; #ifdef CONFIG_QUOTA - char *s_qf_names[MAXQUOTAS]; - int s_jquota_fmt; + char *s_qf_names[MAXQUOTAS]; + int s_jquota_fmt; #endif }; @@ -422,14 +418,14 @@ struct reiserfs_sb_info enum reiserfs_mount_options { /* Mount options */ - REISERFS_LARGETAIL, /* large tails will be created in a session */ - REISERFS_SMALLTAIL, /* small (for files less than block size) tails will be created in a session */ - REPLAYONLY, /* replay journal and return 0. Use by fsck */ - REISERFS_CONVERT, /* -o conv: causes conversion of old - format super block to the new - format. If not specified - old - partition will be dealt with in a - manner of 3.5.x */ + REISERFS_LARGETAIL, /* large tails will be created in a session */ + REISERFS_SMALLTAIL, /* small (for files less than block size) tails will be created in a session */ + REPLAYONLY, /* replay journal and return 0. Use by fsck */ + REISERFS_CONVERT, /* -o conv: causes conversion of old + format super block to the new + format. 
If not specified - old + partition will be dealt with in a + manner of 3.5.x */ /* -o hash={tea, rupasov, r5, detect} is meant for properly mounting ** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option @@ -439,41 +435,41 @@ enum reiserfs_mount_options { ** the existing hash on the FS, so if you have a tea hash disk, and mount ** with -o hash=rupasov, the mount will fail. */ - FORCE_TEA_HASH, /* try to force tea hash on mount */ - FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */ - FORCE_R5_HASH, /* try to force rupasov hash on mount */ - FORCE_HASH_DETECT, /* try to detect hash function on mount */ + FORCE_TEA_HASH, /* try to force tea hash on mount */ + FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */ + FORCE_R5_HASH, /* try to force rupasov hash on mount */ + FORCE_HASH_DETECT, /* try to detect hash function on mount */ - REISERFS_DATA_LOG, - REISERFS_DATA_ORDERED, - REISERFS_DATA_WRITEBACK, + REISERFS_DATA_LOG, + REISERFS_DATA_ORDERED, + REISERFS_DATA_WRITEBACK, /* used for testing experimental features, makes benchmarking new features with and without more convenient, should never be used by users in any code shipped to users (ideally) */ - REISERFS_NO_BORDER, - REISERFS_NO_UNHASHED_RELOCATION, - REISERFS_HASHED_RELOCATION, - REISERFS_ATTRS, - REISERFS_XATTRS, - REISERFS_XATTRS_USER, - REISERFS_POSIXACL, - REISERFS_BARRIER_NONE, - REISERFS_BARRIER_FLUSH, - - /* Actions on error */ - REISERFS_ERROR_PANIC, - REISERFS_ERROR_RO, - REISERFS_ERROR_CONTINUE, - - REISERFS_QUOTA, /* Some quota option specified */ - - REISERFS_TEST1, - REISERFS_TEST2, - REISERFS_TEST3, - REISERFS_TEST4, - REISERFS_UNSUPPORTED_OPT, + REISERFS_NO_BORDER, + REISERFS_NO_UNHASHED_RELOCATION, + REISERFS_HASHED_RELOCATION, + REISERFS_ATTRS, + REISERFS_XATTRS, + REISERFS_XATTRS_USER, + REISERFS_POSIXACL, + REISERFS_BARRIER_NONE, + REISERFS_BARRIER_FLUSH, + + /* Actions on error */ + REISERFS_ERROR_PANIC, + REISERFS_ERROR_RO, + REISERFS_ERROR_CONTINUE, + + 
REISERFS_QUOTA, /* Some quota option specified */ + + REISERFS_TEST1, + REISERFS_TEST2, + REISERFS_TEST3, + REISERFS_TEST4, + REISERFS_UNSUPPORTED_OPT, }; #define reiserfs_r5_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_R5_HASH)) @@ -504,18 +500,17 @@ enum reiserfs_mount_options { #define reiserfs_error_panic(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_PANIC)) #define reiserfs_error_ro(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_RO)) -void reiserfs_file_buffer (struct buffer_head * bh, int list); +void reiserfs_file_buffer(struct buffer_head *bh, int list); extern struct file_system_type reiserfs_fs_type; -int reiserfs_resize(struct super_block *, unsigned long) ; +int reiserfs_resize(struct super_block *, unsigned long); #define CARRY_ON 0 #define SCHEDULE_OCCURRED 1 - #define SB_BUFFER_WITH_SB(s) (REISERFS_SB(s)->s_sbh) #define SB_JOURNAL(s) (REISERFS_SB(s)->s_journal) #define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block) -#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free) +#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free) #define SB_AP_BITMAP(s) (REISERFS_SB(s)->s_ap_bitmap) #define SB_DISK_JOURNAL_HEAD(s) (SB_JOURNAL(s)->j_header_bh->) @@ -525,13 +520,14 @@ int reiserfs_resize(struct super_block *, unsigned long) ; */ static inline char *reiserfs_bdevname(struct super_block *s) { - return (s == NULL) ? "Null superblock" : s -> s_id; + return (s == NULL) ? 
"Null superblock" : s->s_id; } #define reiserfs_is_journal_aborted(journal) (unlikely (__reiserfs_is_journal_aborted (journal))) -static inline int __reiserfs_is_journal_aborted (struct reiserfs_journal *journal) +static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal + *journal) { - return test_bit (J_ABORTED, &journal->j_state); + return test_bit(J_ABORTED, &journal->j_state); } -#endif /* _LINUX_REISER_FS_SB */ +#endif /* _LINUX_REISER_FS_SB */ diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index 9244c574882..c84354e8374 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -7,48 +7,48 @@ #include /* Magic value in header */ -#define REISERFS_XATTR_MAGIC 0x52465841 /* "RFXA" */ +#define REISERFS_XATTR_MAGIC 0x52465841 /* "RFXA" */ struct reiserfs_xattr_header { - __le32 h_magic; /* magic number for identification */ - __le32 h_hash; /* hash of the value */ + __le32 h_magic; /* magic number for identification */ + __le32 h_hash; /* hash of the value */ }; #ifdef __KERNEL__ struct reiserfs_xattr_handler { char *prefix; - int (*init)(void); - void (*exit)(void); - int (*get)(struct inode *inode, const char *name, void *buffer, - size_t size); - int (*set)(struct inode *inode, const char *name, const void *buffer, - size_t size, int flags); - int (*del)(struct inode *inode, const char *name); - int (*list)(struct inode *inode, const char *name, int namelen, char *out); - struct list_head handlers; + int (*init) (void); + void (*exit) (void); + int (*get) (struct inode * inode, const char *name, void *buffer, + size_t size); + int (*set) (struct inode * inode, const char *name, const void *buffer, + size_t size, int flags); + int (*del) (struct inode * inode, const char *name); + int (*list) (struct inode * inode, const char *name, int namelen, + char *out); + struct list_head handlers; }; - #ifdef CONFIG_REISERFS_FS_XATTR #define is_reiserfs_priv_object(inode) IS_PRIVATE(inode) #define 
has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir) -ssize_t reiserfs_getxattr (struct dentry *dentry, const char *name, - void *buffer, size_t size); -int reiserfs_setxattr (struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); -ssize_t reiserfs_listxattr (struct dentry *dentry, char *buffer, size_t size); -int reiserfs_removexattr (struct dentry *dentry, const char *name); -int reiserfs_delete_xattrs (struct inode *inode); -int reiserfs_chown_xattrs (struct inode *inode, struct iattr *attrs); -int reiserfs_xattr_init (struct super_block *sb, int mount_flags); -int reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd); -int reiserfs_permission_locked (struct inode *inode, int mask, struct nameidata *nd); - -int reiserfs_xattr_del (struct inode *, const char *); -int reiserfs_xattr_get (const struct inode *, const char *, void *, size_t); -int reiserfs_xattr_set (struct inode *, const char *, const void *, - size_t, int); +ssize_t reiserfs_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size); +int reiserfs_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size); +int reiserfs_removexattr(struct dentry *dentry, const char *name); +int reiserfs_delete_xattrs(struct inode *inode); +int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); +int reiserfs_xattr_init(struct super_block *sb, int mount_flags); +int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd); +int reiserfs_permission_locked(struct inode *inode, int mask, + struct nameidata *nd); + +int reiserfs_xattr_del(struct inode *, const char *); +int reiserfs_xattr_get(const struct inode *, const char *, void *, size_t); +int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int); extern struct reiserfs_xattr_handler user_handler; extern struct 
reiserfs_xattr_handler trusted_handler; @@ -56,57 +56,48 @@ extern struct reiserfs_xattr_handler trusted_handler; extern struct reiserfs_xattr_handler security_handler; #endif -int reiserfs_xattr_register_handlers (void) __init; -void reiserfs_xattr_unregister_handlers (void); +int reiserfs_xattr_register_handlers(void) __init; +void reiserfs_xattr_unregister_handlers(void); -static inline void -reiserfs_write_lock_xattrs(struct super_block *sb) +static inline void reiserfs_write_lock_xattrs(struct super_block *sb) { - down_write (&REISERFS_XATTR_DIR_SEM(sb)); + down_write(&REISERFS_XATTR_DIR_SEM(sb)); } -static inline void -reiserfs_write_unlock_xattrs(struct super_block *sb) +static inline void reiserfs_write_unlock_xattrs(struct super_block *sb) { - up_write (&REISERFS_XATTR_DIR_SEM(sb)); + up_write(&REISERFS_XATTR_DIR_SEM(sb)); } -static inline void -reiserfs_read_lock_xattrs(struct super_block *sb) +static inline void reiserfs_read_lock_xattrs(struct super_block *sb) { - down_read (&REISERFS_XATTR_DIR_SEM(sb)); + down_read(&REISERFS_XATTR_DIR_SEM(sb)); } -static inline void -reiserfs_read_unlock_xattrs(struct super_block *sb) +static inline void reiserfs_read_unlock_xattrs(struct super_block *sb) { - up_read (&REISERFS_XATTR_DIR_SEM(sb)); + up_read(&REISERFS_XATTR_DIR_SEM(sb)); } -static inline void -reiserfs_write_lock_xattr_i(struct inode *inode) +static inline void reiserfs_write_lock_xattr_i(struct inode *inode) { - down_write (&REISERFS_I(inode)->xattr_sem); + down_write(&REISERFS_I(inode)->xattr_sem); } -static inline void -reiserfs_write_unlock_xattr_i(struct inode *inode) +static inline void reiserfs_write_unlock_xattr_i(struct inode *inode) { - up_write (&REISERFS_I(inode)->xattr_sem); + up_write(&REISERFS_I(inode)->xattr_sem); } -static inline void -reiserfs_read_lock_xattr_i(struct inode *inode) +static inline void reiserfs_read_lock_xattr_i(struct inode *inode) { - down_read (&REISERFS_I(inode)->xattr_sem); + 
down_read(&REISERFS_I(inode)->xattr_sem); } -static inline void -reiserfs_read_unlock_xattr_i(struct inode *inode) +static inline void reiserfs_read_unlock_xattr_i(struct inode *inode) { - up_read (&REISERFS_I(inode)->xattr_sem); + up_read(&REISERFS_I(inode)->xattr_sem); } -static inline void -reiserfs_mark_inode_private(struct inode *inode) +static inline void reiserfs_mark_inode_private(struct inode *inode) { - inode->i_flags |= S_PRIVATE; + inode->i_flags |= S_PRIVATE; } #else @@ -127,13 +118,20 @@ reiserfs_mark_inode_private(struct inode *inode) #define reiserfs_xattr_register_handlers() 0 #define reiserfs_xattr_unregister_handlers() -static inline int reiserfs_delete_xattrs (struct inode *inode) { return 0; }; -static inline int reiserfs_chown_xattrs (struct inode *inode, struct iattr *attrs) { return 0; }; -static inline int reiserfs_xattr_init (struct super_block *sb, int mount_flags) +static inline int reiserfs_delete_xattrs(struct inode *inode) +{ + return 0; +}; +static inline int reiserfs_chown_xattrs(struct inode *inode, + struct iattr *attrs) +{ + return 0; +}; +static inline int reiserfs_xattr_init(struct super_block *sb, int mount_flags) { - sb->s_flags = (sb->s_flags & ~MS_POSIXACL); /* to be sure */ - return 0; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL); /* to be sure */ + return 0; }; #endif -#endif /* __KERNEL__ */ +#endif /* __KERNEL__ */ -- cgit v1.2.3-70-g09d2 From 0eeca28300df110bd6ed54b31193c83b87921443 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Tue, 12 Jul 2005 17:06:03 -0400 Subject: [PATCH] inotify inotify is intended to correct the deficiencies of dnotify, particularly its inability to scale and its terrible user interface: * dnotify requires the opening of one fd per each directory that you intend to watch. This quickly results in too many open files and pins removable media, preventing unmount. * dnotify is directory-based. You only learn about changes to directories. 
Sure, a change to a file in a directory affects the directory, but you are then forced to keep a cache of stat structures. * dnotify's interface to user-space is awful. Signals? inotify provides a more usable, simple, powerful solution to file change notification: * inotify's interface is a system call that returns a fd, not SIGIO. You get a single fd, which is select()-able. * inotify has an event that says "the filesystem that the item you were watching is on was unmounted." * inotify can watch directories or files. Inotify is currently used by Beagle (a desktop search infrastructure), Gamin (a FAM replacement), and other projects. See Documentation/filesystems/inotify.txt. Signed-off-by: Robert Love Cc: John McCutchan Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/inotify.txt | 138 +++++ arch/i386/kernel/syscall_table.S | 3 + fs/Kconfig | 13 + fs/Makefile | 1 + fs/attr.c | 33 +- fs/compat.c | 12 +- fs/file_table.c | 3 + fs/inode.c | 6 + fs/inotify.c | 999 ++++++++++++++++++++++++++++++++++ fs/namei.c | 30 +- fs/nfsd/vfs.c | 6 +- fs/open.c | 3 +- fs/read_write.c | 15 +- fs/sysfs/file.c | 7 +- fs/xattr.c | 5 +- include/asm-i386/unistd.h | 5 +- include/linux/fs.h | 6 +- include/linux/fsnotify.h | 248 +++++++++ include/linux/inotify.h | 108 ++++ include/linux/sched.h | 4 + include/linux/sysctl.h | 11 +- kernel/sys_ni.c | 3 + kernel/sysctl.c | 43 +- kernel/user.c | 4 + 24 files changed, 1639 insertions(+), 67 deletions(-) create mode 100644 Documentation/filesystems/inotify.txt create mode 100644 fs/inotify.c create mode 100644 include/linux/fsnotify.h create mode 100644 include/linux/inotify.h (limited to 'include/linux') diff --git a/Documentation/filesystems/inotify.txt b/Documentation/filesystems/inotify.txt new file mode 100644 index 00000000000..2c716041f57 --- /dev/null +++ b/Documentation/filesystems/inotify.txt @@ -0,0 +1,138 @@ + inotify + a powerful yet simple file change notification system + 
+ + +Document started 15 Mar 2005 by Robert Love + +(i) User Interface + +Inotify is controlled by a set of three system calls. + +The first step in using inotify is to initialise an inotify instance + + int fd = inotify_init (); + +Change events are managed by "watches". A watch is an (object,mask) pair where +the object is a file or directory and the mask is a bit mask of one or more +inotify events that the application wishes to receive. See include/linux/inotify.h +for valid events. A watch is referenced by a watch descriptor, or wd. + +Watches are added via a path to the file. + +Watches on a directory will return events on any files inside of the directory. + +Adding a watch is simple, + + int wd = inotify_add_watch (fd, path, mask); + +You can add a large number of files via something like + + for each file to watch { + int wd = inotify_add_watch (fd, file, mask); + } + +You can update an existing watch in the same manner, by passing in a new mask. + +An existing watch is removed via the inotify_rm_watch system call, for example + + inotify_rm_watch (fd, wd); + +Events are provided in the form of an inotify_event structure that is read(2) +from an inotify instance fd. The filename is of dynamic length and follows the +struct. It is of size len. The filename is padded with null bytes to ensure +proper alignment. This padding is reflected in len. + +You can slurp multiple events by passing a large buffer, for example + + size_t len = read (fd, buf, BUF_LEN); + +This will return as many events as are available and fit in BUF_LEN. + +Each inotify instance fd is also select()- and poll()-able. + +You can find the size of the current event queue via the FIONREAD ioctl. + +All watches are destroyed and cleaned up on close. + + +(ii) Internal Kernel Implementation + +Each open inotify instance is associated with an inotify_device structure. + +Each watch is associated with an inotify_watch structure. Watches are chained +off of each associated device and each associated inode. 
+ +See fs/inotify.c for the locking and lifetime rules. + + +(iii) Rationale + +Q: What is the design decision behind not tying the watch to the open fd of + the watched object? + +A: Watches are associated with an open inotify device, not an open file. + This solves the primary problem with dnotify: keeping the file open pins + the file and thus, worse, pins the mount. Dnotify is therefore infeasible + for use on a desktop system with removable media as the media cannot be + unmounted. + +Q: What is the design decision behind using an fd-per-device as opposed to + an fd-per-watch? + +A: An fd-per-watch quickly consumes more file descriptors than are allowed, + more fd's than are feasible to manage, and more fd's than are optimally + select()-able. Yes, root can bump the per-process fd limit and yes, users + can use epoll, but requiring both is a silly and extraneous requirement. + A watch consumes less memory than an open file; separating the number + spaces is thus sensible. The current design is what user-space developers + want: Users initialize inotify, once, and add n watches, requiring but one fd + and no twiddling with fd limits. Initializing an inotify instance two + thousand times is silly. If we can implement user-space's preferences + cleanly--and we can, the idr layer makes stuff like this trivial--then we + should. + + There are other good arguments. With a single fd, there is a single + item to block on, which is mapped to a single queue of events. The single + fd returns all watch events and also any potential out-of-band data. If + every fd were a separate watch, + + - There would be no way to get event ordering. Events on file foo and + file bar would pop poll() on both fd's, but there would be no way to tell + which happened first. A single queue trivially gives you ordering. Such + ordering is crucial to existing applications such as Beagle. Imagine + "mv a b ; mv b a" events without ordering. 
+ + - We'd have to maintain n fd's and n internal queues with state, + versus just one. It is a lot messier in the kernel. A single, linear + queue is the data structure that makes sense. + + - User-space developers prefer the current API. The Beagle guys, for + example, love it. Trust me, I asked. It is not a surprise: Who'd want + to manage and block on 1000 fd's via select? + + - You'd have to manage the fd's, as an example: Call close() when you + receive a delete event. + + - No way to get out of band data. + + - 1024 is still too low. ;-) + + When you talk about designing a file change notification system that + scales to 1000s of directories, juggling 1000s of fd's just does not seem + the right interface. It is too heavy. + +Q: Why the system call approach? + +A: The poor user-space interface is the second biggest problem with dnotify. + Signals are a terrible, terrible interface for file notification. Or for + anything, for that matter. The ideal solution, from all perspectives, is a + file descriptor-based one that allows basic file I/O and poll/select. + Obtaining the fd and managing the watches could have been done either via a + device file or a family of new system calls. We decided to implement a + family of system calls because that is the preferred approach for new kernel + features and it meets our user interface requirements. + + Additionally, it _is_ possible to open more than one instance and + juggle more than one queue and thus more than one associated fd. 
+ diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index 3db9a04aec6..468500a7e89 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -291,3 +291,6 @@ ENTRY(sys_call_table) .long sys_keyctl .long sys_ioprio_set .long sys_ioprio_get /* 290 */ + .long sys_inotify_init + .long sys_inotify_add_watch + .long sys_inotify_rm_watch diff --git a/fs/Kconfig b/fs/Kconfig index f93fd41b025..5d0c4be43db 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -359,6 +359,19 @@ config ROMFS_FS If you don't know whether you need it, then you don't need it: answer N. +config INOTIFY + bool "Inotify file change notification support" + default y + ---help--- + Say Y here to enable inotify support and the /dev/inotify character + device. Inotify is a file change notification system and a + replacement for dnotify. Inotify fixes numerous shortcomings in + dnotify and introduces several new features. It allows monitoring + of both files and directories via a single open fd. Multiple file + events are supported. + + If unsure, say Y. 
+ config QUOTA bool "Quota support" help diff --git a/fs/Makefile b/fs/Makefile index 20edcf28bfd..cf95eb894fd 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -12,6 +12,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ ioprio.o +obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_EPOLL) += eventpoll.o obj-$(CONFIG_COMPAT) += compat.o diff --git a/fs/attr.c b/fs/attr.c index c3c76fe7834..b1796fb9e52 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -107,31 +107,8 @@ int inode_setattr(struct inode * inode, struct iattr * attr) out: return error; } - EXPORT_SYMBOL(inode_setattr); -int setattr_mask(unsigned int ia_valid) -{ - unsigned long dn_mask = 0; - - if (ia_valid & ATTR_UID) - dn_mask |= DN_ATTRIB; - if (ia_valid & ATTR_GID) - dn_mask |= DN_ATTRIB; - if (ia_valid & ATTR_SIZE) - dn_mask |= DN_MODIFY; - /* both times implies a utime(s) call */ - if ((ia_valid & (ATTR_ATIME|ATTR_MTIME)) == (ATTR_ATIME|ATTR_MTIME)) - dn_mask |= DN_ATTRIB; - else if (ia_valid & ATTR_ATIME) - dn_mask |= DN_ACCESS; - else if (ia_valid & ATTR_MTIME) - dn_mask |= DN_MODIFY; - if (ia_valid & ATTR_MODE) - dn_mask |= DN_ATTRIB; - return dn_mask; -} - int notify_change(struct dentry * dentry, struct iattr * attr) { struct inode *inode = dentry->d_inode; @@ -197,11 +174,9 @@ int notify_change(struct dentry * dentry, struct iattr * attr) if (ia_valid & ATTR_SIZE) up_write(&dentry->d_inode->i_alloc_sem); - if (!error) { - unsigned long dn_mask = setattr_mask(ia_valid); - if (dn_mask) - dnotify_parent(dentry, dn_mask); - } + if (!error) + fsnotify_change(dentry, ia_valid); + return error; } diff --git a/fs/compat.c b/fs/compat.c index 728cd836538..6b06b6bae35 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include @@ -1307,9 +1307,13 @@ static ssize_t 
compat_do_readv_writev(int type, struct file *file, out: if (iov != iovstack) kfree(iov); - if ((ret + (type == READ)) > 0) - dnotify_parent(file->f_dentry, - (type == READ) ? DN_ACCESS : DN_MODIFY); + if ((ret + (type == READ)) > 0) { + struct dentry *dentry = file->f_dentry; + if (type == READ) + fsnotify_access(dentry); + else + fsnotify_modify(dentry); + } return ret; } diff --git a/fs/file_table.c b/fs/file_table.c index fa7849fae13..1d3de78e6bc 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -16,6 +16,7 @@ #include #include #include +#include /* sysctl tunables... */ struct files_stat_struct files_stat = { @@ -126,6 +127,8 @@ void fastcall __fput(struct file *file) struct inode *inode = dentry->d_inode; might_sleep(); + + fsnotify_close(file); /* * The function eventpoll_release() should be the first called * in the file cleanup chain. diff --git a/fs/inode.c b/fs/inode.c index 5bc97507eea..96364fae084 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * This is needed for the following functions: @@ -202,6 +203,10 @@ void inode_init_once(struct inode *inode) INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear); spin_lock_init(&inode->i_lock); i_size_ordered_init(inode); +#ifdef CONFIG_INOTIFY + INIT_LIST_HEAD(&inode->inotify_watches); + sema_init(&inode->inotify_sem, 1); +#endif } EXPORT_SYMBOL(inode_init_once); @@ -351,6 +356,7 @@ int invalidate_inodes(struct super_block * sb) down(&iprune_sem); spin_lock(&inode_lock); + inotify_unmount_inodes(&sb->s_inodes); busy = invalidate_list(&sb->s_inodes, &throw_away); spin_unlock(&inode_lock); diff --git a/fs/inotify.c b/fs/inotify.c new file mode 100644 index 00000000000..e423bfe0c86 --- /dev/null +++ b/fs/inotify.c @@ -0,0 +1,999 @@ +/* + * fs/inotify.c - inode-based file event notifications + * + * Authors: + * John McCutchan + * Robert Love + * + * Copyright (C) 2005 John McCutchan + * + * This program is free software; you can redistribute it and/or modify it + 
* under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static atomic_t inotify_cookie; + +static kmem_cache_t *watch_cachep; +static kmem_cache_t *event_cachep; + +static struct vfsmount *inotify_mnt; + +/* These are configurable via /proc/sys/inotify */ +int inotify_max_user_devices; +int inotify_max_user_watches; +int inotify_max_queued_events; + +/* + * Lock ordering: + * + * dentry->d_lock (used to keep d_move() away from dentry->d_parent) + * iprune_sem (synchronize shrink_icache_memory()) + * inode_lock (protects the super_block->s_inodes list) + * inode->inotify_sem (protects inode->inotify_watches and watches->i_list) + * inotify_dev->sem (protects inotify_device and watches->d_list) + */ + +/* + * Lifetimes of the three main data structures--inotify_device, inode, and + * inotify_watch--are managed by reference count. + * + * inotify_device: Lifetime is from open until release. Additional references + * can bump the count via get_inotify_dev() and drop the count via + * put_inotify_dev(). + * + * inotify_watch: Lifetime is from create_watch() to destory_watch(). + * Additional references can bump the count via get_inotify_watch() and drop + * the count via put_inotify_watch(). + * + * inode: Pinned so long as the inode is associated with a watch, from + * create_watch() to put_inotify_watch(). 
+ */ + +/* + * struct inotify_device - represents an open instance of an inotify device + * + * This structure is protected by the semaphore 'sem'. + */ +struct inotify_device { + wait_queue_head_t wq; /* wait queue for i/o */ + struct idr idr; /* idr mapping wd -> watch */ + struct semaphore sem; /* protects this bad boy */ + struct list_head events; /* list of queued events */ + struct list_head watches; /* list of watches */ + atomic_t count; /* reference count */ + struct user_struct *user; /* user who opened this dev */ + unsigned int queue_size; /* size of the queue (bytes) */ + unsigned int event_count; /* number of pending events */ + unsigned int max_events; /* maximum number of events */ +}; + +/* + * struct inotify_kernel_event - An inotify event, originating from a watch and + * queued for user-space. A list of these is attached to each instance of the + * device. In read(), this list is walked and all events that can fit in the + * buffer are returned. + * + * Protected by dev->sem of the device in which we are queued. + */ +struct inotify_kernel_event { + struct inotify_event event; /* the user-space event */ + struct list_head list; /* entry in inotify_device's list */ + char *name; /* filename, if any */ +}; + +/* + * struct inotify_watch - represents a watch request on a specific inode + * + * d_list is protected by dev->sem of the associated watch->dev. + * i_list and mask are protected by inode->inotify_sem of the associated inode. + * dev, inode, and wd are never written to once the watch is created. 
+ */ +struct inotify_watch { + struct list_head d_list; /* entry in inotify_device's list */ + struct list_head i_list; /* entry in inode's list */ + atomic_t count; /* reference count */ + struct inotify_device *dev; /* associated device */ + struct inode *inode; /* associated inode */ + s32 wd; /* watch descriptor */ + u32 mask; /* event mask for this watch */ +}; + +static inline void get_inotify_dev(struct inotify_device *dev) +{ + atomic_inc(&dev->count); +} + +static inline void put_inotify_dev(struct inotify_device *dev) +{ + if (atomic_dec_and_test(&dev->count)) { + atomic_dec(&dev->user->inotify_devs); + free_uid(dev->user); + kfree(dev); + } +} + +static inline void get_inotify_watch(struct inotify_watch *watch) +{ + atomic_inc(&watch->count); +} + +/* + * put_inotify_watch - decrements the ref count on a given watch. cleans up + * the watch and its references if the count reaches zero. + */ +static inline void put_inotify_watch(struct inotify_watch *watch) +{ + if (atomic_dec_and_test(&watch->count)) { + put_inotify_dev(watch->dev); + iput(watch->inode); + kmem_cache_free(watch_cachep, watch); + } +} + +/* + * kernel_event - create a new kernel event with the given parameters + * + * This function can sleep. + */ +static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie, + const char *name) +{ + struct inotify_kernel_event *kevent; + + kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL); + if (unlikely(!kevent)) + return NULL; + + /* we hand this out to user-space, so zero it just in case */ + memset(&kevent->event, 0, sizeof(struct inotify_event)); + + kevent->event.wd = wd; + kevent->event.mask = mask; + kevent->event.cookie = cookie; + + INIT_LIST_HEAD(&kevent->list); + + if (name) { + size_t len, rem, event_size = sizeof(struct inotify_event); + + /* + * We need to pad the filename so as to properly align an + * array of inotify_event structures. 
Because the structure is + * small and the common case is a small filename, we just round + * up to the next multiple of the structure's sizeof. This is + * simple and safe for all architectures. + */ + len = strlen(name) + 1; + rem = event_size - len; + if (len > event_size) { + rem = event_size - (len % event_size); + if (len % event_size == 0) + rem = 0; + } + + kevent->name = kmalloc(len + rem, GFP_KERNEL); + if (unlikely(!kevent->name)) { + kmem_cache_free(event_cachep, kevent); + return NULL; + } + memcpy(kevent->name, name, len); + if (rem) + memset(kevent->name + len, 0, rem); + kevent->event.len = len + rem; + } else { + kevent->event.len = 0; + kevent->name = NULL; + } + + return kevent; +} + +/* + * inotify_dev_get_event - return the next event in the given dev's queue + * + * Caller must hold dev->sem. + */ +static inline struct inotify_kernel_event * +inotify_dev_get_event(struct inotify_device *dev) +{ + return list_entry(dev->events.next, struct inotify_kernel_event, list); +} + +/* + * inotify_dev_queue_event - add a new event to the given device + * + * Caller must hold dev->sem. Can sleep (calls kernel_event()). 
+ */ +static void inotify_dev_queue_event(struct inotify_device *dev, + struct inotify_watch *watch, u32 mask, + u32 cookie, const char *name) +{ + struct inotify_kernel_event *kevent, *last; + + /* coalescing: drop this event if it is a dupe of the previous */ + last = inotify_dev_get_event(dev); + if (last && last->event.mask == mask && last->event.wd == watch->wd && + last->event.cookie == cookie) { + const char *lastname = last->name; + + if (!name && !lastname) + return; + if (name && lastname && !strcmp(lastname, name)) + return; + } + + /* the queue overflowed and we already sent the Q_OVERFLOW event */ + if (unlikely(dev->event_count > dev->max_events)) + return; + + /* if the queue overflows, we need to notify user space */ + if (unlikely(dev->event_count == dev->max_events)) + kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL); + else + kevent = kernel_event(watch->wd, mask, cookie, name); + + if (unlikely(!kevent)) + return; + + /* queue the event and wake up anyone waiting */ + dev->event_count++; + dev->queue_size += sizeof(struct inotify_event) + kevent->event.len; + list_add_tail(&kevent->list, &dev->events); + wake_up_interruptible(&dev->wq); +} + +/* + * remove_kevent - cleans up and ultimately frees the given kevent + * + * Caller must hold dev->sem. + */ +static void remove_kevent(struct inotify_device *dev, + struct inotify_kernel_event *kevent) +{ + list_del(&kevent->list); + + dev->event_count--; + dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; + + kfree(kevent->name); + kmem_cache_free(event_cachep, kevent); +} + +/* + * inotify_dev_event_dequeue - destroy an event on the given device + * + * Caller must hold dev->sem. 
+ */ +static void inotify_dev_event_dequeue(struct inotify_device *dev) +{ + if (!list_empty(&dev->events)) { + struct inotify_kernel_event *kevent; + kevent = inotify_dev_get_event(dev); + remove_kevent(dev, kevent); + } +} + +/* + * inotify_dev_get_wd - returns the next WD for use by the given dev + * + * Callers must hold dev->sem. This function can sleep. + */ +static int inotify_dev_get_wd(struct inotify_device *dev, + struct inotify_watch *watch) +{ + int ret; + + do { + if (unlikely(!idr_pre_get(&dev->idr, GFP_KERNEL))) + return -ENOSPC; + ret = idr_get_new(&dev->idr, watch, &watch->wd); + } while (ret == -EAGAIN); + + return ret; +} + +/* + * find_inode - resolve a user-given path to a specific inode and return a nd + */ +static int find_inode(const char __user *dirname, struct nameidata *nd) +{ + int error; + + error = __user_walk(dirname, LOOKUP_FOLLOW, nd); + if (error) + return error; + /* you can only watch an inode if you have read permissions on it */ + error = permission(nd->dentry->d_inode, MAY_READ, NULL); + if (error) + path_release (nd); + return error; +} + +/* + * create_watch - creates a watch on the given device. + * + * Callers must hold dev->sem. Calls inotify_dev_get_wd() so may sleep. + * Both 'dev' and 'inode' (by way of nameidata) need to be pinned. 
+ */ +static struct inotify_watch *create_watch(struct inotify_device *dev, + u32 mask, struct inode *inode) +{ + struct inotify_watch *watch; + int ret; + + if (atomic_read(&dev->user->inotify_watches) >= inotify_max_user_watches) + return ERR_PTR(-ENOSPC); + + watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL); + if (unlikely(!watch)) + return ERR_PTR(-ENOMEM); + + ret = inotify_dev_get_wd(dev, watch); + if (unlikely(ret)) { + kmem_cache_free(watch_cachep, watch); + return ERR_PTR(ret); + } + + watch->mask = mask; + atomic_set(&watch->count, 0); + INIT_LIST_HEAD(&watch->d_list); + INIT_LIST_HEAD(&watch->i_list); + + /* save a reference to device and bump the count to make it official */ + get_inotify_dev(dev); + watch->dev = dev; + + /* + * Save a reference to the inode and bump the ref count to make it + * official. We hold a reference to nameidata, which makes this safe. + */ + watch->inode = igrab(inode); + + /* bump our own count, corresponding to our entry in dev->watches */ + get_inotify_watch(watch); + + atomic_inc(&dev->user->inotify_watches); + + return watch; +} + +/* + * inotify_find_dev - find the watch associated with the given inode and dev + * + * Callers must hold inode->inotify_sem. + */ +static struct inotify_watch *inode_find_dev(struct inode *inode, + struct inotify_device *dev) +{ + struct inotify_watch *watch; + + list_for_each_entry(watch, &inode->inotify_watches, i_list) { + if (watch->dev == dev) + return watch; + } + + return NULL; +} + +/* + * remove_watch_no_event - remove_watch() without the IN_IGNORED event. + */ +static void remove_watch_no_event(struct inotify_watch *watch, + struct inotify_device *dev) +{ + list_del(&watch->i_list); + list_del(&watch->d_list); + + atomic_dec(&dev->user->inotify_watches); + idr_remove(&dev->idr, watch->wd); + put_inotify_watch(watch); +} + +/* + * remove_watch - Remove a watch from both the device and the inode. 
Sends + * the IN_IGNORED event to the given device signifying that the inode is no + * longer watched. + * + * Callers must hold both inode->inotify_sem and dev->sem. We drop a + * reference to the inode before returning. + * + * The inode is not iput() so as to remain atomic. If the inode needs to be + * iput(), the call returns one. Otherwise, it returns zero. + */ +static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev) +{ + inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL); + remove_watch_no_event(watch, dev); +} + +/* + * inotify_inode_watched - returns nonzero if there are watches on this inode + * and zero otherwise. We call this lockless, we do not care if we race. + */ +static inline int inotify_inode_watched(struct inode *inode) +{ + return !list_empty(&inode->inotify_watches); +} + +/* Kernel API */ + +/** + * inotify_inode_queue_event - queue an event to all watches on this inode + * @inode: inode event is originating from + * @mask: event mask describing this event + * @cookie: cookie for synchronization, or zero + * @name: filename, if any + */ +void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie, + const char *name) +{ + struct inotify_watch *watch, *next; + + if (!inotify_inode_watched(inode)) + return; + + down(&inode->inotify_sem); + list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { + u32 watch_mask = watch->mask; + if (watch_mask & mask) { + struct inotify_device *dev = watch->dev; + get_inotify_watch(watch); + down(&dev->sem); + inotify_dev_queue_event(dev, watch, mask, cookie, name); + if (watch_mask & IN_ONESHOT) + remove_watch_no_event(watch, dev); + up(&dev->sem); + put_inotify_watch(watch); + } + } + up(&inode->inotify_sem); +} +EXPORT_SYMBOL_GPL(inotify_inode_queue_event); + +/** + * inotify_dentry_parent_queue_event - queue an event to a dentry's parent + * @dentry: the dentry in question, we queue against this dentry's parent + * @mask: event mask describing this 
event + * @cookie: cookie for synchronization, or zero + * @name: filename, if any + */ +void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask, + u32 cookie, const char *name) +{ + struct dentry *parent; + struct inode *inode; + + spin_lock(&dentry->d_lock); + parent = dentry->d_parent; + inode = parent->d_inode; + + if (inotify_inode_watched(inode)) { + dget(parent); + spin_unlock(&dentry->d_lock); + inotify_inode_queue_event(inode, mask, cookie, name); + dput(parent); + } else + spin_unlock(&dentry->d_lock); +} +EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event); + +/** + * inotify_get_cookie - return a unique cookie for use in synchronizing events. + */ +u32 inotify_get_cookie(void) +{ + return atomic_inc_return(&inotify_cookie); +} +EXPORT_SYMBOL_GPL(inotify_get_cookie); + +/** + * inotify_unmount_inodes - an sb is unmounting. handle any watched inodes. + * @list: list of inodes being unmounted (sb->s_inodes) + * + * Called with inode_lock held, protecting the unmounting super block's list + * of inodes, and with iprune_sem held, keeping shrink_icache_memory() at bay. + * We temporarily drop inode_lock, however, and CAN block. + */ +void inotify_unmount_inodes(struct list_head *list) +{ + struct inode *inode, *next_i, *need_iput = NULL; + + list_for_each_entry_safe(inode, next_i, list, i_sb_list) { + struct inotify_watch *watch, *next_w; + struct inode *need_iput_tmp; + struct list_head *watches; + + /* + * If i_count is zero, the inode cannot have any watches and + * doing an __iget/iput with MS_ACTIVE clear would actually + * evict all inodes with zero i_count from icache which is + * unnecessarily violent and may in fact be illegal to do. + */ + if (!atomic_read(&inode->i_count)) + continue; + + /* + * We cannot __iget() an inode in state I_CLEAR, I_FREEING, or + * I_WILL_FREE which is fine because by that point the inode + * cannot have any associated watches. 
+ */ + if (inode->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE)) + continue; + + need_iput_tmp = need_iput; + need_iput = NULL; + /* In case the remove_watch() drops a reference. */ + if (inode != need_iput_tmp) + __iget(inode); + else + need_iput_tmp = NULL; + /* In case the dropping of a reference would nuke next_i. */ + if ((&next_i->i_sb_list != list) && + atomic_read(&next_i->i_count) && + !(next_i->i_state & (I_CLEAR | I_FREEING | + I_WILL_FREE))) { + __iget(next_i); + need_iput = next_i; + } + + /* + * We can safely drop inode_lock here because we hold + * references on both inode and next_i. Also no new inodes + * will be added since the umount has begun. Finally, + * iprune_sem keeps shrink_icache_memory() away. + */ + spin_unlock(&inode_lock); + + if (need_iput_tmp) + iput(need_iput_tmp); + + /* for each watch, send IN_UNMOUNT and then remove it */ + down(&inode->inotify_sem); + watches = &inode->inotify_watches; + list_for_each_entry_safe(watch, next_w, watches, i_list) { + struct inotify_device *dev = watch->dev; + down(&dev->sem); + inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL); + remove_watch(watch, dev); + up(&dev->sem); + } + up(&inode->inotify_sem); + iput(inode); + + spin_lock(&inode_lock); + } +} +EXPORT_SYMBOL_GPL(inotify_unmount_inodes); + +/** + * inotify_inode_is_dead - an inode has been deleted, cleanup any watches + * @inode: inode that is about to be removed + */ +void inotify_inode_is_dead(struct inode *inode) +{ + struct inotify_watch *watch, *next; + + down(&inode->inotify_sem); + list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { + struct inotify_device *dev = watch->dev; + down(&dev->sem); + remove_watch(watch, dev); + up(&dev->sem); + } + up(&inode->inotify_sem); +} +EXPORT_SYMBOL_GPL(inotify_inode_is_dead); + +/* Device Interface */ + +static unsigned int inotify_poll(struct file *file, poll_table *wait) +{ + struct inotify_device *dev = file->private_data; + int ret = 0; + + poll_wait(file, &dev->wq, 
wait); + down(&dev->sem); + if (!list_empty(&dev->events)) + ret = POLLIN | POLLRDNORM; + up(&dev->sem); + + return ret; +} + +static ssize_t inotify_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + size_t event_size = sizeof (struct inotify_event); + struct inotify_device *dev; + char __user *start; + int ret; + DEFINE_WAIT(wait); + + start = buf; + dev = file->private_data; + + while (1) { + int events; + + prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); + + down(&dev->sem); + events = !list_empty(&dev->events); + up(&dev->sem); + if (events) { + ret = 0; + break; + } + + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + break; + } + + if (signal_pending(current)) { + ret = -EINTR; + break; + } + + schedule(); + } + + finish_wait(&dev->wq, &wait); + if (ret) + return ret; + + down(&dev->sem); + while (1) { + struct inotify_kernel_event *kevent; + + ret = buf - start; + if (list_empty(&dev->events)) + break; + + kevent = inotify_dev_get_event(dev); + if (event_size + kevent->event.len > count) + break; + + if (copy_to_user(buf, &kevent->event, event_size)) { + ret = -EFAULT; + break; + } + buf += event_size; + count -= event_size; + + if (kevent->name) { + if (copy_to_user(buf, kevent->name, kevent->event.len)){ + ret = -EFAULT; + break; + } + buf += kevent->event.len; + count -= kevent->event.len; + } + + remove_kevent(dev, kevent); + } + up(&dev->sem); + + return ret; +} + +static int inotify_release(struct inode *ignored, struct file *file) +{ + struct inotify_device *dev = file->private_data; + + /* + * Destroy all of the watches on this device. Unfortunately, not very + * pretty. We cannot do a simple iteration over the list, because we + * do not know the inode until we iterate to the watch. But we need to + * hold inode->inotify_sem before dev->sem. The following works. 
+ */ + while (1) { + struct inotify_watch *watch; + struct list_head *watches; + struct inode *inode; + + down(&dev->sem); + watches = &dev->watches; + if (list_empty(watches)) { + up(&dev->sem); + break; + } + watch = list_entry(watches->next, struct inotify_watch, d_list); + get_inotify_watch(watch); + up(&dev->sem); + + inode = watch->inode; + down(&inode->inotify_sem); + down(&dev->sem); + remove_watch_no_event(watch, dev); + up(&dev->sem); + up(&inode->inotify_sem); + put_inotify_watch(watch); + } + + /* destroy all of the events on this device */ + down(&dev->sem); + while (!list_empty(&dev->events)) + inotify_dev_event_dequeue(dev); + up(&dev->sem); + + /* free this device: the put matching the get in inotify_open() */ + put_inotify_dev(dev); + + return 0; +} + +/* + * inotify_ignore - handle the INOTIFY_IGNORE ioctl, asking that a given wd be + * removed from the device. + * + * Can sleep. + */ +static int inotify_ignore(struct inotify_device *dev, s32 wd) +{ + struct inotify_watch *watch; + struct inode *inode; + + down(&dev->sem); + watch = idr_find(&dev->idr, wd); + if (unlikely(!watch)) { + up(&dev->sem); + return -EINVAL; + } + get_inotify_watch(watch); + inode = watch->inode; + up(&dev->sem); + + down(&inode->inotify_sem); + down(&dev->sem); + + /* make sure that we did not race */ + watch = idr_find(&dev->idr, wd); + if (likely(watch)) + remove_watch(watch, dev); + + up(&dev->sem); + up(&inode->inotify_sem); + put_inotify_watch(watch); + + return 0; +} + +static long inotify_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct inotify_device *dev; + void __user *p; + int ret = -ENOTTY; + + dev = file->private_data; + p = (void __user *) arg; + + switch (cmd) { + case FIONREAD: + ret = put_user(dev->queue_size, (int __user *) p); + break; + } + + return ret; +} + +static struct file_operations inotify_fops = { + .poll = inotify_poll, + .read = inotify_read, + .release = inotify_release, + .unlocked_ioctl = inotify_ioctl, + 
.compat_ioctl = inotify_ioctl, +}; + +/* + * sys_inotify_init - create a new inotify instance and return a file + * descriptor referring to it. + * + * Returns the new descriptor on success. On failure returns the error from + * get_unused_fd(), -ENFILE if no struct file could be allocated, -EMFILE if + * the user already has inotify_max_user_devices instances, or -ENOMEM. + */ +asmlinkage long sys_inotify_init(void) +{ + struct inotify_device *dev; + struct user_struct *user; + int ret = -ENOTTY; + int fd; + struct file *filp; + + fd = get_unused_fd(); + if (fd < 0) { + ret = fd; + goto out; + } + + filp = get_empty_filp(); + if (!filp) { + put_unused_fd(fd); + ret = -ENFILE; + goto out; + } + + user = get_uid(current->user); + + if (unlikely(atomic_read(&user->inotify_devs) >= inotify_max_user_devices)) { + ret = -EMFILE; + goto out_err; + } + + dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL); + if (unlikely(!dev)) { + ret = -ENOMEM; + goto out_err; + } + + /* + * Nothing below can fail, so only now take the vfsmount and dentry + * references: the out_err path releases filp with put_filp(), which + * does not drop f_vfsmnt or f_dentry. + */ + filp->f_op = &inotify_fops; + filp->f_vfsmnt = mntget(inotify_mnt); + filp->f_dentry = dget(inotify_mnt->mnt_root); + filp->f_mapping = filp->f_dentry->d_inode->i_mapping; + filp->f_mode = FMODE_READ; + filp->f_flags = O_RDONLY; + + idr_init(&dev->idr); + INIT_LIST_HEAD(&dev->events); + INIT_LIST_HEAD(&dev->watches); + init_waitqueue_head(&dev->wq); + sema_init(&dev->sem, 1); + dev->event_count = 0; + dev->queue_size = 0; + dev->max_events = inotify_max_queued_events; + dev->user = user; + atomic_set(&dev->count, 0); + + get_inotify_dev(dev); + atomic_inc(&user->inotify_devs); + + filp->private_data = dev; + fd_install (fd, filp); + return fd; +out_err: + put_unused_fd (fd); + put_filp (filp); + free_uid(user); +out: + return ret; +} + +asmlinkage long sys_inotify_add_watch(int fd, const char *path, u32 mask) +{ + struct inotify_watch *watch, *old; + struct inode *inode; + struct inotify_device *dev; + struct nameidata nd; + struct file *filp; + int ret; + + filp = fget(fd); + if (!filp) + return -EBADF; + + /* + * fd can name any open file; only a file set up by sys_inotify_init() + * carries an inotify_device in private_data. + */ + if (unlikely(filp->f_op != &inotify_fops)) { + ret = -EINVAL; + goto fput_and_out; + } + + dev = filp->private_data; + + ret = find_inode ((const char __user*)path, &nd); + if (ret) + goto fput_and_out; + + /* Held in place by reference in nd */ + inode = nd.dentry->d_inode; + + down(&inode->inotify_sem); + down(&dev->sem); + + /* don't let user-space set invalid bits: we don't want flags set */ + /* NOTE(review): IN_ONESHOT is not part of IN_ALL_EVENTS, so it is cleared here as well - confirm that is intended */ + mask &= IN_ALL_EVENTS; + if 
(!mask) { + ret = -EINVAL; + goto out; + } + + /* + * Handle the case of re-adding a watch on an (inode,dev) pair that we + * are already watching. We just update the mask and return its wd. + */ + old = inode_find_dev(inode, dev); + if (unlikely(old)) { + old->mask = mask; + ret = old->wd; + goto out; + } + + watch = create_watch(dev, mask, inode); + if (unlikely(IS_ERR(watch))) { + ret = PTR_ERR(watch); + goto out; + } + + /* Add the watch to the device's and the inode's list */ + list_add(&watch->d_list, &dev->watches); + list_add(&watch->i_list, &inode->inotify_watches); + ret = watch->wd; +out: + path_release (&nd); + up(&dev->sem); + up(&inode->inotify_sem); +fput_and_out: + fput(filp); + return ret; +} + +/* + * sys_inotify_rm_watch - remove the watch 'wd' from the inotify instance + * referred to by 'fd'. + * + * Returns the result of inotify_ignore(), -EBADF if 'fd' is not an open file, + * or -EINVAL if 'fd' is not an inotify instance. + */ +asmlinkage long sys_inotify_rm_watch(int fd, u32 wd) +{ + struct file *filp; + struct inotify_device *dev; + int ret; + + filp = fget(fd); + if (!filp) + return -EBADF; + + /* + * fd can name any open file; only files using inotify_fops carry an + * inotify_device in private_data. + */ + if (unlikely(filp->f_op != &inotify_fops)) { + ret = -EINVAL; + goto out; + } + + dev = filp->private_data; + ret = inotify_ignore (dev, wd); +out: + fput(filp); + return ret; +} + +static struct super_block * +inotify_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA); +} + +static struct file_system_type inotify_fs_type = { + .name = "inotifyfs", + .get_sb = inotify_get_sb, + .kill_sb = kill_anon_super, +}; + +/* + * inotify_init - Our initialization function. Note that we cannot return + * error because we have compiled-in VFS hooks. So an (unlikely) failure here + * must result in panic(). 
+ */ +static int __init inotify_init(void) +{ + int ret; + + /* the VFS hooks are compiled in, so a failed setup cannot be unwound */ + ret = register_filesystem(&inotify_fs_type); + if (unlikely(ret)) + panic("inotify: register_filesystem returned %d!\n", ret); + + /* kern_mount() reports failure as an ERR_PTR, never as NULL */ + inotify_mnt = kern_mount(&inotify_fs_type); + if (IS_ERR(inotify_mnt)) + panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt)); + + /* default limits; exported as sysctls */ + inotify_max_queued_events = 8192; + inotify_max_user_devices = 128; + inotify_max_user_watches = 8192; + + atomic_set(&inotify_cookie, 0); + + watch_cachep = kmem_cache_create("inotify_watch_cache", + sizeof(struct inotify_watch), + 0, SLAB_PANIC, NULL, NULL); + event_cachep = kmem_cache_create("inotify_event_cache", + sizeof(struct inotify_kernel_event), + 0, SLAB_PANIC, NULL, NULL); + + printk(KERN_INFO "inotify syscall\n"); + + return 0; +} + +module_init(inotify_init); diff --git a/fs/namei.c b/fs/namei.c index 1d93cb4f7c5..02a824cd3c5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -1312,7 +1312,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, DQUOT_INIT(dir); error = dir->i_op->create(dir, dentry, mode, nd); if (!error) { - inode_dir_notify(dir, DN_CREATE); + fsnotify_create(dir, dentry->d_name.name); security_inode_post_create(dir, dentry, mode); } return error; @@ -1637,7 +1637,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) DQUOT_INIT(dir); error = dir->i_op->mknod(dir, dentry, mode, dev); if (!error) { - inode_dir_notify(dir, DN_CREATE); + fsnotify_create(dir, dentry->d_name.name); security_inode_post_mknod(dir, dentry, mode, dev); } return error; @@ -1710,7 +1710,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) DQUOT_INIT(dir); error = dir->i_op->mkdir(dir, dentry, mode); if (!error) { - inode_dir_notify(dir, DN_CREATE); + fsnotify_mkdir(dir, dentry->d_name.name); security_inode_post_mkdir(dir,dentry, mode); } return error; @@ -1801,7 +1801,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) } up(&dentry->d_inode->i_sem); if (!error) { - inode_dir_notify(dir, DN_DELETE); + fsnotify_rmdir(dentry, dentry->d_inode, dir); d_delete(dentry); } 
dput(dentry); @@ -1874,9 +1874,10 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) /* We don't d_delete() NFS sillyrenamed files--they still exist. */ if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { + fsnotify_unlink(dentry, dir); d_delete(dentry); - inode_dir_notify(dir, DN_DELETE); } + return error; } @@ -1950,7 +1951,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i DQUOT_INIT(dir); error = dir->i_op->symlink(dir, dentry, oldname); if (!error) { - inode_dir_notify(dir, DN_CREATE); + fsnotify_create(dir, dentry->d_name.name); security_inode_post_symlink(dir, dentry, oldname); } return error; @@ -2023,7 +2024,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de error = dir->i_op->link(old_dentry, dir, new_dentry); up(&old_dentry->d_inode->i_sem); if (!error) { - inode_dir_notify(dir, DN_CREATE); + fsnotify_create(dir, new_dentry->d_name.name); security_inode_post_link(old_dentry, dir, new_dentry); } return error; @@ -2187,6 +2188,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, { int error; int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); + const char *old_name; if (old_dentry->d_inode == new_dentry->d_inode) return 0; @@ -2208,18 +2210,18 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, DQUOT_INIT(old_dir); DQUOT_INIT(new_dir); + old_name = fsnotify_oldname_init(old_dentry->d_name.name); + if (is_dir) error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); else error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); if (!error) { - if (old_dir == new_dir) - inode_dir_notify(old_dir, DN_RENAME); - else { - inode_dir_notify(old_dir, DN_DELETE); - inode_dir_notify(new_dir, DN_CREATE); - } + const char *new_name = old_dentry->d_name.name; + fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir); } + fsnotify_oldname_free(old_name); + return error; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 
5e0bf391760..4f2cd3d2756 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -45,7 +45,7 @@ #endif /* CONFIG_NFSD_V3 */ #include #include -#include +#include #include #include #ifdef CONFIG_NFSD_V4 @@ -860,7 +860,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, nfsdstats.io_read += err; *count = err; err = 0; - dnotify_parent(file->f_dentry, DN_ACCESS); + fsnotify_access(file->f_dentry); } else err = nfserrno(err); out: @@ -916,7 +916,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, set_fs(oldfs); if (err >= 0) { nfsdstats.io_write += cnt; - dnotify_parent(file->f_dentry, DN_MODIFY); + fsnotify_modify(file->f_dentry); } /* clear setuid/setgid flag after write */ diff --git a/fs/open.c b/fs/open.c index 3f4a4286fdc..32bf05e2996 100644 --- a/fs/open.c +++ b/fs/open.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -951,6 +951,7 @@ asmlinkage long sys_open(const char __user * filename, int flags, int mode) put_unused_fd(fd); fd = PTR_ERR(f); } else { + fsnotify_open(f->f_dentry); fd_install(fd, f); } } diff --git a/fs/read_write.c b/fs/read_write.c index 9292f5fa4d6..563abd09b5c 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -252,7 +252,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) else ret = do_sync_read(file, buf, count, pos); if (ret > 0) { - dnotify_parent(file->f_dentry, DN_ACCESS); + fsnotify_access(file->f_dentry); current->rchar += ret; } current->syscr++; @@ -303,7 +303,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ else ret = do_sync_write(file, buf, count, pos); if (ret > 0) { - dnotify_parent(file->f_dentry, DN_MODIFY); + fsnotify_modify(file->f_dentry); current->wchar += ret; } current->syscw++; @@ -539,9 +539,12 @@ static ssize_t do_readv_writev(int type, struct file 
*file, out: if (iov != iovstack) kfree(iov); - if ((ret + (type == READ)) > 0) - dnotify_parent(file->f_dentry, - (type == READ) ? DN_ACCESS : DN_MODIFY); + if ((ret + (type == READ)) > 0) { + if (type == READ) + fsnotify_access(file->f_dentry); + else + fsnotify_modify(file->f_dentry); + } return ret; Efault: ret = -EFAULT; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index d72c1ce4855..335288b9be0 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -3,7 +3,7 @@ */ #include -#include +#include #include #include #include @@ -391,9 +391,6 @@ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr) * sysfs_update_file - update the modified timestamp on an object attribute. * @kobj: object we're acting for. * @attr: attribute descriptor. - * - * Also call dnotify for the dentry, which lots of userspace programs - * use. */ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) { @@ -408,7 +405,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) if (victim->d_inode && (victim->d_parent->d_inode == dir->d_inode)) { victim->d_inode->i_mtime = CURRENT_TIME; - dnotify_parent(victim, DN_MODIFY); + fsnotify_modify(victim); /** * Drop reference from initial sysfs_get_dentry(). 
diff --git a/fs/xattr.c b/fs/xattr.c index 93dee70a1db..6acd5c63da9 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -16,6 +16,7 @@ #include #include #include +#include #include /* @@ -57,8 +58,10 @@ setxattr(struct dentry *d, char __user *name, void __user *value, if (error) goto out; error = d->d_inode->i_op->setxattr(d, kname, kvalue, size, flags); - if (!error) + if (!error) { + fsnotify_xattr(d); security_inode_post_setxattr(d, kname, kvalue, size, flags); + } out: up(&d->d_inode->i_sem); } diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index e25e4c71a87..a7cb377745b 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -296,8 +296,11 @@ #define __NR_keyctl 288 #define __NR_ioprio_set 289 #define __NR_ioprio_get 290 +#define __NR_inotify_init 291 +#define __NR_inotify_add_watch 292 +#define __NR_inotify_rm_watch 293 -#define NR_syscalls 291 +#define NR_syscalls 294 /* * user-visible error numbers are in the range -1 - -128: see diff --git a/include/linux/fs.h b/include/linux/fs.h index 302ec20838c..c9bf3746a9f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -474,6 +474,11 @@ struct inode { struct dnotify_struct *i_dnotify; /* for directory notifications */ #endif +#ifdef CONFIG_INOTIFY + struct list_head inotify_watches; /* watches on this inode */ + struct semaphore inotify_sem; /* protects the watches list */ +#endif + unsigned long i_state; unsigned long dirtied_when; /* jiffies of first dirtying */ @@ -1393,7 +1398,6 @@ extern void emergency_remount(void); extern int do_remount_sb(struct super_block *sb, int flags, void *data, int force); extern sector_t bmap(struct inode *, sector_t); -extern int setattr_mask(unsigned int); extern int notify_change(struct dentry *, struct iattr *); extern int permission(struct inode *, int, struct nameidata *); extern int generic_permission(struct inode *, int, diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h new file mode 100644 index 00000000000..eb581b6cfca 
--- /dev/null +++ b/include/linux/fsnotify.h @@ -0,0 +1,248 @@ +#ifndef _LINUX_FS_NOTIFY_H +#define _LINUX_FS_NOTIFY_H + +/* + * include/linux/fsnotify.h - generic hooks for filesystem notification, to + * reduce in-source duplication from both dnotify and inotify. + * + * We don't compile any of this away in some complicated menagerie of ifdefs. + * Instead, we rely on the code inside to optimize away as needed. + * + * (C) Copyright 2005 Robert Love + */ + +#ifdef __KERNEL__ + +#include <linux/dnotify.h> +#include <linux/inotify.h> + +/* + * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir + */ +static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, + const char *old_name, const char *new_name, + int isdir) +{ + u32 cookie = inotify_get_cookie(); + + if (old_dir == new_dir) + inode_dir_notify(old_dir, DN_RENAME); + else { + inode_dir_notify(old_dir, DN_DELETE); + inode_dir_notify(new_dir, DN_CREATE); + } + + if (isdir) + isdir = IN_ISDIR; + inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name); + inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name); +} + +/* + * fsnotify_unlink - file was unlinked + */ +static inline void fsnotify_unlink(struct dentry *dentry, struct inode *dir) +{ + struct inode *inode = dentry->d_inode; + + inode_dir_notify(dir, DN_DELETE); + inotify_inode_queue_event(dir, IN_DELETE, 0, dentry->d_name.name); + inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL); + + inotify_inode_is_dead(inode); +} + +/* + * fsnotify_rmdir - directory was removed + */ +static inline void fsnotify_rmdir(struct dentry *dentry, struct inode *inode, + struct inode *dir) +{ + inode_dir_notify(dir, DN_DELETE); + inotify_inode_queue_event(dir,IN_DELETE|IN_ISDIR,0,dentry->d_name.name); + inotify_inode_queue_event(inode, IN_DELETE_SELF | IN_ISDIR, 0, NULL); + inotify_inode_is_dead(inode); +} + +/* + * fsnotify_create - 'name' was linked in + */ +static inline void fsnotify_create(struct inode *inode, const char 
*name) +{ + inode_dir_notify(inode, DN_CREATE); + inotify_inode_queue_event(inode, IN_CREATE, 0, name); +} + +/* + * fsnotify_mkdir - directory 'name' was created + */ +static inline void fsnotify_mkdir(struct inode *inode, const char *name) +{ + inode_dir_notify(inode, DN_CREATE); + inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, name); +} + +/* + * fsnotify_access - file was read + */ +static inline void fsnotify_access(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + u32 mask = IN_ACCESS; + + if (S_ISDIR(inode->i_mode)) + mask |= IN_ISDIR; + + dnotify_parent(dentry, DN_ACCESS); + inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); + inotify_inode_queue_event(inode, mask, 0, NULL); +} + +/* + * fsnotify_modify - file was modified + */ +static inline void fsnotify_modify(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + u32 mask = IN_MODIFY; + + if (S_ISDIR(inode->i_mode)) + mask |= IN_ISDIR; + + dnotify_parent(dentry, DN_MODIFY); + inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); + inotify_inode_queue_event(inode, mask, 0, NULL); +} + +/* + * fsnotify_open - file was opened + */ +static inline void fsnotify_open(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + u32 mask = IN_OPEN; + + if (S_ISDIR(inode->i_mode)) + mask |= IN_ISDIR; + + inotify_inode_queue_event(inode, mask, 0, NULL); + inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); +} + +/* + * fsnotify_close - file was closed + */ +static inline void fsnotify_close(struct file *file) +{ + struct dentry *dentry = file->f_dentry; + struct inode *inode = dentry->d_inode; + const char *name = dentry->d_name.name; + mode_t mode = file->f_mode; + u32 mask = (mode & FMODE_WRITE) ? 
IN_CLOSE_WRITE : IN_CLOSE_NOWRITE; + + if (S_ISDIR(inode->i_mode)) + mask |= IN_ISDIR; + + inotify_dentry_parent_queue_event(dentry, mask, 0, name); + inotify_inode_queue_event(inode, mask, 0, NULL); +} + +/* + * fsnotify_xattr - extended attributes were changed + */ +static inline void fsnotify_xattr(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + u32 mask = IN_ATTRIB; + + if (S_ISDIR(inode->i_mode)) + mask |= IN_ISDIR; + + inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); + inotify_inode_queue_event(inode, mask, 0, NULL); +} + +/* + * fsnotify_change - notify_change event. file was modified and/or metadata + * was changed. + */ +static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) +{ + struct inode *inode = dentry->d_inode; + int dn_mask = 0; + u32 in_mask = 0; + + if (ia_valid & ATTR_UID) { + in_mask |= IN_ATTRIB; + dn_mask |= DN_ATTRIB; + } + if (ia_valid & ATTR_GID) { + in_mask |= IN_ATTRIB; + dn_mask |= DN_ATTRIB; + } + if (ia_valid & ATTR_SIZE) { + in_mask |= IN_MODIFY; + dn_mask |= DN_MODIFY; + } + /* both times implies a utime(s) call */ + if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) + { + in_mask |= IN_ATTRIB; + dn_mask |= DN_ATTRIB; + } else if (ia_valid & ATTR_ATIME) { + in_mask |= IN_ACCESS; + dn_mask |= DN_ACCESS; + } else if (ia_valid & ATTR_MTIME) { + in_mask |= IN_MODIFY; + dn_mask |= DN_MODIFY; + } + if (ia_valid & ATTR_MODE) { + in_mask |= IN_ATTRIB; + dn_mask |= DN_ATTRIB; + } + + if (dn_mask) + dnotify_parent(dentry, dn_mask); + if (in_mask) { + if (S_ISDIR(inode->i_mode)) + in_mask |= IN_ISDIR; + inotify_inode_queue_event(inode, in_mask, 0, NULL); + inotify_dentry_parent_queue_event(dentry, in_mask, 0, + dentry->d_name.name); + } +} + +#ifdef CONFIG_INOTIFY /* inotify helpers */ + +/* + * fsnotify_oldname_init - save off the old filename before we change it + */ +static inline const char *fsnotify_oldname_init(const char *name) +{ + return 
kstrdup(name, GFP_KERNEL); +} + +/* + * fsnotify_oldname_free - free the name we got from fsnotify_oldname_init + */ +static inline void fsnotify_oldname_free(const char *old_name) +{ + kfree(old_name); +} + +#else /* CONFIG_INOTIFY */ + +static inline const char *fsnotify_oldname_init(const char *name) +{ + return NULL; +} + +static inline void fsnotify_oldname_free(const char *old_name) +{ +} + +#endif /* ! CONFIG_INOTIFY */ + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_FS_NOTIFY_H */ diff --git a/include/linux/inotify.h b/include/linux/inotify.h new file mode 100644 index 00000000000..a40c2bf0408 --- /dev/null +++ b/include/linux/inotify.h @@ -0,0 +1,108 @@ +/* + * Inode based directory notification for Linux + * + * Copyright (C) 2005 John McCutchan + */ + +#ifndef _LINUX_INOTIFY_H +#define _LINUX_INOTIFY_H + +#include <linux/types.h> + +/* + * struct inotify_event - structure read from the inotify device for each event + * + * When you are watching a directory, you will receive the filename for events + * such as IN_CREATE, IN_DELETE, IN_OPEN, IN_CLOSE, ..., relative to the wd. 
+ */ +struct inotify_event { + __s32 wd; /* watch descriptor */ + __u32 mask; /* watch mask */ + __u32 cookie; /* cookie to synchronize two events */ + __u32 len; /* length (including nulls) of name */ + char name[0]; /* stub for possible name */ +}; + +/* the following are legal, implemented events that user-space can watch for */ +#define IN_ACCESS 0x00000001 /* File was accessed */ +#define IN_MODIFY 0x00000002 /* File was modified */ +#define IN_ATTRIB 0x00000004 /* Metadata changed */ +#define IN_CLOSE_WRITE 0x00000008 /* Writable file was closed */ +#define IN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ +#define IN_OPEN 0x00000020 /* File was opened */ +#define IN_MOVED_FROM 0x00000040 /* File was moved from X */ +#define IN_MOVED_TO 0x00000080 /* File was moved to Y */ +#define IN_CREATE 0x00000100 /* Subfile was created */ +#define IN_DELETE 0x00000200 /* Subfile was deleted */ +#define IN_DELETE_SELF 0x00000400 /* Self was deleted */ + +/* the following are legal events. they are sent as needed to any watch */ +#define IN_UNMOUNT 0x00002000 /* Backing fs was unmounted */ +#define IN_Q_OVERFLOW 0x00004000 /* Event queue overflowed */ +#define IN_IGNORED 0x00008000 /* File was ignored */ + +/* helper events */ +#define IN_CLOSE (IN_CLOSE_WRITE | IN_CLOSE_NOWRITE) /* close */ +#define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO) /* moves */ + +/* special flags */ +#define IN_ISDIR 0x40000000 /* event occurred against dir */ +#define IN_ONESHOT 0x80000000 /* only send event once */ + +/* + * All of the events - we build the list by hand so that we can add flags in + * the future and not break backward compatibility. Apps will get only the + * events that they originally wanted. Be sure to add new events here! 
+ */ +#define IN_ALL_EVENTS (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \ + IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | \ + IN_MOVED_TO | IN_DELETE | IN_CREATE | IN_DELETE_SELF) + +#ifdef __KERNEL__ + +#include +#include +#include + +#ifdef CONFIG_INOTIFY + +extern void inotify_inode_queue_event(struct inode *, __u32, __u32, + const char *); +extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32, + const char *); +extern void inotify_unmount_inodes(struct list_head *); +extern void inotify_inode_is_dead(struct inode *); +extern u32 inotify_get_cookie(void); + +#else + +static inline void inotify_inode_queue_event(struct inode *inode, + __u32 mask, __u32 cookie, + const char *filename) +{ +} + +static inline void inotify_dentry_parent_queue_event(struct dentry *dentry, + __u32 mask, __u32 cookie, + const char *filename) +{ +} + +static inline void inotify_unmount_inodes(struct list_head *list) +{ +} + +static inline void inotify_inode_is_dead(struct inode *inode) +{ +} + +static inline u32 inotify_get_cookie(void) +{ + return 0; +} + +#endif /* CONFIG_INOTIFY */ + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_INOTIFY_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index ff48815bd3a..dec5827c774 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -410,6 +410,10 @@ struct user_struct { atomic_t processes; /* How many processes does this user have? */ atomic_t files; /* How many open files does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */ +#ifdef CONFIG_INOTIFY + atomic_t inotify_watches; /* How many inotify watches does this user have? */ + atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ +#endif /* protected by mq_lock */ unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ unsigned long locked_shm; /* How many pages of mlocked shm ? 
*/ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 5b5f434ac9a..ce19a2aa0b2 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -61,7 +61,8 @@ enum CTL_DEV=7, /* Devices */ CTL_BUS=8, /* Busses */ CTL_ABI=9, /* Binary emulation */ - CTL_CPU=10 /* CPU stuff (speed scaling, etc) */ + CTL_CPU=10, /* CPU stuff (speed scaling, etc) */ + CTL_INOTIFY=11 /* Inotify */ }; /* CTL_BUS names: */ @@ -70,6 +71,14 @@ enum CTL_BUS_ISA=1 /* ISA */ }; +/* CTL_INOTIFY names: */ +enum +{ + INOTIFY_MAX_USER_DEVICES=1, /* max number of inotify device instances per user */ + INOTIFY_MAX_USER_WATCHES=2, /* max number of inotify watches per user */ + INOTIFY_MAX_QUEUED_EVENTS=3 /* Max number of queued events per inotify device instance */ +}; + /* CTL_KERN names: */ enum { diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 29196ce9b40..42b40ae5ead 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -80,6 +80,9 @@ cond_syscall(sys_keyctl); cond_syscall(compat_sys_keyctl); cond_syscall(compat_sys_socketcall); cond_syscall(sys_set_zone_reclaim); +cond_syscall(sys_inotify_init); +cond_syscall(sys_inotify_add_watch); +cond_syscall(sys_inotify_rm_watch); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 270ee7fadbd..b240e2cb86f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -67,6 +67,12 @@ extern int printk_ratelimit_jiffies; extern int printk_ratelimit_burst; extern int pid_max_min, pid_max_max; +#ifdef CONFIG_INOTIFY +extern int inotify_max_user_devices; +extern int inotify_max_user_watches; +extern int inotify_max_queued_events; +#endif + #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) int unknown_nmi_panic; extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *, @@ -218,6 +224,7 @@ static ctl_table root_table[] = { .mode = 0555, .child = dev_table, }, + { .ctl_name = 0 } }; @@ -959,6 +966,40 @@ static ctl_table fs_table[] = { .mode = 0644, 
.proc_handler = &proc_dointvec, }, +#ifdef CONFIG_INOTIFY + { + .ctl_name = INOTIFY_MAX_USER_DEVICES, + .procname = "max_user_devices", + .data = &inotify_max_user_devices, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, + + { + .ctl_name = INOTIFY_MAX_USER_WATCHES, + .procname = "max_user_watches", + .data = &inotify_max_user_watches, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, + + { + .ctl_name = INOTIFY_MAX_QUEUED_EVENTS, + .procname = "max_queued_events", + .data = &inotify_max_queued_events, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero + }, +#endif { .ctl_name = 0 } }; @@ -968,7 +1009,7 @@ static ctl_table debug_table[] = { static ctl_table dev_table[] = { { .ctl_name = 0 } -}; +}; extern void init_irq_proc (void); diff --git a/kernel/user.c b/kernel/user.c index 734575d5576..89e562feb1b 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -120,6 +120,10 @@ struct user_struct * alloc_uid(uid_t uid) atomic_set(&new->processes, 0); atomic_set(&new->files, 0); atomic_set(&new->sigpending, 0); +#ifdef CONFIG_INOTIFY + atomic_set(&new->inotify_watches, 0); + atomic_set(&new->inotify_devs, 0); +#endif new->mq_bytes = 0; new->locked_shm = 0; -- cgit v1.2.3-70-g09d2