summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.osdl.org>2006-12-07 09:13:20 -0800
committerLinus Torvalds <torvalds@woody.osdl.org>2006-12-07 09:13:20 -0800
commit1c1afa3c053d4ccdf44e5a4e159005cdfd48bfc6 (patch)
tree3e686ad4cf1ae2300e7190ff83afc3f3dd4ba740
parent0a01707b289853f56d1c000057b27e243c039722 (diff)
parentac33d0710595579e3cfca42dde2257eb0b123f6d (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw
* master.kernel.org:/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw: (73 commits) [DLM] Clean up lowcomms [GFS2] Change gfs2_fsync() to use write_inode_now() [GFS2] Fix indent in recovery.c [GFS2] Don't flush everything on fdatasync [GFS2] Add a comment about reading the super block [GFS2] Mount problem with the GFS2 code [GFS2] Remove gfs2_check_acl() [DLM] fix format warnings in rcom.c and recoverd.c [GFS2] lock function parameter [DLM] don't accept replies to old recovery messages [DLM] fix size of STATUS_REPLY message [GFS2] fs/gfs2/log.c:log_bmap() fix printk format warning [DLM] fix add_requestqueue checking nodes list [GFS2] Fix recursive locking in gfs2_getattr [GFS2] Fix recursive locking in gfs2_permission [GFS2] Reduce number of arguments to meta_io.c:getbuf() [GFS2] Move gfs2_meta_syncfs() into log.c [GFS2] Fix journal flush problem [GFS2] mark_inode_dirty after write to stuffed file [GFS2] Fix glock ordering on inode creation ...
-rw-r--r--fs/dlm/Kconfig20
-rw-r--r--fs/dlm/Makefile4
-rw-r--r--fs/dlm/dlm_internal.h4
-rw-r--r--fs/dlm/lock.c16
-rw-r--r--fs/dlm/lockspace.c4
-rw-r--r--fs/dlm/lowcomms-sctp.c (renamed from fs/dlm/lowcomms.c)264
-rw-r--r--fs/dlm/lowcomms-tcp.c1189
-rw-r--r--fs/dlm/lowcomms.h2
-rw-r--r--fs/dlm/main.c10
-rw-r--r--fs/dlm/member.c8
-rw-r--r--fs/dlm/rcom.c58
-rw-r--r--fs/dlm/recover.c1
-rw-r--r--fs/dlm/recoverd.c44
-rw-r--r--fs/dlm/requestqueue.c26
-rw-r--r--fs/dlm/requestqueue.h2
-rw-r--r--fs/gfs2/Kconfig1
-rw-r--r--fs/gfs2/acl.c39
-rw-r--r--fs/gfs2/acl.h1
-rw-r--r--fs/gfs2/bmap.c179
-rw-r--r--fs/gfs2/daemon.c7
-rw-r--r--fs/gfs2/dir.c93
-rw-r--r--fs/gfs2/dir.h8
-rw-r--r--fs/gfs2/eaops.c2
-rw-r--r--fs/gfs2/eattr.c66
-rw-r--r--fs/gfs2/eattr.h6
-rw-r--r--fs/gfs2/glock.c36
-rw-r--r--fs/gfs2/glock.h3
-rw-r--r--fs/gfs2/glops.c138
-rw-r--r--fs/gfs2/incore.h43
-rw-r--r--fs/gfs2/inode.c406
-rw-r--r--fs/gfs2/inode.h20
-rw-r--r--fs/gfs2/log.c41
-rw-r--r--fs/gfs2/log.h2
-rw-r--r--fs/gfs2/lops.c40
-rw-r--r--fs/gfs2/lops.h2
-rw-r--r--fs/gfs2/meta_io.c46
-rw-r--r--fs/gfs2/meta_io.h1
-rw-r--r--fs/gfs2/ondisk.c138
-rw-r--r--fs/gfs2/ops_address.c52
-rw-r--r--fs/gfs2/ops_dentry.c4
-rw-r--r--fs/gfs2/ops_export.c38
-rw-r--r--fs/gfs2/ops_export.h2
-rw-r--r--fs/gfs2/ops_file.c66
-rw-r--r--fs/gfs2/ops_file.h2
-rw-r--r--fs/gfs2/ops_fstype.c4
-rw-r--r--fs/gfs2/ops_inode.c134
-rw-r--r--fs/gfs2/ops_super.c11
-rw-r--r--fs/gfs2/ops_vm.c2
-rw-r--r--fs/gfs2/quota.c15
-rw-r--r--fs/gfs2/recovery.c29
-rw-r--r--fs/gfs2/recovery.h2
-rw-r--r--fs/gfs2/rgrp.c13
-rw-r--r--fs/gfs2/super.c50
-rw-r--r--fs/gfs2/super.h6
-rw-r--r--fs/gfs2/sys.c8
-rw-r--r--fs/gfs2/util.h6
-rw-r--r--include/linux/gfs2_ondisk.h138
57 files changed, 2341 insertions, 1211 deletions
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 81b2c6465ee..b5654a284fe 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -1,14 +1,32 @@
menu "Distributed Lock Manager"
- depends on INET && IP_SCTP && EXPERIMENTAL
+ depends on EXPERIMENTAL && INET
config DLM
tristate "Distributed Lock Manager (DLM)"
depends on IPV6 || IPV6=n
select CONFIGFS_FS
+ select IP_SCTP if DLM_SCTP
help
A general purpose distributed lock manager for kernel or userspace
applications.
+choice
+ prompt "Select DLM communications protocol"
+ depends on DLM
+ default DLM_TCP
+ help
+ The DLM can use TCP or SCTP for its network communications.
+ SCTP supports multi-homed operations whereas TCP doesn't.
+ However, SCTP seems to have stability problems at the moment.
+
+config DLM_TCP
+ bool "TCP/IP"
+
+config DLM_SCTP
+ bool "SCTP"
+
+endchoice
+
config DLM_DEBUG
bool "DLM debugging"
depends on DLM
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 1832e0297f7..65388944eba 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -4,7 +4,6 @@ dlm-y := ast.o \
dir.o \
lock.o \
lockspace.o \
- lowcomms.o \
main.o \
member.o \
memory.o \
@@ -17,3 +16,6 @@ dlm-y := ast.o \
util.o
dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o
+dlm-$(CONFIG_DLM_TCP) += lowcomms-tcp.o
+
+dlm-$(CONFIG_DLM_SCTP) += lowcomms-sctp.o
\ No newline at end of file
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 1e5cd67e1b7..1ee8195e6fc 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -471,6 +471,7 @@ struct dlm_ls {
char *ls_recover_buf;
int ls_recover_nodeid; /* for debugging */
uint64_t ls_rcom_seq;
+ spinlock_t ls_rcom_spin;
struct list_head ls_recover_list;
spinlock_t ls_recover_list_lock;
int ls_recover_list_count;
@@ -488,7 +489,8 @@ struct dlm_ls {
#define LSFL_RUNNING 1
#define LSFL_RECOVERY_STOP 2
#define LSFL_RCOM_READY 3
-#define LSFL_UEVENT_WAIT 4
+#define LSFL_RCOM_WAIT 4
+#define LSFL_UEVENT_WAIT 5
/* much of this is just saving user space pointers associated with the
lock that we pass back to the user lib with an ast */
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 3f2befa4797..30878defaeb 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -2372,6 +2372,7 @@ static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in,
static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
{
lkb->lkb_exflags = ms->m_exflags;
+ lkb->lkb_sbflags = ms->m_sbflags;
lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
(ms->m_flags & 0x0000FFFF);
}
@@ -3028,10 +3029,17 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
while (1) {
if (dlm_locking_stopped(ls)) {
- if (!recovery)
- dlm_add_requestqueue(ls, nodeid, hd);
- error = -EINTR;
- goto out;
+ if (recovery) {
+ error = -EINTR;
+ goto out;
+ }
+ error = dlm_add_requestqueue(ls, nodeid, hd);
+ if (error == -EAGAIN)
+ continue;
+ else {
+ error = -EINTR;
+ goto out;
+ }
}
if (lock_recovery_try(ls))
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index f8842ca443c..59012b089e8 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -22,6 +22,7 @@
#include "memory.h"
#include "lock.h"
#include "recover.h"
+#include "requestqueue.h"
#ifdef CONFIG_DLM_DEBUG
int dlm_create_debug_file(struct dlm_ls *ls);
@@ -478,6 +479,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_recoverd_task = NULL;
mutex_init(&ls->ls_recoverd_active);
spin_lock_init(&ls->ls_recover_lock);
+ spin_lock_init(&ls->ls_rcom_spin);
+ get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
ls->ls_recover_status = 0;
ls->ls_recover_seq = 0;
ls->ls_recover_args = NULL;
@@ -684,6 +687,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
* Free structures on any other lists
*/
+ dlm_purge_requestqueue(ls);
kfree(ls->ls_recover_args);
dlm_clear_free_entries(ls);
dlm_clear_members(ls);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms-sctp.c
index 6da6b14d5a6..fe158d7a928 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms-sctp.c
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -75,13 +75,13 @@ struct nodeinfo {
};
static DEFINE_IDR(nodeinfo_idr);
-static struct rw_semaphore nodeinfo_lock;
-static int max_nodeid;
+static DECLARE_RWSEM(nodeinfo_lock);
+static int max_nodeid;
struct cbuf {
- unsigned base;
- unsigned len;
- unsigned mask;
+ unsigned int base;
+ unsigned int len;
+ unsigned int mask;
};
/* Just the one of these, now. But this struct keeps
@@ -90,9 +90,9 @@ struct cbuf {
#define CF_READ_PENDING 1
struct connection {
- struct socket *sock;
+ struct socket *sock;
unsigned long flags;
- struct page *rx_page;
+ struct page *rx_page;
atomic_t waiting_requests;
struct cbuf cb;
int eagain_flag;
@@ -102,36 +102,40 @@ struct connection {
struct writequeue_entry {
struct list_head list;
- struct page *page;
+ struct page *page;
int offset;
int len;
int end;
int users;
- struct nodeinfo *ni;
+ struct nodeinfo *ni;
};
-#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0)
-#define CBUF_EMPTY(cb) ((cb)->len == 0)
-#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
-#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
+static void cbuf_add(struct cbuf *cb, int n)
+{
+ cb->len += n;
+}
-#define CBUF_INIT(cb, size) \
-do { \
- (cb)->base = (cb)->len = 0; \
- (cb)->mask = ((size)-1); \
-} while(0)
+static int cbuf_data(struct cbuf *cb)
+{
+ return ((cb->base + cb->len) & cb->mask);
+}
-#define CBUF_EAT(cb, n) \
-do { \
- (cb)->len -= (n); \
- (cb)->base += (n); \
- (cb)->base &= (cb)->mask; \
-} while(0)
+static void cbuf_init(struct cbuf *cb, int size)
+{
+ cb->base = cb->len = 0;
+ cb->mask = size-1;
+}
+static void cbuf_eat(struct cbuf *cb, int n)
+{
+ cb->len -= n;
+ cb->base += n;
+ cb->base &= cb->mask;
+}
/* List of nodes which have writes pending */
-static struct list_head write_nodes;
-static spinlock_t write_nodes_lock;
+static LIST_HEAD(write_nodes);
+static DEFINE_SPINLOCK(write_nodes_lock);
/* Maximum number of incoming messages to process before
* doing a schedule()
@@ -141,8 +145,7 @@ static spinlock_t write_nodes_lock;
/* Manage daemons */
static struct task_struct *recv_task;
static struct task_struct *send_task;
-static wait_queue_head_t lowcomms_recv_wait;
-static atomic_t accepting;
+static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait);
/* The SCTP connection */
static struct connection sctp_con;
@@ -161,11 +164,11 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
return error;
if (dlm_local_addr[0]->ss_family == AF_INET) {
- struct sockaddr_in *in4 = (struct sockaddr_in *) &addr;
+ struct sockaddr_in *in4 = (struct sockaddr_in *) &addr;
struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr;
ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
} else {
- struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr;
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr;
struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
memcpy(&ret6->sin6_addr, &in6->sin6_addr,
sizeof(in6->sin6_addr));
@@ -174,6 +177,8 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
return 0;
}
+/* If alloc is 0 here we will not attempt to allocate a new
+ nodeinfo struct */
static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
{
struct nodeinfo *ni;
@@ -184,44 +189,45 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
ni = idr_find(&nodeinfo_idr, nodeid);
up_read(&nodeinfo_lock);
- if (!ni && alloc) {
- down_write(&nodeinfo_lock);
+ if (ni || !alloc)
+ return ni;
- ni = idr_find(&nodeinfo_idr, nodeid);
- if (ni)
- goto out_up;
+ down_write(&nodeinfo_lock);
- r = idr_pre_get(&nodeinfo_idr, alloc);
- if (!r)
- goto out_up;
+ ni = idr_find(&nodeinfo_idr, nodeid);
+ if (ni)
+ goto out_up;
- ni = kmalloc(sizeof(struct nodeinfo), alloc);
- if (!ni)
- goto out_up;
+ r = idr_pre_get(&nodeinfo_idr, alloc);
+ if (!r)
+ goto out_up;
- r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n);
- if (r) {
- kfree(ni);
- ni = NULL;
- goto out_up;
- }
- if (n != nodeid) {
- idr_remove(&nodeinfo_idr, n);
- kfree(ni);
- ni = NULL;
- goto out_up;
- }
- memset(ni, 0, sizeof(struct nodeinfo));
- spin_lock_init(&ni->lock);
- INIT_LIST_HEAD(&ni->writequeue);
- spin_lock_init(&ni->writequeue_lock);
- ni->nodeid = nodeid;
-
- if (nodeid > max_nodeid)
- max_nodeid = nodeid;
- out_up:
- up_write(&nodeinfo_lock);
+ ni = kmalloc(sizeof(struct nodeinfo), alloc);
+ if (!ni)
+ goto out_up;
+
+ r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n);
+ if (r) {
+ kfree(ni);
+ ni = NULL;
+ goto out_up;
}
+ if (n != nodeid) {
+ idr_remove(&nodeinfo_idr, n);
+ kfree(ni);
+ ni = NULL;
+ goto out_up;
+ }
+ memset(ni, 0, sizeof(struct nodeinfo));
+ spin_lock_init(&ni->lock);
+ INIT_LIST_HEAD(&ni->writequeue);
+ spin_lock_init(&ni->writequeue_lock);
+ ni->nodeid = nodeid;
+
+ if (nodeid > max_nodeid)
+ max_nodeid = nodeid;
+out_up:
+ up_write(&nodeinfo_lock);
return ni;
}
@@ -279,13 +285,13 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
in4_addr->sin_port = cpu_to_be16(port);
memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
memset(in4_addr+1, 0, sizeof(struct sockaddr_storage) -
- sizeof(struct sockaddr_in));
+ sizeof(struct sockaddr_in));
*addr_len = sizeof(struct sockaddr_in);
} else {
struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
in6_addr->sin6_port = cpu_to_be16(port);
memset(in6_addr+1, 0, sizeof(struct sockaddr_storage) -
- sizeof(struct sockaddr_in6));
+ sizeof(struct sockaddr_in6));
*addr_len = sizeof(struct sockaddr_in6);
}
}
@@ -324,7 +330,7 @@ static void send_shutdown(sctp_assoc_t associd)
cmsg->cmsg_type = SCTP_SNDRCV;
cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
outmessage.msg_controllen = cmsg->cmsg_len;
- sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+ sinfo = CMSG_DATA(cmsg);
memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
sinfo->sinfo_flags |= MSG_EOF;
@@ -387,7 +393,7 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) {
log_print("COMM_UP for invalid assoc ID %d",
- (int)sn->sn_assoc_change.sac_assoc_id);
+ (int)sn->sn_assoc_change.sac_assoc_id);
init_failed();
return;
}
@@ -398,15 +404,18 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
fs = get_fs();
set_fs(get_ds());
ret = sctp_con.sock->ops->getsockopt(sctp_con.sock,
- IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
- (char*)&prim, &prim_len);
+ IPPROTO_SCTP,
+ SCTP_PRIMARY_ADDR,
+ (char*)&prim,
+ &prim_len);
set_fs(fs);
if (ret < 0) {
struct nodeinfo *ni;
log_print("getsockopt/sctp_primary_addr on "
"new assoc %d failed : %d",
- (int)sn->sn_assoc_change.sac_assoc_id, ret);
+ (int)sn->sn_assoc_change.sac_assoc_id,
+ ret);
/* Retry INIT later */
ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
@@ -426,12 +435,10 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
return;
/* Save the assoc ID */
- spin_lock(&ni->lock);
ni->assoc_id = sn->sn_assoc_change.sac_assoc_id;
- spin_unlock(&ni->lock);
log_print("got new/restarted association %d nodeid %d",
- (int)sn->sn_assoc_change.sac_assoc_id, nodeid);
+ (int)sn->sn_assoc_change.sac_assoc_id, nodeid);
/* Send any pending writes */
clear_bit(NI_INIT_PENDING, &ni->flags);
@@ -507,13 +514,12 @@ static int receive_from_sock(void)
sctp_con.rx_page = alloc_page(GFP_ATOMIC);
if (sctp_con.rx_page == NULL)
goto out_resched;
- CBUF_INIT(&sctp_con.cb, PAGE_CACHE_SIZE);
+ cbuf_init(&sctp_con.cb, PAGE_CACHE_SIZE);
}
memset(&incmsg, 0, sizeof(incmsg));
memset(&msgname, 0, sizeof(msgname));
- memset(incmsg, 0, sizeof(incmsg));
msg.msg_name = &msgname;
msg.msg_namelen = sizeof(msgname);
msg.msg_flags = 0;
@@ -532,17 +538,17 @@ static int receive_from_sock(void)
* iov[0] is the bit of the circular buffer between the current end
* point (cb.base + cb.len) and the end of the buffer.
*/
- iov[0].iov_len = sctp_con.cb.base - CBUF_DATA(&sctp_con.cb);
+ iov[0].iov_len = sctp_con.cb.base - cbuf_data(&sctp_con.cb);
iov[0].iov_base = page_address(sctp_con.rx_page) +
- CBUF_DATA(&sctp_con.cb);
+ cbuf_data(&sctp_con.cb);
iov[1].iov_len = 0;
/*
* iov[1] is the bit of the circular buffer between the start of the
* buffer and the start of the currently used section (cb.base)
*/
- if (CBUF_DATA(&sctp_con.cb) >= sctp_con.cb.base) {
- iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&sctp_con.cb);
+ if (cbuf_data(&sctp_con.cb) >= sctp_con.cb.base) {
+ iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&sctp_con.cb);
iov[1].iov_len = sctp_con.cb.base;
iov[1].iov_base = page_address(sctp_con.rx_page);
msg.msg_iovlen = 2;
@@ -557,7 +563,7 @@ static int receive_from_sock(void)
msg.msg_control = incmsg;
msg.msg_controllen = sizeof(incmsg);
cmsg = CMSG_FIRSTHDR(&msg);
- sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+ sinfo = CMSG_DATA(cmsg);
if (msg.msg_flags & MSG_NOTIFICATION) {
process_sctp_notification(&msg, page_address(sctp_con.rx_page));
@@ -583,29 +589,29 @@ static int receive_from_sock(void)
if (r == 1)
return 0;
- CBUF_ADD(&sctp_con.cb, ret);
+ cbuf_add(&sctp_con.cb, ret);
ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
page_address(sctp_con.rx_page),
sctp_con.cb.base, sctp_con.cb.len,
PAGE_CACHE_SIZE);
if (ret < 0)
goto out_close;
- CBUF_EAT(&sctp_con.cb, ret);
+ cbuf_eat(&sctp_con.cb, ret);
- out:
+out:
ret = 0;
goto out_ret;
- out_resched:
+out_resched:
lowcomms_data_ready(sctp_con.sock->sk, 0);
ret = 0;
- schedule();
+ cond_resched();
goto out_ret;
- out_close:
+out_close:
if (ret != -EAGAIN)
log_print("error reading from sctp socket: %d", ret);
- out_ret:
+out_ret:
return ret;
}
@@ -619,10 +625,12 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
set_fs(get_ds());
if (num == 1)
result = sctp_con.sock->ops->bind(sctp_con.sock,
- (struct sockaddr *) addr, addr_len);
+ (struct sockaddr *) addr,
+ addr_len);
else
result = sctp_con.sock->ops->setsockopt(sctp_con.sock, SOL_SCTP,
- SCTP_SOCKOPT_BINDX_ADD, (char *)addr, addr_len);
+ SCTP_SOCKOPT_BINDX_ADD,
+ (char *)addr, addr_len);
set_fs(fs);
if (result < 0)
@@ -719,10 +727,10 @@ static int init_sock(void)
return 0;
- create_delsock:
+create_delsock:
sock_release(sock);
sctp_con.sock = NULL;
- out:
+out:
return result;
}
@@ -756,16 +764,13 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
int users = 0;
struct nodeinfo *ni;
- if (!atomic_read(&accepting))
- return NULL;
-
ni = nodeid2nodeinfo(nodeid, allocation);
if (!ni)
return NULL;
spin_lock(&ni->writequeue_lock);
e = list_entry(ni->writequeue.prev, struct writequeue_entry, list);
- if (((struct list_head *) e == &ni->writequeue) ||
+ if ((&e->list == &ni->writequeue) ||
(PAGE_CACHE_SIZE - e->end < len)) {
e = NULL;
} else {
@@ -776,7 +781,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
spin_unlock(&ni->writequeue_lock);
if (e) {
- got_one:
+ got_one:
if (users == 0)
kmap(e->page);
*ppc = page_address(e->page) + offset;
@@ -803,9 +808,6 @@ void dlm_lowcomms_commit_buffer(void *arg)
int users;
struct nodeinfo *ni = e->ni;
- if (!atomic_read(&accepting))
- return;
-
spin_lock(&ni->writequeue_lock);
users = --e->users;
if (users)
@@ -822,7 +824,7 @@ void dlm_lowcomms_commit_buffer(void *arg)
}
return;
- out:
+out:
spin_unlock(&ni->writequeue_lock);
return;
}
@@ -878,7 +880,7 @@ static void initiate_association(int nodeid)
cmsg->cmsg_level = IPPROTO_SCTP;
cmsg->cmsg_type = SCTP_SNDRCV;
cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
- sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+ sinfo = CMSG_DATA(cmsg);
memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
@@ -892,7 +894,7 @@ static void initiate_association(int nodeid)
}
/* Send a message */
-static int send_to_sock(struct nodeinfo *ni)
+static void send_to_sock(struct nodeinfo *ni)
{
int ret = 0;
struct writequeue_entry *e;
@@ -903,13 +905,13 @@ static int send_to_sock(struct nodeinfo *ni)
struct sctp_sndrcvinfo *sinfo;
struct kvec iov;
- /* See if we need to init an association before we start
+ /* See if we need to init an association before we start
sending precious messages */
spin_lock(&ni->lock);
if (!ni->assoc_id && !test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
spin_unlock(&ni->lock);
initiate_association(ni->nodeid);
- return 0;
+ return;
}
spin_unlock(&ni->lock);
@@ -923,7 +925,7 @@ static int send_to_sock(struct nodeinfo *ni)
cmsg->cmsg_level = IPPROTO_SCTP;
cmsg->cmsg_type = SCTP_SNDRCV;
cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
- sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+ sinfo = CMSG_DATA(cmsg);
memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
sinfo->sinfo_assoc_id = ni->assoc_id;
@@ -955,7 +957,7 @@ static int send_to_sock(struct nodeinfo *ni)
goto send_error;
} else {
/* Don't starve people filling buffers */
- schedule();
+ cond_resched();
}
spin_lock(&ni->writequeue_lock);
@@ -964,15 +966,16 @@ static int send_to_sock(struct nodeinfo *ni)
if (e->len == 0 && e->users == 0) {
list_del(&e->list);
+ kunmap(e->page);
free_entry(e);
continue;
}
}
spin_unlock(&ni->writequeue_lock);
- out:
- return ret;
+out:
+ return;
- send_error:
+send_error:
log_print("Error sending to node %d %d", ni->nodeid, ret);
spin_lock(&ni->lock);
if (!test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
@@ -982,7 +985,7 @@ static int send_to_sock(struct nodeinfo *ni)
} else
spin_unlock(&ni->lock);
- return ret;
+ return;
}
/* Try to send any messages that are pending */
@@ -994,7 +997,7 @@ static void process_output_queue(void)
spin_lock_bh(&write_nodes_lock);
list_for_each_safe(list, temp, &write_nodes) {
struct nodeinfo *ni =
- list_entry(list, struct nodeinfo, write_list);
+ list_entry(list, struct nodeinfo, write_list);
clear_bit(NI_WRITE_PENDING, &ni->flags);
list_del(&ni->write_list);
@@ -1106,7 +1109,7 @@ static int dlm_recvd(void *data)
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&lowcomms_recv_wait, &wait);
if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
- schedule();
+ cond_resched();
remove_wait_queue(&lowcomms_recv_wait, &wait);
set_current_state(TASK_RUNNING);
@@ -1118,12 +1121,12 @@ static int dlm_recvd(void *data)
/* Don't starve out everyone else */
if (++count >= MAX_RX_MSG_COUNT) {
- schedule();
+ cond_resched();
count = 0;
}
} while (!kthread_should_stop() && ret >=0);
}
- schedule();
+ cond_resched();
}
return 0;
@@ -1138,7 +1141,7 @@ static int dlm_sendd(void *data)
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
if (write_list_empty())
- schedule();
+ cond_resched();
set_current_state(TASK_RUNNING);
if (sctp_con.eagain_flag) {
@@ -1166,7 +1169,7 @@ static int daemons_start(void)
p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
error = IS_ERR(p);
- if (error) {
+ if (error) {
log_print("can't start dlm_recvd %d", error);
return error;
}
@@ -1174,7 +1177,7 @@ static int daemons_start(void)
p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
error = IS_ERR(p);
- if (error) {
+ if (error) {
log_print("can't start dlm_sendd %d", error);
kthread_stop(recv_task);
return error;
@@ -1197,43 +1200,28 @@ int dlm_lowcomms_start(void)
error = daemons_start();
if (error)
goto fail_sock;
- atomic_set(&accepting, 1);
return 0;
- fail_sock:
+fail_sock:
close_connection();
return error;
}
-/* Set all the activity flags to prevent any socket activity. */
-
+/*
+ * Shut the comms layer down: calls daemons_stop(), clean_writequeues(),
+ * close_connection() and dealloc_nodeinfo(), then frees every
+ * dlm_local_addr[] entry and resets the local address count and nodeid.
+ */
void dlm_lowcomms_stop(void)
{
- atomic_set(&accepting, 0);
+ int i;
+
sctp_con.flags = 0x7;
daemons_stop();
clean_writequeues();
close_connection();
dealloc_nodeinfo();
max_nodeid = 0;
-}
-int dlm_lowcomms_init(void)
-{
- init_waitqueue_head(&lowcomms_recv_wait);
- spin_lock_init(&write_nodes_lock);
- INIT_LIST_HEAD(&write_nodes);
- init_rwsem(&nodeinfo_lock);
- return 0;
-}
-
-void dlm_lowcomms_exit(void)
-{
- int i;
+ /* Free the addresses while dlm_local_count still holds the entry count;
+    zeroing it first would skip the loop and leak every address. */
for (i = 0; i < dlm_local_count; i++)
kfree(dlm_local_addr[i]);
dlm_local_count = 0;
dlm_local_nodeid = 0;
}
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
new file mode 100644
index 00000000000..8f2791fc844
--- /dev/null
+++ b/fs/dlm/lowcomms-tcp.c
@@ -0,0 +1,1189 @@
+/******************************************************************************
+*******************************************************************************
+**
+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
+**
+** This copyrighted material is made available to anyone wishing to use,
+** modify, copy, or redistribute it subject to the terms and conditions
+** of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+/*
+ * lowcomms-tcp.c
+ *
+ * This is the "low-level" comms layer.
+ *
+ * It is responsible for sending/receiving messages
+ * from other nodes in the cluster.
+ *
+ * Cluster nodes are referred to by their nodeids. nodeids are
+ * simply 32 bit numbers to the locking module - if they need to
+ * be expanded for the cluster infrastructure then that is its
+ * responsibility. It is this layer's
+ * responsibility to resolve these into IP addresses or
+ * whatever it needs for inter-node communication.
+ *
+ * The comms level is two kernel threads that deal mainly with
+ * the receiving of messages from other nodes and passing them
+ * up to the mid-level comms layer (which understands the
+ * message format) for execution by the locking core, and
+ * a send thread which does all the setting up of connections
+ * to remote nodes and the sending of data. Threads are not allowed
+ * to send their own data because it may cause them to wait in times
+ * of high load. Also, this way, the sending thread can collect together
+ * messages bound for one node and send them in one block.
+ *
+ * I don't see any problem with the recv thread executing the locking
+ * code on behalf of remote processes as the locking code is
+ * short, efficient and never waits.
+ *
+ */
+
+
+#include <asm/ioctls.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/pagemap.h>
+
+#include "dlm_internal.h"
+#include "lowcomms.h"
+#include "midcomms.h"
+#include "config.h"
+
+struct cbuf {
+ unsigned int base; /* offset where the currently used section starts */
+ unsigned int len; /* number of bytes currently in use */
+ unsigned int mask; /* buffer size - 1 (set by cbuf_init), used to wrap offsets */
+};
+
+#define NODE_INCREMENT 32
+/* Grow the used length of cb by n. */
+static void cbuf_add(struct cbuf *cb, int n)
+{
+	cb->len = cb->len + n;
+}
+
+/* Offset just past the used section (base + len), wrapped with the mask. */
+static int cbuf_data(struct cbuf *cb)
+{
+	unsigned int end = cb->base + cb->len;
+
+	return end & cb->mask;
+}
+
+/* Reset cb to empty and derive the wrap mask as size - 1. */
+static void cbuf_init(struct cbuf *cb, int size)
+{
+	cb->mask = size - 1;
+	cb->len = 0;
+	cb->base = 0;
+}
+
+/* Drop n from the front: advance base (wrapped with the mask), shrink len. */
+static void cbuf_eat(struct cbuf *cb, int n)
+{
+	cb->base = (cb->base + n) & cb->mask;
+	cb->len = cb->len - n;
+}
+
+/* True when cb has no bytes in use. */
+static bool cbuf_empty(struct cbuf *cb)
+{
+	return !cb->len;
+}
+
+/* Maximum number of incoming messages to process before
+ doing a cond_resched()
+*/
+#define MAX_RX_MSG_COUNT 25
+
+struct connection {
+ struct socket *sock; /* NULL if not connected */
+ uint32_t nodeid; /* Index of this connection in connections[] (set by nodeid2con) */
+ struct rw_semaphore sock_sem; /* Stop connect races */
+ struct list_head read_list; /* Added to read_sockets by lowcomms_data_ready() */
+ struct list_head write_list; /* Added to write_sockets by lowcomms_write_space() */
+ struct list_head state_list; /* Added to state_sockets by lowcomms_connect_sock() */
+ unsigned long flags; /* Bit numbers are the CF_* values below, set with test_and_set_bit() */
+#define CF_READ_PENDING 1 /* queued on read_sockets */
+#define CF_WRITE_PENDING 2 /* queued on write_sockets */
+#define CF_CONNECT_PENDING 3 /* queued on state_sockets */
+#define CF_IS_OTHERCON 4 /* NOTE(review): presumably marks a connection reached via ->othercon - confirm */
+ struct list_head writequeue; /* List of outgoing writequeue_entries */
+ struct list_head listenlist; /* List of allocated listening sockets */
+ spinlock_t writequeue_lock; /* NOTE(review): presumably protects writequeue - confirm */
+ int (*rx_action) (struct connection *); /* What to do when active */
+ struct page *rx_page; /* Receive page: allocated on first receive, freed by close_connection() */
+ struct cbuf cb; /* Circular-buffer state over rx_page */
+ int retries; /* Reset to 0 by close_connection(); NOTE(review): presumably counts connect attempts - confirm */
+ atomic_t waiting_requests; /* Incremented on every lowcomms_data_ready() call */
+#define MAX_CONNECT_RETRIES 3
+ struct connection *othercon; /* Companion connection; closed too when close_connection() gets and_other */
+};
+#define sock2con(x) ((struct connection *)(x)->sk_user_data)
+
+/* An entry waiting to be sent */
+struct writequeue_entry {
+ struct list_head list; /* NOTE(review): presumably links the entry into connection.writequeue - confirm */
+ struct page *page; /* NOTE(review): presumably the page holding the queued data - confirm */
+ int offset;
+ int len;
+ int end;
+ int users;
+ struct connection *con; /* NOTE(review): presumably the owning connection - confirm against the allocator */
+};
+
+static struct sockaddr_storage dlm_local_addr; /* its ss_family picks the IPv4 or IPv6 branch in make_sockaddr() */
+
+/* Manage daemons */
+static struct task_struct *recv_task;
+static struct task_struct *send_task;
+
+static wait_queue_t lowcomms_send_waitq_head;
+static DECLARE_WAIT_QUEUE_HEAD(lowcomms_send_waitq); /* woken by lowcomms_write_space() and lowcomms_connect_sock() */
+static wait_queue_t lowcomms_recv_waitq_head;
+static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq); /* woken by lowcomms_data_ready() */
+
+/* An array of pointers to connections, indexed by NODEID */
+static struct connection **connections;
+static DECLARE_MUTEX(connections_lock); /* taken with down()/up() in nodeid2con() */
+static kmem_cache_t *con_cache; /* nodeid2con() allocates struct connection from this cache */
+static int conn_array_size; /* number of slots in connections[] */
+
+/* List of connections that have reads pending (filled by lowcomms_data_ready()) */
+static LIST_HEAD(read_sockets);
+static DEFINE_SPINLOCK(read_sockets_lock);
+
+/* List of connections which have writes pending (filled by lowcomms_write_space()) */
+static LIST_HEAD(write_sockets);
+static DEFINE_SPINLOCK(write_sockets_lock);
+
+/* List of connections which have connects pending (filled by lowcomms_connect_sock()) */
+static LIST_HEAD(state_sockets);
+static DEFINE_SPINLOCK(state_sockets_lock);
+
+/*
+ * Look up the connection for nodeid in connections[].
+ *
+ * With a non-zero allocation mask the array is grown as needed and a
+ * missing connection is created from con_cache.  With allocation == 0
+ * this is a pure lookup: nothing is allocated and NULL is returned if
+ * the node has no connection yet.
+ *
+ * Returns the connection, or NULL on a lookup miss, an allocation
+ * failure or a negative nodeid.
+ */
+static struct connection *nodeid2con(int nodeid, gfp_t allocation)
+{
+	struct connection *con = NULL;
+
+	/* A negative nodeid would index before the start of connections[] */
+	if (nodeid < 0)
+		return NULL;
+
+	down(&connections_lock);
+	if (nodeid >= conn_array_size) {
+		int new_size = nodeid + NODE_INCREMENT;
+		struct connection **new_conns;
+
+		/* Lookup only: don't grow the array just to find an empty slot */
+		if (!allocation)
+			goto finish;
+
+		new_conns = kzalloc(sizeof(*new_conns) * new_size, allocation);
+		if (!new_conns)
+			goto finish;
+
+		/* connections may still be NULL if nothing was allocated yet */
+		if (connections)
+			memcpy(new_conns, connections,
+			       sizeof(*new_conns) * conn_array_size);
+		conn_array_size = new_size;
+		kfree(connections);
+		connections = new_conns;
+	}
+
+	con = connections[nodeid];
+	if (con == NULL && allocation) {
+		con = kmem_cache_zalloc(con_cache, allocation);
+		if (!con)
+			goto finish;
+
+		con->nodeid = nodeid;
+		init_rwsem(&con->sock_sem);
+		INIT_LIST_HEAD(&con->writequeue);
+		spin_lock_init(&con->writequeue_lock);
+
+		connections[nodeid] = con;
+	}
+
+finish:
+	up(&connections_lock);
+	return con;
+}
+
+/*
+ * Data available on socket or listen socket received a connect.
+ * Counts the request and, unless the connection is already flagged
+ * CF_READ_PENDING, queues it on read_sockets and wakes
+ * lowcomms_recv_waitq.
+ */
+static void lowcomms_data_ready(struct sock *sk, int count_unused)
+{
+	struct connection *c = sock2con(sk);
+	int already_queued;
+
+	atomic_inc(&c->waiting_requests);
+
+	already_queued = test_and_set_bit(CF_READ_PENDING, &c->flags);
+	if (!already_queued) {
+		spin_lock_bh(&read_sockets_lock);
+		list_add_tail(&c->read_list, &read_sockets);
+		spin_unlock_bh(&read_sockets_lock);
+
+		wake_up_interruptible(&lowcomms_recv_waitq);
+	}
+}
+
+/*
+ * Queue the connection behind sk on write_sockets, once per
+ * CF_WRITE_PENDING, and wake lowcomms_send_waitq.
+ */
+static void lowcomms_write_space(struct sock *sk)
+{
+	struct connection *c = sock2con(sk);
+
+	if (!test_and_set_bit(CF_WRITE_PENDING, &c->flags)) {
+		spin_lock_bh(&write_sockets_lock);
+		list_add_tail(&c->write_list, &write_sockets);
+		spin_unlock_bh(&write_sockets_lock);
+
+		wake_up_interruptible(&lowcomms_send_waitq);
+	}
+}
+
+/*
+ * Queue con on state_sockets, once per CF_CONNECT_PENDING, and wake
+ * lowcomms_send_waitq.
+ */
+static inline void lowcomms_connect_sock(struct connection *con)
+{
+	int already_queued = test_and_set_bit(CF_CONNECT_PENDING, &con->flags);
+
+	if (!already_queued) {
+		spin_lock_bh(&state_sockets_lock);
+		list_add_tail(&con->state_list, &state_sockets);
+		spin_unlock_bh(&state_sockets_lock);
+
+		wake_up_interruptible(&lowcomms_send_waitq);
+	}
+}
+
+/* On reaching TCP_ESTABLISHED, treat the socket as having write space. */
+static void lowcomms_state_change(struct sock *sk)
+{
+	if (sk->sk_state != TCP_ESTABLISHED)
+		return;
+
+	lowcomms_write_space(sk);
+}
+
+/*
+ * Make a socket active: attach it to con and install the lowcomms
+ * data_ready, write_space and state_change callbacks. Always returns 0.
+ */
+static int add_sock(struct socket *sock, struct connection *con)
+{
+	struct sock *sk;
+
+	con->sock = sock;
+	sk = con->sock->sk;
+
+	sk->sk_data_ready = lowcomms_data_ready;
+	sk->sk_write_space = lowcomms_write_space;
+	sk->sk_state_change = lowcomms_state_change;
+
+	return 0;
+}
+
+/*
+ * Stamp saddr with the local address family and the given port (stored
+ * big-endian), and report the matching sockaddr size through addr_len.
+ * Any family other than AF_INET is handled as IPv6.
+ */
+static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
+			  int *addr_len)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *)saddr;
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)saddr;
+
+	saddr->ss_family = dlm_local_addr.ss_family;
+	if (saddr->ss_family != AF_INET) {
+		sin6->sin6_port = cpu_to_be16(port);
+		*addr_len = sizeof(struct sockaddr_in6);
+		return;
+	}
+
+	sin->sin_port = cpu_to_be16(port);
+	*addr_len = sizeof(struct sockaddr_in);
+}
+
+/*
+ * Close a remote connection and tidy up: holding sock_sem for write,
+ * release the socket, optionally close othercon as well, free the
+ * receive page and reset the retry counter.
+ */
+static void close_connection(struct connection *con, bool and_other)
+{
+	struct socket *sock;
+
+	down_write(&con->sock_sem);
+
+	sock = con->sock;
+	if (sock != NULL) {
+		sock_release(sock);
+		con->sock = NULL;
+	}
+
+	/* The nested call passes false, so the recursion stops after one level. */
+	if (and_other && con->othercon != NULL)
+		close_connection(con->othercon, false);
+
+	if (con->rx_page != NULL) {
+		__free_page(con->rx_page);
+		con->rx_page = NULL;
+	}
+
+	con->retries = 0;
+	up_write(&con->sock_sem);
+}
+
+/*
+ * Data received from remote end.
+ *
+ * Reads whatever is available on con->sock into the connection's
+ * page-sized circular buffer (the page is allocated on demand) and hands
+ * the buffered bytes to dlm_process_incoming_buffer().
+ *
+ * Returns 0 when there is no socket, when the data was consumed, or after
+ * taking the out_resched path (which calls lowcomms_data_ready() on the
+ * socket and then cond_resched()).  Otherwise returns the non-positive
+ * sock_recvmsg() result or the negative dlm_process_incoming_buffer()
+ * result; unless that value is -EAGAIN or CF_IS_OTHERCON is set, the
+ * connection is closed before returning.
+ */
+static int receive_from_sock(struct connection *con)
+{
+ int ret = 0;
+ struct msghdr msg;
+ struct iovec iov[2];
+ mm_segment_t fs;
+ unsigned len;
+ int r;
+ int call_again_soon = 0;
+
+ down_read(&con->sock_sem);
+
+ if (con->sock == NULL)
+ goto out;
+ if (con->rx_page == NULL) {
+ /*
+ * This doesn't need to be atomic, but I think it should
+ * improve performance if it is.
+ */
+ con->rx_page = alloc_page(GFP_ATOMIC);
+ if (con->rx_page == NULL)
+ goto out_resched;
+ cbuf_init(&con->cb, PAGE_CACHE_SIZE);
+ }
+
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_iovlen = 1;
+ msg.msg_iov = iov;
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_flags = 0;
+
+ /*
+ * iov[0] is the bit of the circular buffer between the current end
+ * point (cb.base + cb.len) and the end of the buffer.
+ */
+ iov[0].iov_len = con->cb.base - cbuf_data(&con->cb);
+ iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb);
+ iov[1].iov_len = 0;
+
+ /*
+ * iov[1] is the bit of the circular buffer between the start of the
+ * buffer and the start of the currently used section (cb.base)
+ */
+ if (cbuf_data(&con->cb) >= con->cb.base) {
+ iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb);
+ iov[1].iov_len = con->cb.base;
+ iov[1].iov_base = page_address(con->rx_page);
+ msg.msg_iovlen = 2;
+ }
+ len = iov[0].iov_len + iov[1].iov_len;
+
+ /* The iovecs point into the kernel rx page, hence the set_fs() dance */
+ fs = get_fs();
+ set_fs(get_ds());
+ /* r keeps the raw read result for the -EBADMSG diagnostic below */
+ r = ret = sock_recvmsg(con->sock, &msg, len,
+ MSG_DONTWAIT | MSG_NOSIGNAL);
+ set_fs(fs);
+
+ if (ret <= 0)
+ goto out_close;
+ /* The read used up all the free space, so ask to be run again */
+ if (ret == len)
+ call_again_soon = 1;
+ cbuf_add(&con->cb, ret);
+ ret = dlm_process_incoming_buffer(con->nodeid,
+ page_address(con->rx_page),
+ con->cb.base, con->cb.len,
+ PAGE_CACHE_SIZE);
+ if (ret == -EBADMSG) {
+ printk(KERN_INFO "dlm: lowcomms: addr=%p, base=%u, len=%u, "
+ "iov_len=%u, iov_base[0]=%p, read=%d\n",
+ page_address(con->rx_page), con->cb.base, con->cb.len,
+ len, iov[0].iov_base, r);
+ }
+ if (ret < 0)
+ goto out_close;
+ /* ret is the number of bytes the DLM consumed from the buffer */
+ cbuf_eat(&con->cb, ret);
+
+ /* Nothing left buffered and no immediate re-read: give the page back */
+ if (cbuf_empty(&con->cb) && !call_again_soon) {
+ __free_page(con->rx_page);
+ con->rx_page = NULL;
+ }
+
+out:
+ if (call_again_soon)
+ goto out_resched;
+ up_read(&con->sock_sem);
+ return 0;
+
+out_resched:
+ lowcomms_data_ready(con->sock->sk, 0);
+ up_read(&con->sock_sem);
+ cond_resched();
+ return 0;
+
+out_close:
+ up_read(&con->sock_sem);
+ if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) {
+ close_connection(con, false);
+ /* Reconnect when there is something to send */
+ }
+
+ return ret;
+}
+
+/*
+ * Listening socket is busy, accept a connection.
+ *
+ * @con is the listening connection.  The accepted socket is attached to
+ * the connection of the peer's node id, or to that connection's
+ * "othercon" when the node already has a socket.
+ *
+ * Returns 0 on success.  Failure values: -ENOMEM (socket or connection
+ * allocation failed), -ENOTCONN (no listening socket), the error from
+ * the accept op, -ECONNABORTED (peer name lookup failed) or -1 (peer
+ * address does not map to a node id).
+ */
+static int accept_from_sock(struct connection *con)
+{
+ int result;
+ struct sockaddr_storage peeraddr;
+ struct socket *newsock;
+ int len;
+ int nodeid;
+ struct connection *newcon;
+
+ memset(&peeraddr, 0, sizeof(peeraddr));
+ result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM,
+ IPPROTO_TCP, &newsock);
+ if (result < 0)
+ return -ENOMEM;
+
+ down_read(&con->sock_sem);
+
+ result = -ENOTCONN;
+ if (con->sock == NULL)
+ goto accept_err;
+
+ newsock->type = con->sock->type;
+ newsock->ops = con->sock->ops;
+
+ result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK);
+ if (result < 0)
+ goto accept_err;
+
+ /* Get the connected socket's peer */
+ memset(&peeraddr, 0, sizeof(peeraddr));
+ if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr,
+ &len, 2)) {
+ result = -ECONNABORTED;
+ goto accept_err;
+ }
+
+ /* Get the new node's NODEID */
+ /* NOTE(review): port is set to 0 first, presumably so the lookup
+ matches on the address alone - confirm against dlm_addr_to_nodeid() */
+ make_sockaddr(&peeraddr, 0, &len);
+ if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
+ printk("dlm: connect from non cluster node\n");
+ sock_release(newsock);
+ up_read(&con->sock_sem);
+ return -1;
+ }
+
+ log_print("got connection from %d", nodeid);
+
+ /* Check to see if we already have a connection to this node. This
+ * could happen if the two nodes initiate a connection at roughly
+ * the same time and the connections cross on the wire.
+ * TEMPORARY FIX:
+ * In this case we store the incoming one in "othercon"
+ */
+ newcon = nodeid2con(nodeid, GFP_KERNEL);
+ if (!newcon) {
+ result = -ENOMEM;
+ goto accept_err;
+ }
+ down_write(&newcon->sock_sem);
+ if (newcon->sock) {
+ struct connection *othercon = newcon->othercon;
+
+ /* First crossed connection for this node: create the othercon */
+ if (!othercon) {
+ othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
+ if (!othercon) {
+ printk("dlm: failed to allocate incoming socket\n");
+ up_write(&newcon->sock_sem);
+ result = -ENOMEM;
+ goto accept_err;
+ }
+ othercon->nodeid = nodeid;
+ othercon->rx_action = receive_from_sock;
+ init_rwsem(&othercon->sock_sem);
+ set_bit(CF_IS_OTHERCON, &othercon->flags);
+ newcon->othercon = othercon;
+ }
+ othercon->sock = newsock;
+ newsock->sk->sk_user_data = othercon;
+ add_sock(newsock, othercon);
+ }
+ else {
+ newsock->sk->sk_user_data = newcon;
+ newcon->rx_action = receive_from_sock;
+ add_sock(newsock, newcon);
+
+ }
+
+ up_write(&newcon->sock_sem);
+
+ /*
+ * Add it to the active queue in case we got data
+ * between processing the accept and adding the socket
+ * to the read_sockets list
+ */
+ lowcomms_data_ready(newsock->sk, 0);
+ up_read(&con->sock_sem);
+
+ return 0;
+
+accept_err:
+ up_read(&con->sock_sem);
+ sock_release(newsock);
+
+ if (result != -EAGAIN)
+ printk("dlm: error accepting connection from node: %d\n", result);
+ return result;
+}
+
+/*
+ * Connect a new socket to its peer.
+ *
+ * Runs under the write side of con->sock_sem.  Gives up silently once
+ * con->retries exceeds MAX_CONNECT_RETRIES or when the connection already
+ * has a socket.  On failure the socket is released and, unless the error
+ * is one of the fatal ones listed below, another attempt is queued with
+ * lowcomms_connect_sock().
+ */
+static void connect_to_sock(struct connection *con)
+{
+	int result = -EHOSTUNREACH;
+	struct sockaddr_storage saddr;
+	int addr_len;
+	struct socket *sock = NULL;
+
+	if (con->nodeid == 0) {
+		log_print("attempt to connect sock 0 foiled");
+		return;
+	}
+
+	down_write(&con->sock_sem);
+	if (con->retries++ > MAX_CONNECT_RETRIES)
+		goto out;
+
+	/* Some odd races can cause double-connects, ignore them */
+	if (con->sock) {
+		result = 0;
+		goto out;
+	}
+
+	/* Create a socket to communicate with */
+	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM,
+				  IPPROTO_TCP, &sock);
+	if (result < 0)
+		goto out_err;
+
+	memset(&saddr, 0, sizeof(saddr));
+	if (dlm_nodeid_to_addr(con->nodeid, &saddr))
+		goto out_err;
+
+	sock->sk->sk_user_data = con;
+	con->rx_action = receive_from_sock;
+
+	make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len);
+
+	add_sock(sock, con);
+
+	log_print("connecting to %d", con->nodeid);
+	result =
+		sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
+				   O_NONBLOCK);
+	if (result == -EINPROGRESS)
+		result = 0;
+	if (result == 0)
+		goto out;
+
+out_err:
+	if (con->sock) {
+		sock_release(con->sock);
+		con->sock = NULL;
+	} else if (sock) {
+		/*
+		 * The socket was created but never attached to the
+		 * connection (address lookup failed), so release it here.
+		 */
+		sock_release(sock);
+	}
+	/*
+	 * Some errors are fatal and this list might need adjusting. For other
+	 * errors we try again until the max number of retries is reached.
+	 */
+	if (result != -EHOSTUNREACH && result != -ENETUNREACH &&
+	    result != -ENETDOWN && result != -EINVAL
+	    && result != -EPROTONOSUPPORT) {
+		lowcomms_connect_sock(con);
+		result = 0;
+	}
+out:
+	up_write(&con->sock_sem);
+	return;
+}
+
+/*
+ * Create, bind and start listening on the TCP socket for incoming DLM
+ * connections.  The socket is bound to @saddr with the port taken from
+ * dlm_config.tcp_port.
+ *
+ * On success the socket is returned and also stored in con->sock, with
+ * con->rx_action set to accept_from_sock.  On failure NULL is returned
+ * and con->sock is left NULL.  Failures to set SO_REUSEADDR or
+ * SO_KEEPALIVE are only logged.
+ */
+static struct socket *create_listen_sock(struct connection *con,
+					 struct sockaddr_storage *saddr)
+{
+	struct socket *sock = NULL;
+	mm_segment_t fs;
+	int result = 0;
+	int one = 1;
+	int addr_len;
+
+	if (dlm_local_addr.ss_family == AF_INET)
+		addr_len = sizeof(struct sockaddr_in);
+	else
+		addr_len = sizeof(struct sockaddr_in6);
+
+	/* Create a socket to communicate with */
+	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock);
+	if (result < 0) {
+		printk("dlm: Can't create listening comms socket\n");
+		goto create_out;
+	}
+
+	fs = get_fs();
+	set_fs(get_ds());
+	result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+				 (char *)&one, sizeof(one));
+	set_fs(fs);
+	if (result < 0) {
+		printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n",
+		       result);
+	}
+	sock->sk->sk_user_data = con;
+	con->rx_action = accept_from_sock;
+	con->sock = sock;
+
+	/* Bind to our port */
+	make_sockaddr(saddr, dlm_config.tcp_port, &addr_len);
+	result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
+	if (result < 0) {
+		printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port);
+		sock_release(sock);
+		sock = NULL;
+		con->sock = NULL;
+		goto create_out;
+	}
+
+	fs = get_fs();
+	set_fs(get_ds());
+
+	result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+				 (char *)&one, sizeof(one));
+	set_fs(fs);
+	if (result < 0) {
+		printk("dlm: Set keepalive failed: %d\n", result);
+	}
+
+	result = sock->ops->listen(sock, 5);
+	if (result < 0) {
+		printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port);
+		sock_release(sock);
+		sock = NULL;
+		/* Don't leave a pointer to the released socket behind */
+		con->sock = NULL;
+		goto create_out;
+	}
+
+create_out:
+	return sock;
+}
+
+
+/*
+ * Listen on all interfaces.
+ *
+ * Uses the connection for node id 0 as the listening connection.
+ * Returns 0 on success, -ENOMEM if that connection cannot be obtained,
+ * or -EADDRINUSE if the listening socket cannot be set up.
+ */
+static int listen_for_all(void)
+{
+	struct socket *sock;
+	struct connection *con = nodeid2con(0, GFP_KERNEL);
+
+	/* nodeid2con() can fail; don't touch con->flags through NULL */
+	if (!con)
+		return -ENOMEM;
+
+	/* We don't support multi-homed hosts */
+	set_bit(CF_IS_OTHERCON, &con->flags);
+
+	sock = create_listen_sock(con, &dlm_local_addr);
+	if (!sock)
+		return -EADDRINUSE;
+
+	add_sock(sock, con);
+	return 0;
+}
+
+
+
+/*
+ * Allocate an empty write queue entry, plus the page backing it, for
+ * @con using the @allocation flags.  Returns NULL if either allocation
+ * fails.  The entry is not linked into any list here.
+ */
+static struct writequeue_entry *new_writequeue_entry(struct connection *con,
+						     gfp_t allocation)
+{
+	struct writequeue_entry *wq;
+
+	wq = kmalloc(sizeof(struct writequeue_entry), allocation);
+	if (wq == NULL)
+		return NULL;
+
+	wq->page = alloc_page(allocation);
+	if (wq->page == NULL) {
+		kfree(wq);
+		return NULL;
+	}
+
+	wq->con = con;
+	wq->users = 0;
+	wq->end = 0;
+	wq->len = 0;
+	wq->offset = 0;
+
+	return wq;
+}
+
+/*
+ * Reserve @len bytes of send buffer for node @nodeid.
+ *
+ * The space comes from the last entry on the connection's write queue if
+ * it has room, otherwise from a freshly allocated entry appended to the
+ * queue.  *@ppc is set to the start of the reserved bytes.  The returned
+ * handle must be passed to dlm_lowcomms_commit_buffer() once the message
+ * has been written.  Returns NULL if the connection or a new entry
+ * cannot be allocated.
+ */
+void *dlm_lowcomms_get_buffer(int nodeid, int len,
+			      gfp_t allocation, char **ppc)
+{
+	struct connection *con;
+	struct writequeue_entry *e;
+	int offset = 0;
+	int users = 0;
+
+	con = nodeid2con(nodeid, allocation);
+	if (!con)
+		return NULL;
+
+	/* The queue tail is inspected and modified, so take the queue lock */
+	spin_lock(&con->writequeue_lock);
+	e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
+	if ((&e->list == &con->writequeue) ||
+	    (PAGE_CACHE_SIZE - e->end < len)) {
+		e = NULL;
+	} else {
+		offset = e->end;
+		e->end += len;
+		users = e->users++;
+	}
+	spin_unlock(&con->writequeue_lock);
+
+	if (e) {
+	got_one:
+		/* The first user of an entry maps its page */
+		if (users == 0)
+			kmap(e->page);
+		*ppc = page_address(e->page) + offset;
+		return e;
+	}
+
+	e = new_writequeue_entry(con, allocation);
+	if (e) {
+		spin_lock(&con->writequeue_lock);
+		offset = e->end;
+		e->end += len;
+		users = e->users++;
+		list_add_tail(&e->list, &con->writequeue);
+		spin_unlock(&con->writequeue_lock);
+		goto got_one;
+	}
+	return NULL;
+}
+
+/*
+ * Commit a buffer obtained from dlm_lowcomms_get_buffer().
+ *
+ * Drops one user from the write queue entry.  When the last user is gone
+ * the entry's sendable length is fixed up, its page is unmapped and the
+ * connection is queued on write_sockets (unless CF_WRITE_PENDING was
+ * already set), waking lowcomms_send_waitq.
+ */
+void dlm_lowcomms_commit_buffer(void *mh)
+{
+	struct writequeue_entry *e = (struct writequeue_entry *)mh;
+	struct connection *con = e->con;
+	int users;
+
+	/* Entry accounting is protected by the queue lock */
+	spin_lock(&con->writequeue_lock);
+	users = --e->users;
+	if (users)
+		goto out;
+	e->len = e->end - e->offset;
+	kunmap(e->page);
+	spin_unlock(&con->writequeue_lock);
+
+	if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) {
+		spin_lock_bh(&write_sockets_lock);
+		list_add_tail(&con->write_list, &write_sockets);
+		spin_unlock_bh(&write_sockets_lock);
+
+		wake_up_interruptible(&lowcomms_send_waitq);
+	}
+	return;
+
+out:
+	spin_unlock(&con->writequeue_lock);
+	return;
+}
+
+/* Free a write queue entry: first its page, then the entry itself */
+static void free_entry(struct writequeue_entry *e)
+{
+	struct page *pg = e->page;
+
+	__free_page(pg);
+	kfree(e);
+}
+
+/*
+ * Send a message.
+ *
+ * Walks con->writequeue from the head and pushes each entry's committed
+ * bytes (offset/len) out with the socket's sendpage op.  An entry is
+ * freed once it is fully sent and has no users left.  Sending stops when
+ * sendpage returns -EAGAIN or 0.  Any other error closes the connection
+ * and queues a reconnect; a missing socket just queues a connect.
+ */
+static void send_to_sock(struct connection *con)
+{
+ int ret = 0;
+ ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int);
+ const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
+ struct writequeue_entry *e;
+ int len, offset;
+
+ down_read(&con->sock_sem);
+ if (con->sock == NULL)
+ goto out_connect;
+
+ sendpage = con->sock->ops->sendpage;
+
+ spin_lock(&con->writequeue_lock);
+ for (;;) {
+ e = list_entry(con->writequeue.next, struct writequeue_entry,
+ list);
+ /* Back at the list head: the queue is empty */
+ if ((struct list_head *) e == &con->writequeue)
+ break;
+
+ len = e->len;
+ offset = e->offset;
+ BUG_ON(len == 0 && e->users == 0);
+ /* The queue lock is dropped around the actual send */
+ spin_unlock(&con->writequeue_lock);
+
+ ret = 0;
+ if (len) {
+ ret = sendpage(con->sock, e->page, offset, len,
+ msg_flags);
+ if (ret == -EAGAIN || ret == 0)
+ goto out;
+ if (ret <= 0)
+ goto send_error;
+ }
+ else {
+ /* Don't starve people filling buffers */
+ cond_resched();
+ }
+
+ spin_lock(&con->writequeue_lock);
+ /* Account for the ret bytes just sent (0 if nothing was sent) */
+ e->offset += ret;
+ e->len -= ret;
+
+ if (e->len == 0 && e->users == 0) {
+ list_del(&e->list);
+ kunmap(e->page);
+ free_entry(e);
+ continue;
+ }
+ }
+ spin_unlock(&con->writequeue_lock);
+out:
+ up_read(&con->sock_sem);
+ return;
+
+send_error:
+ up_read(&con->sock_sem);
+ close_connection(con, false);
+ lowcomms_connect_sock(con);
+ return;
+
+out_connect:
+ up_read(&con->sock_sem);
+ lowcomms_connect_sock(con);
+ return;
+}
+
+/*
+ * Unlink and free every entry on the connection's write queue, holding
+ * the queue lock for the whole walk.
+ */
+static void clean_one_writequeue(struct connection *con)
+{
+	struct writequeue_entry *entry;
+	struct writequeue_entry *next;
+
+	spin_lock(&con->writequeue_lock);
+	list_for_each_entry_safe(entry, next, &con->writequeue, list) {
+		list_del(&entry->list);
+		free_entry(entry);
+	}
+	spin_unlock(&con->writequeue_lock);
+}
+
+/*
+ * Called from recovery when it knows that a node has left the cluster.
+ *
+ * Discards anything queued for the node, closes its connection (and the
+ * "othercon", if any) and zeroes its waiting_requests counter.
+ * Returns -1 if the connections array does not exist, otherwise 0.
+ */
+int dlm_lowcomms_close(int nodeid)
+{
+	struct connection *con;
+
+	if (connections == NULL)
+		return -1;
+
+	log_print("closing connection to node %d", nodeid);
+
+	con = nodeid2con(nodeid, 0);
+	if (con != NULL) {
+		clean_one_writequeue(con);
+		close_connection(con, true);
+		atomic_set(&con->waiting_requests, 0);
+	}
+
+	return 0;
+}
+
+/*
+ * API send message call, may queue the request.
+ * N.B. This is the old interface - use the new one for new calls.
+ *
+ * Copies @len bytes from @buf into a send buffer for @nodeid and commits
+ * it.  Returns 0 on success or -ENOBUFS if no buffer could be obtained.
+ */
+int lowcomms_send_message(int nodeid, char *buf, int len, gfp_t allocation)
+{
+	struct writequeue_entry *handle;
+	char *dest;
+
+	handle = dlm_lowcomms_get_buffer(nodeid, len, allocation, &dest);
+	if (!handle)
+		return -ENOBUFS;
+
+	memcpy(dest, buf, len);
+	dlm_lowcomms_commit_buffer(handle);
+
+	return 0;
+}
+
+/*
+ * Look for activity on active sockets.
+ *
+ * Takes each connection off the read_sockets list, clears its
+ * CF_READ_PENDING flag and calls its rx_action repeatedly until the
+ * waiting_requests counter drops to zero or the thread is asked to stop.
+ * read_sockets_lock is released while a connection is being serviced.
+ */
+static void process_sockets(void)
+{
+ struct list_head *list;
+ struct list_head *temp;
+ int count = 0;
+
+ spin_lock_bh(&read_sockets_lock);
+ list_for_each_safe(list, temp, &read_sockets) {
+
+ struct connection *con =
+ list_entry(list, struct connection, read_list);
+ list_del(&con->read_list);
+ clear_bit(CF_READ_PENDING, &con->flags);
+
+ spin_unlock_bh(&read_sockets_lock);
+
+ /* This can reach zero if we are processing requests
+ * as they come in.
+ */
+ if (atomic_read(&con->waiting_requests) == 0) {
+ spin_lock_bh(&read_sockets_lock);
+ continue;
+ }
+
+ do {
+ con->rx_action(con);
+
+ /* Don't starve out everyone else */
+ if (++count >= MAX_RX_MSG_COUNT) {
+ cond_resched();
+ count = 0;
+ }
+
+ } while (!atomic_dec_and_test(&con->waiting_requests) &&
+ !kthread_should_stop());
+
+ /* Retake the list lock before moving on to the next entry */
+ spin_lock_bh(&read_sockets_lock);
+ }
+ spin_unlock_bh(&read_sockets_lock);
+}
+
+/*
+ * Try to send any messages that are pending.
+ *
+ * Takes each connection off the write_sockets list, clears its
+ * CF_WRITE_PENDING flag and calls send_to_sock() on it with
+ * write_sockets_lock released.
+ */
+static void process_output_queue(void)
+{
+ struct list_head *list;
+ struct list_head *temp;
+
+ spin_lock_bh(&write_sockets_lock);
+ list_for_each_safe(list, temp, &write_sockets) {
+ struct connection *con =
+ list_entry(list, struct connection, write_list);
+ clear_bit(CF_WRITE_PENDING, &con->flags);
+ list_del(&con->write_list);
+
+ /* Don't hold the list lock across the send */
+ spin_unlock_bh(&write_sockets_lock);
+ send_to_sock(con);
+ spin_lock_bh(&write_sockets_lock);
+ }
+ spin_unlock_bh(&write_sockets_lock);
+}
+
+/*
+ * Service pending connect requests: take each connection off the
+ * state_sockets list, clear its CF_CONNECT_PENDING flag and call
+ * connect_to_sock() on it with state_sockets_lock released.
+ */
+static void process_state_queue(void)
+{
+ struct list_head *list;
+ struct list_head *temp;
+
+ spin_lock_bh(&state_sockets_lock);
+ list_for_each_safe(list, temp, &state_sockets) {
+ struct connection *con =
+ list_entry(list, struct connection, state_list);
+ list_del(&con->state_list);
+ clear_bit(CF_CONNECT_PENDING, &con->flags);
+ /* Don't hold the list lock across the connect attempt */
+ spin_unlock_bh(&state_sockets_lock);
+
+ connect_to_sock(con);
+ spin_lock_bh(&state_sockets_lock);
+ }
+ spin_unlock_bh(&state_sockets_lock);
+}
+
+
+/*
+ * Discard all entries on the write queues of every connection with a
+ * node id from 1 up to conn_array_size - 1.
+ */
+static void clean_writequeues(void)
+{
+	int id;
+
+	for (id = 1; id < conn_array_size; id++) {
+		struct connection *con = nodeid2con(id, 0);
+
+		if (!con)
+			continue;
+
+		clean_one_writequeue(con);
+	}
+}
+
+/* Return non-zero if read_sockets is empty, sampled under its lock */
+static int read_list_empty(void)
+{
+	int empty;
+
+	spin_lock_bh(&read_sockets_lock);
+	empty = list_empty(&read_sockets);
+	spin_unlock_bh(&read_sockets_lock);
+
+	return empty;
+}
+
+/*
+ * DLM Transport comms receive daemon.
+ *
+ * Thread function started by daemons_start().  Registers the current
+ * task on lowcomms_recv_waitq, then loops until kthread_should_stop():
+ * it calls cond_resched() when the read list is empty and then runs
+ * process_sockets().  Always returns 0.
+ */
+static int dlm_recvd(void *data)
+{
+ init_waitqueue_entry(&lowcomms_recv_waitq_head, current);
+ add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head);
+
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (read_list_empty())
+ cond_resched();
+ set_current_state(TASK_RUNNING);
+
+ process_sockets();
+ }
+
+ return 0;
+}
+
+/*
+ * Return non-zero only when both write_sockets and state_sockets are
+ * empty.  Each list is sampled under its own lock.
+ */
+static int write_and_state_lists_empty(void)
+{
+	int write_empty;
+	int state_empty;
+
+	spin_lock_bh(&write_sockets_lock);
+	write_empty = list_empty(&write_sockets);
+	spin_unlock_bh(&write_sockets_lock);
+
+	spin_lock_bh(&state_sockets_lock);
+	state_empty = list_empty(&state_sockets);
+	spin_unlock_bh(&state_sockets_lock);
+
+	return state_empty ? write_empty : 0;
+}
+
+/*
+ * DLM Transport send daemon.
+ *
+ * Thread function started by daemons_start().  Registers the current
+ * task on lowcomms_send_waitq, then loops until kthread_should_stop():
+ * it calls cond_resched() when both the write and state lists are empty,
+ * then services pending connects followed by pending sends.
+ * Always returns 0.
+ */
+static int dlm_sendd(void *data)
+{
+ init_waitqueue_entry(&lowcomms_send_waitq_head, current);
+ add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head);
+
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (write_and_state_lists_empty())
+ cond_resched();
+ set_current_state(TASK_RUNNING);
+
+ process_state_queue();
+ process_output_queue();
+ }
+
+ return 0;
+}
+
+/* Stop the receive thread, then the send thread */
+static void daemons_stop(void)
+{
+ kthread_stop(recv_task);
+ kthread_stop(send_task);
+}
+
+/*
+ * Start the receive and send threads.
+ *
+ * Returns 0 on success or the negative errno reported by kthread_run().
+ * If the send thread cannot be started, the receive thread that is
+ * already running is stopped again.
+ */
+static int daemons_start(void)
+{
+	struct task_struct *p;
+	int error;
+
+	p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
+	if (IS_ERR(p)) {
+		/* PTR_ERR() recovers the errno; IS_ERR() only says yes/no */
+		error = PTR_ERR(p);
+		log_print("can't start dlm_recvd %d", error);
+		return error;
+	}
+	recv_task = p;
+
+	p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
+	if (IS_ERR(p)) {
+		error = PTR_ERR(p);
+		log_print("can't start dlm_sendd %d", error);
+		kthread_stop(recv_task);
+		return error;
+	}
+	send_task = p;
+
+	return 0;
+}
+
+/*
+ * Return the largest buffer size we can cope with: PAGE_CACHE_SIZE,
+ * which is also the size used for the receive buffer and the limit
+ * applied to each write queue entry in this file.
+ */
+int lowcomms_max_buffer_size(void)
+{
+ return PAGE_CACHE_SIZE;
+}
+
+/*
+ * Shut the comms layer down: flag every connection, stop both daemons,
+ * drop all queued writes, close and free every connection (including
+ * any "othercon"), then free the connections array and destroy the
+ * connection cache.
+ */
+void dlm_lowcomms_stop(void)
+{
+	int i;
+
+	/* Set all the flags to prevent any socket activity. */
+	for (i = 0; i < conn_array_size; i++) {
+		struct connection *con = connections[i];
+
+		if (con)
+			con->flags |= 0xFF;
+	}
+
+	daemons_stop();
+	clean_writequeues();
+
+	for (i = 0; i < conn_array_size; i++) {
+		struct connection *con = connections[i];
+
+		if (!con)
+			continue;
+
+		close_connection(con, true);
+		if (con->othercon)
+			kmem_cache_free(con_cache, con->othercon);
+		kmem_cache_free(con_cache, con);
+	}
+
+	kfree(connections);
+	connections = NULL;
+
+	kmem_cache_destroy(con_cache);
+}
+
+/*
+ * Bring the comms layer up.  This is quite likely to sleep...
+ *
+ * Allocates the connections array and the connection cache, starts
+ * listening and starts the two daemons.  Returns 0 on success.  On
+ * failure everything set up so far is torn down, `connections` is reset
+ * to NULL and a non-zero error is returned (-ENOMEM for the allocation
+ * and local-address failures, otherwise the error from listen_for_all()
+ * or daemons_start()).
+ */
+int dlm_lowcomms_start(void)
+{
+	int error = 0;
+
+	error = -ENOMEM;
+	connections = kzalloc(sizeof(struct connection *) *
+			      NODE_INCREMENT, GFP_KERNEL);
+	if (!connections)
+		goto out;
+
+	conn_array_size = NODE_INCREMENT;
+
+	if (dlm_our_addr(&dlm_local_addr, 0)) {
+		log_print("no local IP address has been set");
+		goto fail_free_conn;
+	}
+	if (!dlm_our_addr(&dlm_local_addr, 1)) {
+		log_print("This dlm comms module does not support multi-homed clustering");
+		goto fail_free_conn;
+	}
+
+	con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection),
+				      __alignof__(struct connection), 0,
+				      NULL, NULL);
+	if (!con_cache)
+		goto fail_free_conn;
+
+
+	/* Start listening */
+	error = listen_for_all();
+	if (error)
+		goto fail_unlisten;
+
+	error = daemons_start();
+	if (error)
+		goto fail_unlisten;
+
+	return 0;
+
+fail_unlisten:
+	/* The listening connection may never have been created */
+	if (connections[0]) {
+		close_connection(connections[0], false);
+		kmem_cache_free(con_cache, connections[0]);
+	}
+	kmem_cache_destroy(con_cache);
+
+fail_free_conn:
+	kfree(connections);
+	/* dlm_lowcomms_close() tests this pointer, so don't leave it dangling */
+	connections = NULL;
+
+out:
+	return error;
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index 2d045e0daae..a9a9618c0d3 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -14,8 +14,6 @@
#ifndef __LOWCOMMS_DOT_H__
#define __LOWCOMMS_DOT_H__
-int dlm_lowcomms_init(void);
-void dlm_lowcomms_exit(void);
int dlm_lowcomms_start(void);
void dlm_lowcomms_stop(void);
int dlm_lowcomms_close(int nodeid);
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index a8da8dc36b2..162fbae58fe 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -16,7 +16,6 @@
#include "lock.h"
#include "user.h"
#include "memory.h"
-#include "lowcomms.h"
#include "config.h"
#ifdef CONFIG_DLM_DEBUG
@@ -47,20 +46,14 @@ static int __init init_dlm(void)
if (error)
goto out_config;
- error = dlm_lowcomms_init();
- if (error)
- goto out_debug;
-
error = dlm_user_init();
if (error)
- goto out_lowcomms;
+ goto out_debug;
printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
return 0;
- out_lowcomms:
- dlm_lowcomms_exit();
out_debug:
dlm_unregister_debugfs();
out_config:
@@ -76,7 +69,6 @@ static int __init init_dlm(void)
static void __exit exit_dlm(void)
{
dlm_user_exit();
- dlm_lowcomms_exit();
dlm_config_exit();
dlm_memory_exit();
dlm_lockspace_exit();
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index a3f7de7f3a8..85e2897bd74 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -186,6 +186,14 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
struct dlm_member *memb, *safe;
int i, error, found, pos = 0, neg = 0, low = -1;
+ /* previously removed members that we've not finished removing need to
+ count as a negative change so the "neg" recovery steps will happen */
+
+ list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
+ log_debug(ls, "prev removed member %d", memb->nodeid);
+ neg++;
+ }
+
/* move departed members from ls_nodes to ls_nodes_gone */
list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 518239a8b1e..4cc31be9cd9 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -90,13 +90,28 @@ static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid)
return 0;
}
+/*
+ * Start accepting a sync reply: under ls_rcom_spin, advance ls_rcom_seq,
+ * hand the new sequence number back through @new_seq and set
+ * LSFL_RCOM_WAIT.
+ */
+static void allow_sync_reply(struct dlm_ls *ls, uint64_t *new_seq)
+{
+ spin_lock(&ls->ls_rcom_spin);
+ *new_seq = ++ls->ls_rcom_seq;
+ set_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
+ spin_unlock(&ls->ls_rcom_spin);
+}
+
+/*
+ * Stop accepting sync replies: under ls_rcom_spin, clear both
+ * LSFL_RCOM_WAIT and LSFL_RCOM_READY.
+ */
+static void disallow_sync_reply(struct dlm_ls *ls)
+{
+ spin_lock(&ls->ls_rcom_spin);
+ clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
+ clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
+ spin_unlock(&ls->ls_rcom_spin);
+}
+
int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
int error = 0;
- memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
ls->ls_recover_nodeid = nodeid;
if (nodeid == dlm_our_nodeid()) {
@@ -108,12 +123,14 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh);
if (error)
goto out;
- rc->rc_id = ++ls->ls_rcom_seq;
+
+ allow_sync_reply(ls, &rc->rc_id);
+ memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
send_rcom(ls, mh, rc);
error = dlm_wait_function(ls, &rcom_response);
- clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
+ disallow_sync_reply(ls);
if (error)
goto out;
@@ -150,14 +167,21 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
- if (rc_in->rc_id != ls->ls_rcom_seq) {
- log_debug(ls, "reject old reply %d got %llx wanted %llx",
- rc_in->rc_type, rc_in->rc_id, ls->ls_rcom_seq);
- return;
+ spin_lock(&ls->ls_rcom_spin);
+ if (!test_bit(LSFL_RCOM_WAIT, &ls->ls_flags) ||
+ rc_in->rc_id != ls->ls_rcom_seq) {
+ log_debug(ls, "reject reply %d from %d seq %llx expect %llx",
+ rc_in->rc_type, rc_in->rc_header.h_nodeid,
+ (unsigned long long)rc_in->rc_id,
+ (unsigned long long)ls->ls_rcom_seq);
+ goto out;
}
memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length);
set_bit(LSFL_RCOM_READY, &ls->ls_flags);
+ clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
wake_up(&ls->ls_wait_general);
+ out:
+ spin_unlock(&ls->ls_rcom_spin);
}
static void receive_rcom_status_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
@@ -171,7 +195,6 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
struct dlm_mhandle *mh;
int error = 0, len = sizeof(struct dlm_rcom);
- memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
ls->ls_recover_nodeid = nodeid;
if (nodeid == dlm_our_nodeid()) {
@@ -185,12 +208,14 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
if (error)
goto out;
memcpy(rc->rc_buf, last_name, last_len);
- rc->rc_id = ++ls->ls_rcom_seq;
+
+ allow_sync_reply(ls, &rc->rc_id);
+ memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
send_rcom(ls, mh, rc);
error = dlm_wait_function(ls, &rcom_response);
- clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
+ disallow_sync_reply(ls);
out:
return error;
}
@@ -370,9 +395,10 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
+ struct rcom_config *rf;
struct dlm_mhandle *mh;
char *mb;
- int mb_len = sizeof(struct dlm_rcom);
+ int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb);
if (!mh)
@@ -391,6 +417,9 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
rc->rc_id = rc_in->rc_id;
rc->rc_result = -ESRCH;
+ rf = (struct rcom_config *) rc->rc_buf;
+ rf->rf_lvblen = -1;
+
dlm_rcom_out(rc);
dlm_lowcomms_commit_buffer(mh);
@@ -412,9 +441,10 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
ls = dlm_find_lockspace_global(hd->h_lockspace);
if (!ls) {
- log_print("lockspace %x from %d not found",
- hd->h_lockspace, nodeid);
- send_ls_not_ready(nodeid, rc);
+ log_print("lockspace %x from %d type %x not found",
+ hd->h_lockspace, nodeid, rc->rc_type);
+ if (rc->rc_type == DLM_RCOM_STATUS)
+ send_ls_not_ready(nodeid, rc);
return;
}
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index a5e6d184872..cf9f6831bab 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -252,6 +252,7 @@ static void recover_list_clear(struct dlm_ls *ls)
spin_lock(&ls->ls_recover_list_lock);
list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
list_del_init(&r->res_recover_list);
+ r->res_recover_locks_count = 0;
dlm_put_rsb(r);
ls->ls_recover_list_count--;
}
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 362e3eff4dc..650536aa513 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -45,7 +45,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
unsigned long start;
int error, neg = 0;
- log_debug(ls, "recover %llx", rv->seq);
+ log_debug(ls, "recover %llx", (unsigned long long)rv->seq);
mutex_lock(&ls->ls_recoverd_active);
@@ -94,14 +94,6 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
}
/*
- * Purge directory-related requests that are saved in requestqueue.
- * All dir requests from before recovery are invalid now due to the dir
- * rebuild and will be resent by the requesting nodes.
- */
-
- dlm_purge_requestqueue(ls);
-
- /*
* Wait for all nodes to complete directory rebuild.
*/
@@ -164,10 +156,31 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
*/
dlm_recover_rsbs(ls);
+ } else {
+ /*
+ * Other lockspace members may be going through the "neg" steps
+ * while also adding us to the lockspace, in which case they'll
+ * be doing the recover_locks (RS_LOCKS) barrier.
+ */
+ dlm_set_recover_status(ls, DLM_RS_LOCKS);
+
+ error = dlm_recover_locks_wait(ls);
+ if (error) {
+ log_error(ls, "recover_locks_wait failed %d", error);
+ goto fail;
+ }
}
dlm_release_root_list(ls);
+ /*
+ * Purge directory-related requests that are saved in requestqueue.
+ * All dir requests from before recovery are invalid now due to the dir
+ * rebuild and will be resent by the requesting nodes.
+ */
+
+ dlm_purge_requestqueue(ls);
+
dlm_set_recover_status(ls, DLM_RS_DONE);
error = dlm_recover_done_wait(ls);
if (error) {
@@ -199,7 +212,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
dlm_astd_wake();
- log_debug(ls, "recover %llx done: %u ms", rv->seq,
+ log_debug(ls, "recover %llx done: %u ms",
+ (unsigned long long)rv->seq,
jiffies_to_msecs(jiffies - start));
mutex_unlock(&ls->ls_recoverd_active);
@@ -207,11 +221,16 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
fail:
dlm_release_root_list(ls);
- log_debug(ls, "recover %llx error %d", rv->seq, error);
+ log_debug(ls, "recover %llx error %d",
+ (unsigned long long)rv->seq, error);
mutex_unlock(&ls->ls_recoverd_active);
return error;
}
+/* The dlm_ls_start() that created the rv we take here may already have been
+ stopped via dlm_ls_stop(); in that case we need to leave the RECOVERY_STOP
+ flag set. */
+
static void do_ls_recovery(struct dlm_ls *ls)
{
struct dlm_recover *rv = NULL;
@@ -219,7 +238,8 @@ static void do_ls_recovery(struct dlm_ls *ls)
spin_lock(&ls->ls_recover_lock);
rv = ls->ls_recover_args;
ls->ls_recover_args = NULL;
- clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+ if (rv && ls->ls_recover_seq == rv->seq)
+ clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
spin_unlock(&ls->ls_recover_lock);
if (rv) {
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index 7b2b089634a..65008d79c96 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -30,26 +30,36 @@ struct rq_entry {
* lockspace is enabled on some while still suspended on others.
*/
-void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
+int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
{
struct rq_entry *e;
int length = hd->h_length;
-
- if (dlm_is_removed(ls, nodeid))
- return;
+ int rv = 0;
e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
if (!e) {
log_print("dlm_add_requestqueue: out of memory\n");
- return;
+ return 0;
}
e->nodeid = nodeid;
memcpy(e->request, hd, length);
+ /* We need to check dlm_locking_stopped() after taking the mutex to
+ avoid a race where dlm_recoverd enables locking and runs
+ process_requestqueue between our earlier dlm_locking_stopped check
+ and this addition to the requestqueue. */
+
mutex_lock(&ls->ls_requestqueue_mutex);
- list_add_tail(&e->list, &ls->ls_requestqueue);
+ if (dlm_locking_stopped(ls))
+ list_add_tail(&e->list, &ls->ls_requestqueue);
+ else {
+ log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid);
+ kfree(e);
+ rv = -EAGAIN;
+ }
mutex_unlock(&ls->ls_requestqueue_mutex);
+ return rv;
}
int dlm_process_requestqueue(struct dlm_ls *ls)
@@ -120,6 +130,10 @@ static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid)
{
uint32_t type = ms->m_type;
+ /* the ls is being cleaned up and freed by release_lockspace */
+ if (!ls->ls_count)
+ return 1;
+
if (dlm_is_removed(ls, nodeid))
return 1;
diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h
index 349f0d292d9..6a53ea03335 100644
--- a/fs/dlm/requestqueue.h
+++ b/fs/dlm/requestqueue.h
@@ -13,7 +13,7 @@
#ifndef __REQUESTQUEUE_DOT_H__
#define __REQUESTQUEUE_DOT_H__
-void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
+int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
int dlm_process_requestqueue(struct dlm_ls *ls);
void dlm_wait_requestqueue(struct dlm_ls *ls);
void dlm_purge_requestqueue(struct dlm_ls *ls);
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 8c27de8b956..c0791cbacad 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -2,6 +2,7 @@ config GFS2_FS
tristate "GFS2 file system support"
depends on EXPERIMENTAL
select FS_POSIX_ACL
+ select CRC32
help
A cluster filesystem.
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 5f959b8ce40..6e80844367e 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -74,11 +74,11 @@ int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
{
if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl)
return -EOPNOTSUPP;
- if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER))
+ if (current->fsuid != ip->i_inode.i_uid && !capable(CAP_FOWNER))
return -EPERM;
- if (S_ISLNK(ip->i_di.di_mode))
+ if (S_ISLNK(ip->i_inode.i_mode))
return -EOPNOTSUPP;
- if (!access && !S_ISDIR(ip->i_di.di_mode))
+ if (!access && !S_ISDIR(ip->i_inode.i_mode))
return -EACCES;
return 0;
@@ -145,14 +145,14 @@ out:
}
/**
- * gfs2_check_acl_locked - Check an ACL to see if we're allowed to do something
+ * gfs2_check_acl - Check an ACL to see if we're allowed to do something
* @inode: the file we want to do something to
* @mask: what we want to do
*
* Returns: errno
*/
-int gfs2_check_acl_locked(struct inode *inode, int mask)
+int gfs2_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl = NULL;
int error;
@@ -170,21 +170,6 @@ int gfs2_check_acl_locked(struct inode *inode, int mask)
return -EAGAIN;
}
-int gfs2_check_acl(struct inode *inode, int mask)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder i_gh;
- int error;
-
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
- if (!error) {
- error = gfs2_check_acl_locked(inode, mask);
- gfs2_glock_dq_uninit(&i_gh);
- }
-
- return error;
-}
-
static int munge_mode(struct gfs2_inode *ip, mode_t mode)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
@@ -198,10 +183,10 @@ static int munge_mode(struct gfs2_inode *ip, mode_t mode)
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
gfs2_assert_withdraw(sdp,
- (ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT));
- ip->i_di.di_mode = mode;
+ (ip->i_inode.i_mode & S_IFMT) == (mode & S_IFMT));
+ ip->i_inode.i_mode = mode;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
}
@@ -215,12 +200,12 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct posix_acl *acl = NULL, *clone;
struct gfs2_ea_request er;
- mode_t mode = ip->i_di.di_mode;
+ mode_t mode = ip->i_inode.i_mode;
int error;
if (!sdp->sd_args.ar_posix_acl)
return 0;
- if (S_ISLNK(ip->i_di.di_mode))
+ if (S_ISLNK(ip->i_inode.i_mode))
return 0;
memset(&er, 0, sizeof(struct gfs2_ea_request));
@@ -232,7 +217,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
return error;
if (!acl) {
mode &= ~current->fs->umask;
- if (mode != ip->i_di.di_mode)
+ if (mode != ip->i_inode.i_mode)
error = munge_mode(ip, mode);
return error;
}
@@ -244,7 +229,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
posix_acl_release(acl);
acl = clone;
- if (S_ISDIR(ip->i_di.di_mode)) {
+ if (S_ISDIR(ip->i_inode.i_mode)) {
er.er_name = GFS2_POSIX_ACL_DEFAULT;
er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
error = gfs2_system_eaops.eo_set(ip, &er);
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index 05c294fe0d7..6751930bfb6 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -31,7 +31,6 @@ int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
struct gfs2_ea_request *er,
int *remove, mode_t *mode);
int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access);
-int gfs2_check_acl_locked(struct inode *inode, int mask);
int gfs2_check_acl(struct inode *inode, int mask);
int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip);
int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 06e9a8cb45e..8240c1ff94f 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -38,8 +38,8 @@ struct metapath {
};
typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
- struct buffer_head *bh, u64 *top,
- u64 *bottom, unsigned int height,
+ struct buffer_head *bh, __be64 *top,
+ __be64 *bottom, unsigned int height,
void *data);
struct strip_mine {
@@ -163,6 +163,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
if (ip->i_di.di_size) {
*(__be64 *)(di + 1) = cpu_to_be64(block);
ip->i_di.di_blocks++;
+ gfs2_set_inode_blocks(&ip->i_inode);
di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
}
@@ -230,7 +231,7 @@ static int build_height(struct inode *inode, unsigned height)
struct buffer_head *blocks[GFS2_MAX_META_HEIGHT];
struct gfs2_dinode *di;
int error;
- u64 *bp;
+ __be64 *bp;
u64 bn;
unsigned n;
@@ -255,7 +256,7 @@ static int build_height(struct inode *inode, unsigned height)
GFS2_FORMAT_IN);
gfs2_buffer_clear_tail(blocks[n],
sizeof(struct gfs2_meta_header));
- bp = (u64 *)(blocks[n]->b_data +
+ bp = (__be64 *)(blocks[n]->b_data +
sizeof(struct gfs2_meta_header));
*bp = cpu_to_be64(blocks[n+1]->b_blocknr);
brelse(blocks[n]);
@@ -272,6 +273,7 @@ static int build_height(struct inode *inode, unsigned height)
*(__be64 *)(di + 1) = cpu_to_be64(bn);
ip->i_di.di_height += new_height;
ip->i_di.di_blocks += new_height;
+ gfs2_set_inode_blocks(&ip->i_inode);
di->di_height = cpu_to_be16(ip->i_di.di_height);
di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
brelse(dibh);
@@ -360,15 +362,15 @@ static void find_metapath(struct gfs2_inode *ip, u64 block,
* metadata tree.
*/
-static inline u64 *metapointer(struct buffer_head *bh, int *boundary,
+static inline __be64 *metapointer(struct buffer_head *bh, int *boundary,
unsigned int height, const struct metapath *mp)
{
unsigned int head_size = (height > 0) ?
sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
- u64 *ptr;
+ __be64 *ptr;
*boundary = 0;
- ptr = ((u64 *)(bh->b_data + head_size)) + mp->mp_list[height];
- if (ptr + 1 == (u64 *)(bh->b_data + bh->b_size))
+ ptr = ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height];
+ if (ptr + 1 == (__be64 *)(bh->b_data + bh->b_size))
*boundary = 1;
return ptr;
}
@@ -394,7 +396,7 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
int *new, u64 *block)
{
int boundary;
- u64 *ptr = metapointer(bh, &boundary, height, mp);
+ __be64 *ptr = metapointer(bh, &boundary, height, mp);
if (*ptr) {
*block = be64_to_cpu(*ptr);
@@ -415,17 +417,35 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
*ptr = cpu_to_be64(*block);
ip->i_di.di_blocks++;
+ gfs2_set_inode_blocks(&ip->i_inode);
*new = 1;
return 0;
}
+static inline void bmap_lock(struct inode *inode, int create)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ if (create)
+ down_write(&ip->i_rw_mutex);
+ else
+ down_read(&ip->i_rw_mutex);
+}
+
+static inline void bmap_unlock(struct inode *inode, int create)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ if (create)
+ up_write(&ip->i_rw_mutex);
+ else
+ up_read(&ip->i_rw_mutex);
+}
+
/**
- * gfs2_block_pointers - Map a block from an inode to a disk block
+ * gfs2_block_map - Map a block from an inode to a disk block
* @inode: The inode
* @lblock: The logical block number
- * @map_bh: The bh to be mapped
- * @mp: metapath to use
+ * @bh_map: The bh to be mapped
*
* Find the block number on the current device which corresponds to an
* inode's block. If the block had to be created, "new" will be set.
@@ -433,8 +453,8 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
* Returns: errno
*/
-static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
- struct buffer_head *bh_map, struct metapath *mp)
+int gfs2_block_map(struct inode *inode, u64 lblock, int create,
+ struct buffer_head *bh_map)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -448,57 +468,61 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
u64 dblock = 0;
int boundary;
unsigned int maxlen = bh_map->b_size >> inode->i_blkbits;
+ struct metapath mp;
+ u64 size;
BUG_ON(maxlen == 0);
if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
return 0;
+ bmap_lock(inode, create);
+ clear_buffer_mapped(bh_map);
+ clear_buffer_new(bh_map);
+ clear_buffer_boundary(bh_map);
bsize = gfs2_is_dir(ip) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
-
- height = calc_tree_height(ip, (lblock + 1) * bsize);
- if (ip->i_di.di_height < height) {
- if (!create)
- return 0;
-
- error = build_height(inode, height);
- if (error)
- return error;
+ size = (lblock + 1) * bsize;
+
+ if (size > ip->i_di.di_size) {
+ height = calc_tree_height(ip, size);
+ if (ip->i_di.di_height < height) {
+ if (!create)
+ goto out_ok;
+
+ error = build_height(inode, height);
+ if (error)
+ goto out_fail;
+ }
}
- find_metapath(ip, lblock, mp);
+ find_metapath(ip, lblock, &mp);
end_of_metadata = ip->i_di.di_height - 1;
-
error = gfs2_meta_inode_buffer(ip, &bh);
if (error)
- return error;
+ goto out_fail;
for (x = 0; x < end_of_metadata; x++) {
- lookup_block(ip, bh, x, mp, create, &new, &dblock);
+ lookup_block(ip, bh, x, &mp, create, &new, &dblock);
brelse(bh);
if (!dblock)
- return 0;
+ goto out_ok;
error = gfs2_meta_indirect_buffer(ip, x+1, dblock, new, &bh);
if (error)
- return error;
+ goto out_fail;
}
- boundary = lookup_block(ip, bh, end_of_metadata, mp, create, &new, &dblock);
- clear_buffer_mapped(bh_map);
- clear_buffer_new(bh_map);
- clear_buffer_boundary(bh_map);
-
+ boundary = lookup_block(ip, bh, end_of_metadata, &mp, create, &new, &dblock);
if (dblock) {
map_bh(bh_map, inode->i_sb, dblock);
if (boundary)
- set_buffer_boundary(bh);
+ set_buffer_boundary(bh_map);
if (new) {
struct buffer_head *dibh;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
}
set_buffer_new(bh_map);
@@ -507,8 +531,8 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
while(--maxlen && !buffer_boundary(bh_map)) {
u64 eblock;
- mp->mp_list[end_of_metadata]++;
- boundary = lookup_block(ip, bh, end_of_metadata, mp, 0, &new, &eblock);
+ mp.mp_list[end_of_metadata]++;
+ boundary = lookup_block(ip, bh, end_of_metadata, &mp, 0, &new, &eblock);
if (eblock != ++dblock)
break;
bh_map->b_size += (1 << inode->i_blkbits);
@@ -518,43 +542,15 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
}
out_brelse:
brelse(bh);
- return 0;
-}
-
-
-static inline void bmap_lock(struct inode *inode, int create)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- if (create)
- down_write(&ip->i_rw_mutex);
- else
- down_read(&ip->i_rw_mutex);
-}
-
-static inline void bmap_unlock(struct inode *inode, int create)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- if (create)
- up_write(&ip->i_rw_mutex);
- else
- up_read(&ip->i_rw_mutex);
-}
-
-int gfs2_block_map(struct inode *inode, u64 lblock, int create,
- struct buffer_head *bh)
-{
- struct metapath mp;
- int ret;
-
- bmap_lock(inode, create);
- ret = gfs2_block_pointers(inode, lblock, create, bh, &mp);
+out_ok:
+ error = 0;
+out_fail:
bmap_unlock(inode, create);
- return ret;
+ return error;
}
int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
{
- struct metapath mp;
struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
int ret;
int create = *new;
@@ -564,9 +560,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
BUG_ON(!new);
bh.b_size = 1 << (inode->i_blkbits + 5);
- bmap_lock(inode, create);
- ret = gfs2_block_pointers(inode, lblock, create, &bh, &mp);
- bmap_unlock(inode, create);
+ ret = gfs2_block_map(inode, lblock, create, &bh);
*extlen = bh.b_size >> inode->i_blkbits;
*dblock = bh.b_blocknr;
if (buffer_new(&bh))
@@ -600,7 +594,7 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head *bh = NULL;
- u64 *top, *bottom;
+ __be64 *top, *bottom;
u64 bn;
int error;
int mh_size = sizeof(struct gfs2_meta_header);
@@ -611,17 +605,17 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
return error;
dibh = bh;
- top = (u64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
- bottom = (u64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
+ top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
+ bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
} else {
error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
if (error)
return error;
- top = (u64 *)(bh->b_data + mh_size) +
+ top = (__be64 *)(bh->b_data + mh_size) +
(first ? mp->mp_list[height] : 0);
- bottom = (u64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
+ bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
}
error = bc(ip, dibh, bh, top, bottom, height, data);
@@ -660,7 +654,7 @@ out:
*/
static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
- struct buffer_head *bh, u64 *top, u64 *bottom,
+ struct buffer_head *bh, __be64 *top, __be64 *bottom,
unsigned int height, void *data)
{
struct strip_mine *sm = data;
@@ -668,7 +662,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
struct gfs2_rgrp_list rlist;
u64 bn, bstart;
u32 blen;
- u64 *p;
+ __be64 *p;
unsigned int rg_blocks = 0;
int metadata;
unsigned int revokes = 0;
@@ -770,6 +764,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
if (!ip->i_di.di_blocks)
gfs2_consist_inode(ip);
ip->i_di.di_blocks--;
+ gfs2_set_inode_blocks(&ip->i_inode);
}
if (bstart) {
if (metadata)
@@ -778,9 +773,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
gfs2_free_data(ip, bstart, blen);
}
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+ ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
up_write(&ip->i_rw_mutex);
@@ -819,7 +814,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
if (error)
goto out;
- error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+ error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
if (error)
goto out_gunlock_q;
@@ -853,14 +848,14 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
}
ip->i_di.di_size = size;
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+ ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
goto out_end_trans;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
out_end_trans:
@@ -968,9 +963,9 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
if (gfs2_is_stuffed(ip)) {
ip->i_di.di_size = size;
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+ ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
error = 1;
@@ -980,10 +975,10 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
if (!error) {
ip->i_di.di_size = size;
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+ ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
}
}
@@ -1053,11 +1048,11 @@ static int trunc_end(struct gfs2_inode *ip)
ip->i_num.no_addr;
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
}
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+ ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
out:
@@ -1109,7 +1104,7 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
{
int error;
- if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_di.di_mode)))
+ if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode)))
return -EINVAL;
if (size > ip->i_di.di_size)
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index cab1f68d468..683cb5bda87 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -112,6 +112,7 @@ int gfs2_logd(void *data)
struct gfs2_sbd *sdp = data;
struct gfs2_holder ji_gh;
unsigned long t;
+ int need_flush;
while (!kthread_should_stop()) {
/* Advance the log tail */
@@ -120,8 +121,10 @@ int gfs2_logd(void *data)
gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
gfs2_ail1_empty(sdp, DIO_ALL);
-
- if (time_after_eq(jiffies, t)) {
+ gfs2_log_lock(sdp);
+ need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks);
+ gfs2_log_unlock(sdp);
+ if (need_flush || time_after_eq(jiffies, t)) {
gfs2_log_flush(sdp, NULL);
sdp->sd_log_flush_time = jiffies;
}
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index e24af28b1a1..0fdcb7713cd 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -131,8 +131,8 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
if (ip->i_di.di_size < offset + size)
ip->i_di.di_size = offset + size;
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -229,10 +229,10 @@ out:
if (ip->i_di.di_size < offset + copied)
ip->i_di.di_size = offset + copied;
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+ ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
return copied;
@@ -340,10 +340,15 @@ fail:
return (copied) ? copied : error;
}
+static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
+{
+ return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0;
+}
+
static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent,
const struct qstr *name, int ret)
{
- if (dent->de_inum.no_addr != 0 &&
+ if (!gfs2_dirent_sentinel(dent) &&
be32_to_cpu(dent->de_hash) == name->hash &&
be16_to_cpu(dent->de_name_len) == name->len &&
memcmp(dent+1, name->name, name->len) == 0)
@@ -388,7 +393,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
unsigned totlen = be16_to_cpu(dent->de_rec_len);
- if (!dent->de_inum.no_addr)
+ if (gfs2_dirent_sentinel(dent))
actual = GFS2_DIRENT_SIZE(0);
if (totlen - actual >= required)
return 1;
@@ -405,7 +410,7 @@ static int gfs2_dirent_gather(const struct gfs2_dirent *dent,
void *opaque)
{
struct dirent_gather *g = opaque;
- if (dent->de_inum.no_addr) {
+ if (!gfs2_dirent_sentinel(dent)) {
g->pdent[g->offset++] = dent;
}
return 0;
@@ -433,10 +438,10 @@ static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset,
if (unlikely(offset + size > len))
goto error;
msg = "zero inode number";
- if (unlikely(!first && !dent->de_inum.no_addr))
+ if (unlikely(!first && gfs2_dirent_sentinel(dent)))
goto error;
msg = "name length is greater than space in dirent";
- if (dent->de_inum.no_addr &&
+ if (!gfs2_dirent_sentinel(dent) &&
unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) >
size))
goto error;
@@ -598,7 +603,7 @@ static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
return ret;
/* Only the first dent could ever have de_inum.no_addr == 0 */
- if (!tmp->de_inum.no_addr) {
+ if (gfs2_dirent_sentinel(tmp)) {
gfs2_consist_inode(dip);
return -EIO;
}
@@ -621,7 +626,7 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
{
u16 cur_rec_len, prev_rec_len;
- if (!cur->de_inum.no_addr) {
+ if (gfs2_dirent_sentinel(cur)) {
gfs2_consist_inode(dip);
return;
}
@@ -633,7 +638,8 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
out the inode number and return. */
if (!prev) {
- cur->de_inum.no_addr = 0; /* No endianess worries */
+ cur->de_inum.no_addr = 0;
+ cur->de_inum.no_formal_ino = 0;
return;
}
@@ -664,7 +670,7 @@ static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
struct gfs2_dirent *ndent;
unsigned offset = 0, totlen;
- if (dent->de_inum.no_addr)
+ if (!gfs2_dirent_sentinel(dent))
offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
totlen = be16_to_cpu(dent->de_rec_len);
BUG_ON(offset + name->len > totlen);
@@ -713,12 +719,12 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no,
static int get_leaf_nr(struct gfs2_inode *dip, u32 index,
u64 *leaf_out)
{
- u64 leaf_no;
+ __be64 leaf_no;
int error;
error = gfs2_dir_read_data(dip, (char *)&leaf_no,
- index * sizeof(u64),
- sizeof(u64), 0);
+ index * sizeof(__be64),
+ sizeof(__be64), 0);
if (error != sizeof(u64))
return (error < 0) ? error : -EIO;
@@ -837,7 +843,8 @@ static int dir_make_exhash(struct inode *inode)
struct gfs2_leaf *leaf;
int y;
u32 x;
- u64 *lp, bn;
+ __be64 *lp;
+ u64 bn;
int error;
error = gfs2_meta_inode_buffer(dip, &dibh);
@@ -893,20 +900,20 @@ static int dir_make_exhash(struct inode *inode)
gfs2_trans_add_bh(dip->i_gl, dibh, 1);
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
- lp = (u64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
+ lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
for (x = sdp->sd_hash_ptrs; x--; lp++)
*lp = cpu_to_be64(bn);
dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
dip->i_di.di_blocks++;
+ gfs2_set_inode_blocks(&dip->i_inode);
dip->i_di.di_flags |= GFS2_DIF_EXHASH;
- dip->i_di.di_payload_format = 0;
for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
dip->i_di.di_depth = y;
- gfs2_dinode_out(&dip->i_di, dibh->b_data);
+ gfs2_dinode_out(dip, dibh->b_data);
brelse(dibh);
@@ -929,7 +936,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
struct gfs2_leaf *nleaf, *oleaf;
struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new;
u32 start, len, half_len, divider;
- u64 bn, *lp, leaf_no;
+ u64 bn, leaf_no;
+ __be64 *lp;
u32 index;
int x, moved = 0;
int error;
@@ -974,7 +982,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
/* Change the pointers.
Don't bother distinguishing stuffed from non-stuffed.
This code is complicated enough already. */
- lp = kmalloc(half_len * sizeof(u64), GFP_NOFS | __GFP_NOFAIL);
+ lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS | __GFP_NOFAIL);
/* Change the pointers */
for (x = 0; x < half_len; x++)
lp[x] = cpu_to_be64(bn);
@@ -1000,7 +1008,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
if (dirent_next(dip, obh, &next))
next = NULL;
- if (dent->de_inum.no_addr &&
+ if (!gfs2_dirent_sentinel(dent) &&
be32_to_cpu(dent->de_hash) < divider) {
struct qstr str;
str.name = (char*)(dent+1);
@@ -1037,7 +1045,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
error = gfs2_meta_inode_buffer(dip, &dibh);
if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
dip->i_di.di_blocks++;
- gfs2_dinode_out(&dip->i_di, dibh->b_data);
+ gfs2_set_inode_blocks(&dip->i_inode);
+ gfs2_dinode_out(dip, dibh->b_data);
brelse(dibh);
}
@@ -1117,7 +1126,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
error = gfs2_meta_inode_buffer(dip, &dibh);
if (!gfs2_assert_withdraw(sdp, !error)) {
dip->i_di.di_depth++;
- gfs2_dinode_out(&dip->i_di, dibh->b_data);
+ gfs2_dinode_out(dip, dibh->b_data);
brelse(dibh);
}
@@ -1194,7 +1203,7 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
int *copied)
{
const struct gfs2_dirent *dent, *dent_next;
- struct gfs2_inum inum;
+ struct gfs2_inum_host inum;
u64 off, off_next;
unsigned int x, y;
int run = 0;
@@ -1341,7 +1350,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
u32 hsize, len = 0;
u32 ht_offset, lp_offset, ht_offset_cur = -1;
u32 hash, index;
- u64 *lp;
+ __be64 *lp;
int copied = 0;
int error = 0;
unsigned depth = 0;
@@ -1365,7 +1374,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
if (ht_offset_cur != ht_offset) {
error = gfs2_dir_read_data(dip, (char *)lp,
- ht_offset * sizeof(u64),
+ ht_offset * sizeof(__be64),
sdp->sd_hash_bsize, 1);
if (error != sdp->sd_hash_bsize) {
if (error >= 0)
@@ -1456,7 +1465,7 @@ out:
*/
int gfs2_dir_search(struct inode *dir, const struct qstr *name,
- struct gfs2_inum *inum, unsigned int *type)
+ struct gfs2_inum_host *inum, unsigned int *type)
{
struct buffer_head *bh;
struct gfs2_dirent *dent;
@@ -1515,7 +1524,8 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
return error;
gfs2_trans_add_bh(ip->i_gl, bh, 1);
ip->i_di.di_blocks++;
- gfs2_dinode_out(&ip->i_di, bh->b_data);
+ gfs2_set_inode_blocks(&ip->i_inode);
+ gfs2_dinode_out(ip, bh->b_data);
brelse(bh);
return 0;
}
@@ -1531,7 +1541,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
*/
int gfs2_dir_add(struct inode *inode, const struct qstr *name,
- const struct gfs2_inum *inum, unsigned type)
+ const struct gfs2_inum_host *inum, unsigned type)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct buffer_head *bh;
@@ -1558,8 +1568,8 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
break;
gfs2_trans_add_bh(ip->i_gl, bh, 1);
ip->i_di.di_entries++;
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
- gfs2_dinode_out(&ip->i_di, bh->b_data);
+ ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ gfs2_dinode_out(ip, bh->b_data);
brelse(bh);
error = 0;
break;
@@ -1644,8 +1654,8 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
gfs2_consist_inode(dip);
gfs2_trans_add_bh(dip->i_gl, bh, 1);
dip->i_di.di_entries--;
- dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
- gfs2_dinode_out(&dip->i_di, bh->b_data);
+ dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds();
+ gfs2_dinode_out(dip, bh->b_data);
brelse(bh);
mark_inode_dirty(&dip->i_inode);
@@ -1666,7 +1676,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
*/
int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
- struct gfs2_inum *inum, unsigned int new_type)
+ struct gfs2_inum_host *inum, unsigned int new_type)
{
struct buffer_head *bh;
struct gfs2_dirent *dent;
@@ -1692,8 +1702,8 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
gfs2_trans_add_bh(dip->i_gl, bh, 1);
}
- dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
- gfs2_dinode_out(&dip->i_di, bh->b_data);
+ dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds();
+ gfs2_dinode_out(dip, bh->b_data);
brelse(bh);
return 0;
}
@@ -1715,7 +1725,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
u32 hsize, len;
u32 ht_offset, lp_offset, ht_offset_cur = -1;
u32 index = 0;
- u64 *lp;
+ __be64 *lp;
u64 leaf_no;
int error = 0;
@@ -1735,7 +1745,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
if (ht_offset_cur != ht_offset) {
error = gfs2_dir_read_data(dip, (char *)lp,
- ht_offset * sizeof(u64),
+ ht_offset * sizeof(__be64),
sdp->sd_hash_bsize, 1);
if (error != sdp->sd_hash_bsize) {
if (error >= 0)
@@ -1859,6 +1869,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
if (!dip->i_di.di_blocks)
gfs2_consist_inode(dip);
dip->i_di.di_blocks--;
+ gfs2_set_inode_blocks(&dip->i_inode);
}
error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size);
@@ -1873,7 +1884,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
goto out_end_trans;
gfs2_trans_add_bh(dip->i_gl, dibh, 1);
- gfs2_dinode_out(&dip->i_di, dibh->b_data);
+ gfs2_dinode_out(dip, dibh->b_data);
brelse(dibh);
out_end_trans:
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index 371233419b0..b21b33668a5 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -31,17 +31,17 @@ struct gfs2_inum;
typedef int (*gfs2_filldir_t) (void *opaque,
const char *name, unsigned int length,
u64 offset,
- struct gfs2_inum *inum, unsigned int type);
+ struct gfs2_inum_host *inum, unsigned int type);
int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
- struct gfs2_inum *inum, unsigned int *type);
+ struct gfs2_inum_host *inum, unsigned int *type);
int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
- const struct gfs2_inum *inum, unsigned int type);
+ const struct gfs2_inum_host *inum, unsigned int type);
int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque,
gfs2_filldir_t filldir);
int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
- struct gfs2_inum *new_inum, unsigned int new_type);
+ struct gfs2_inum_host *new_inum, unsigned int new_type);
int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
index 92c54e9b0dc..cd747c00f67 100644
--- a/fs/gfs2/eaops.c
+++ b/fs/gfs2/eaops.c
@@ -120,7 +120,7 @@ static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
if (!(er->er_flags & GFS2_ERF_MODE)) {
- er->er_mode = ip->i_di.di_mode;
+ er->er_mode = ip->i_inode.i_mode;
er->er_flags |= GFS2_ERF_MODE;
}
error = gfs2_acl_validate_set(ip, 1, er,
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index a65a4ccfd4d..ebebbdcd705 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -112,7 +112,7 @@ fail:
static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
{
struct buffer_head *bh, *eabh;
- u64 *eablk, *end;
+ __be64 *eablk, *end;
int error;
error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &bh);
@@ -129,7 +129,7 @@ static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
goto out;
}
- eablk = (u64 *)(bh->b_data + sizeof(struct gfs2_meta_header));
+ eablk = (__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header));
end = eablk + GFS2_SB(&ip->i_inode)->sd_inptrs;
for (; eablk < end; eablk++) {
@@ -224,7 +224,8 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
struct gfs2_rgrpd *rgd;
struct gfs2_holder rg_gh;
struct buffer_head *dibh;
- u64 *dataptrs, bn = 0;
+ __be64 *dataptrs;
+ u64 bn = 0;
u64 bstart = 0;
unsigned int blen = 0;
unsigned int blks = 0;
@@ -280,6 +281,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
if (!ip->i_di.di_blocks)
gfs2_consist_inode(ip);
ip->i_di.di_blocks--;
+ gfs2_set_inode_blocks(&ip->i_inode);
}
if (bstart)
gfs2_free_meta(ip, bstart, blen);
@@ -299,9 +301,9 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
- ip->i_di.di_ctime = get_seconds();
+ ip->i_inode.i_ctime.tv_sec = get_seconds();
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
}
@@ -444,7 +446,7 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
struct buffer_head **bh;
unsigned int amount = GFS2_EA_DATA_LEN(ea);
unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
- u64 *dataptrs = GFS2_EA2DATAPTRS(ea);
+ __be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
unsigned int x;
int error = 0;
@@ -597,6 +599,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
ea->ea_num_ptrs = 0;
ip->i_di.di_blocks++;
+ gfs2_set_inode_blocks(&ip->i_inode);
return 0;
}
@@ -629,7 +632,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
ea->ea_num_ptrs = 0;
memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len);
} else {
- u64 *dataptr = GFS2_EA2DATAPTRS(ea);
+ __be64 *dataptr = GFS2_EA2DATAPTRS(ea);
const char *data = er->er_data;
unsigned int data_len = er->er_data_len;
unsigned int copy;
@@ -648,6 +651,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
ip->i_di.di_blocks++;
+ gfs2_set_inode_blocks(&ip->i_inode);
copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize :
data_len;
@@ -686,7 +690,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
if (error)
goto out;
- error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+ error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
if (error)
goto out_gunlock_q;
@@ -710,13 +714,13 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
if (!error) {
if (er->er_flags & GFS2_ERF_MODE) {
gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
- (ip->i_di.di_mode & S_IFMT) ==
+ (ip->i_inode.i_mode & S_IFMT) ==
(er->er_mode & S_IFMT));
- ip->i_di.di_mode = er->er_mode;
+ ip->i_inode.i_mode = er->er_mode;
}
- ip->i_di.di_ctime = get_seconds();
+ ip->i_inode.i_ctime.tv_sec = get_seconds();
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
}
@@ -846,12 +850,12 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
if (er->er_flags & GFS2_ERF_MODE) {
gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
- (ip->i_di.di_mode & S_IFMT) == (er->er_mode & S_IFMT));
- ip->i_di.di_mode = er->er_mode;
+ (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
+ ip->i_inode.i_mode = er->er_mode;
}
- ip->i_di.di_ctime = get_seconds();
+ ip->i_inode.i_ctime.tv_sec = get_seconds();
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
out:
gfs2_trans_end(GFS2_SB(&ip->i_inode));
@@ -931,12 +935,12 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head *indbh, *newbh;
- u64 *eablk;
+ __be64 *eablk;
int error;
int mh_size = sizeof(struct gfs2_meta_header);
if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
- u64 *end;
+ __be64 *end;
error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT,
&indbh);
@@ -948,7 +952,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
goto out;
}
- eablk = (u64 *)(indbh->b_data + mh_size);
+ eablk = (__be64 *)(indbh->b_data + mh_size);
end = eablk + sdp->sd_inptrs;
for (; eablk < end; eablk++)
@@ -971,11 +975,12 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
gfs2_buffer_clear_tail(indbh, mh_size);
- eablk = (u64 *)(indbh->b_data + mh_size);
+ eablk = (__be64 *)(indbh->b_data + mh_size);
*eablk = cpu_to_be64(ip->i_di.di_eattr);
ip->i_di.di_eattr = blk;
ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
ip->i_di.di_blocks++;
+ gfs2_set_inode_blocks(&ip->i_inode);
eablk++;
}
@@ -1129,9 +1134,9 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
- ip->i_di.di_ctime = get_seconds();
+ ip->i_inode.i_ctime.tv_sec = get_seconds();
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
}
@@ -1202,7 +1207,7 @@ static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
struct buffer_head **bh;
unsigned int amount = GFS2_EA_DATA_LEN(ea);
unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
- u64 *dataptrs = GFS2_EA2DATAPTRS(ea);
+ __be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
unsigned int x;
int error;
@@ -1284,9 +1289,8 @@ int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
if (!error) {
error = inode_setattr(&ip->i_inode, attr);
gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
- gfs2_inode_attr_out(ip);
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
}
@@ -1300,7 +1304,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrp_list rlist;
struct buffer_head *indbh, *dibh;
- u64 *eablk, *end;
+ __be64 *eablk, *end;
unsigned int rg_blocks = 0;
u64 bstart = 0;
unsigned int blen = 0;
@@ -1319,7 +1323,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
goto out;
}
- eablk = (u64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
+ eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
end = eablk + sdp->sd_inptrs;
for (; eablk < end; eablk++) {
@@ -1363,7 +1367,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
gfs2_trans_add_bh(ip->i_gl, indbh, 1);
- eablk = (u64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
+ eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
bstart = 0;
blen = 0;
@@ -1387,6 +1391,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
if (!ip->i_di.di_blocks)
gfs2_consist_inode(ip);
ip->i_di.di_blocks--;
+ gfs2_set_inode_blocks(&ip->i_inode);
}
if (bstart)
gfs2_free_meta(ip, bstart, blen);
@@ -1396,7 +1401,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
}
@@ -1441,11 +1446,12 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
if (!ip->i_di.di_blocks)
gfs2_consist_inode(ip);
ip->i_di.di_blocks--;
+ gfs2_set_inode_blocks(&ip->i_inode);
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
}
diff --git a/fs/gfs2/eattr.h b/fs/gfs2/eattr.h
index ffa65947d68..c82dbe01d71 100644
--- a/fs/gfs2/eattr.h
+++ b/fs/gfs2/eattr.h
@@ -19,7 +19,7 @@ struct iattr;
#define GFS2_EA_SIZE(ea) \
ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \
- (sizeof(u64) * (ea)->ea_num_ptrs)), 8)
+ (sizeof(__be64) * (ea)->ea_num_ptrs)), 8)
#define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)
@@ -29,13 +29,13 @@ ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8)
#define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
- sizeof(u64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8)
+ sizeof(__be64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8)
#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)
#define GFS2_EA2DATAPTRS(ea) \
-((u64 *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))
+((__be64 *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))
#define GFS2_EA2NEXT(ea) \
((struct gfs2_ea_header *)((char *)(ea) + GFS2_EA_REC_LEN(ea)))
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 55f5333dae9..438146904b5 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -96,7 +96,7 @@ static inline rwlock_t *gl_lock_addr(unsigned int x)
return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
}
#else /* not SMP, so no spinlocks required */
-static inline rwlock_t *gl_lock_addr(x)
+static inline rwlock_t *gl_lock_addr(unsigned int x)
{
return NULL;
}
@@ -769,7 +769,7 @@ restart:
} else {
spin_unlock(&gl->gl_spin);
- new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_KERNEL);
+ new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_NOFS);
if (!new_gh)
return;
set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
@@ -785,21 +785,6 @@ out:
gfs2_holder_put(new_gh);
}
-void gfs2_glock_inode_squish(struct inode *inode)
-{
- struct gfs2_holder gh;
- struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
- gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
- set_bit(HIF_DEMOTE, &gh.gh_iflags);
- spin_lock(&gl->gl_spin);
- gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders));
- list_add_tail(&gh.gh_list, &gl->gl_waiters2);
- run_queue(gl);
- spin_unlock(&gl->gl_spin);
- wait_for_completion(&gh.gh_wait);
- gfs2_holder_uninit(&gh);
-}
-
/**
* state_change - record that the glock is now in a different state
* @gl: the glock
@@ -847,12 +832,12 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
if (glops->go_inval)
- glops->go_inval(gl, DIO_METADATA | DIO_DATA);
+ glops->go_inval(gl, DIO_METADATA);
} else if (gl->gl_state == LM_ST_DEFERRED) {
/* We might not want to do this here.
Look at moving to the inode glops. */
if (glops->go_inval)
- glops->go_inval(gl, DIO_DATA);
+ glops->go_inval(gl, 0);
}
/* Deal with each possible exit condition */
@@ -954,7 +939,7 @@ void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags)
gfs2_assert_warn(sdp, state != gl->gl_state);
if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
- glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE);
+ glops->go_sync(gl);
gfs2_glock_hold(gl);
gl->gl_req_bh = xmote_bh;
@@ -995,7 +980,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
state_change(gl, LM_ST_UNLOCKED);
if (glops->go_inval)
- glops->go_inval(gl, DIO_METADATA | DIO_DATA);
+ glops->go_inval(gl, DIO_METADATA);
if (gh) {
spin_lock(&gl->gl_spin);
@@ -1041,7 +1026,7 @@ void gfs2_glock_drop_th(struct gfs2_glock *gl)
gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
- glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE);
+ glops->go_sync(gl);
gfs2_glock_hold(gl);
gl->gl_req_bh = drop_bh;
@@ -1244,9 +1229,6 @@ restart:
clear_bit(GLF_PREFETCH, &gl->gl_flags);
- if (error == GLR_TRYFAILED && (gh->gh_flags & GL_DUMP))
- dump_glock(gl);
-
return error;
}
@@ -1923,7 +1905,7 @@ out:
static void scan_glock(struct gfs2_glock *gl)
{
- if (gl->gl_ops == &gfs2_inode_glops)
+ if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
return;
if (gfs2_glmutex_trylock(gl)) {
@@ -2078,7 +2060,7 @@ static int dump_inode(struct gfs2_inode *ip)
printk(KERN_INFO " num = %llu %llu\n",
(unsigned long long)ip->i_num.no_formal_ino,
(unsigned long long)ip->i_num.no_addr);
- printk(KERN_INFO " type = %u\n", IF2DT(ip->i_di.di_mode));
+ printk(KERN_INFO " type = %u\n", IF2DT(ip->i_inode.i_mode));
printk(KERN_INFO " i_flags =");
for (x = 0; x < 32; x++)
if (test_bit(x, &ip->i_flags))
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 2b2a889ee2c..fb39108fc05 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -27,8 +27,6 @@
#define GL_ATIME 0x00000200
#define GL_NOCACHE 0x00000400
#define GL_NOCANCEL 0x00001000
-#define GL_AOP 0x00004000
-#define GL_DUMP 0x00008000
#define GLR_TRYFAILED 13
#define GLR_CANCELED 14
@@ -108,7 +106,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops,
unsigned int state, int flags);
-void gfs2_glock_inode_squish(struct inode *inode);
/**
* gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 41a6b6818a5..b068d10bcb6 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -92,7 +92,7 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
ip = gl->gl_object;
inode = &ip->i_inode;
- if (!ip || !S_ISREG(ip->i_di.di_mode))
+ if (!ip || !S_ISREG(inode->i_mode))
return;
if (!test_bit(GIF_PAGED, &ip->i_flags))
@@ -107,89 +107,20 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
}
/**
- * gfs2_page_inval - Invalidate all pages associated with a glock
- * @gl: the glock
- *
- */
-
-static void gfs2_page_inval(struct gfs2_glock *gl)
-{
- struct gfs2_inode *ip;
- struct inode *inode;
-
- ip = gl->gl_object;
- inode = &ip->i_inode;
- if (!ip || !S_ISREG(ip->i_di.di_mode))
- return;
-
- truncate_inode_pages(inode->i_mapping, 0);
- gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !inode->i_mapping->nrpages);
- clear_bit(GIF_PAGED, &ip->i_flags);
-}
-
-/**
- * gfs2_page_wait - Wait for writeback of data
- * @gl: the glock
- *
- * Syncs data (not metadata) for a regular file.
- * No-op for all other types.
- */
-
-static void gfs2_page_wait(struct gfs2_glock *gl)
-{
- struct gfs2_inode *ip = gl->gl_object;
- struct inode *inode = &ip->i_inode;
- struct address_space *mapping = inode->i_mapping;
- int error;
-
- if (!S_ISREG(ip->i_di.di_mode))
- return;
-
- error = filemap_fdatawait(mapping);
-
- /* Put back any errors cleared by filemap_fdatawait()
- so they can be caught by someone who can pass them
- up to user space. */
-
- if (error == -ENOSPC)
- set_bit(AS_ENOSPC, &mapping->flags);
- else if (error)
- set_bit(AS_EIO, &mapping->flags);
-
-}
-
-static void gfs2_page_writeback(struct gfs2_glock *gl)
-{
- struct gfs2_inode *ip = gl->gl_object;
- struct inode *inode = &ip->i_inode;
- struct address_space *mapping = inode->i_mapping;
-
- if (!S_ISREG(ip->i_di.di_mode))
- return;
-
- filemap_fdatawrite(mapping);
-}
-
-/**
* meta_go_sync - sync out the metadata for this glock
* @gl: the glock
- * @flags: DIO_*
*
* Called when demoting or unlocking an EX glock. We must flush
* to disk all dirty buffers/pages relating to this glock, and must not
* not return to caller to demote/unlock the glock until I/O is complete.
*/
-static void meta_go_sync(struct gfs2_glock *gl, int flags)
+static void meta_go_sync(struct gfs2_glock *gl)
{
- if (!(flags & DIO_METADATA))
- return;
-
if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
gfs2_log_flush(gl->gl_sbd, gl);
gfs2_meta_sync(gl);
- if (flags & DIO_RELEASE)
- gfs2_ail_empty_gl(gl);
+ gfs2_ail_empty_gl(gl);
}
}
@@ -264,31 +195,31 @@ static void inode_go_drop_th(struct gfs2_glock *gl)
/**
* inode_go_sync - Sync the dirty data and/or metadata for an inode glock
* @gl: the glock protecting the inode
- * @flags:
*
*/
-static void inode_go_sync(struct gfs2_glock *gl, int flags)
+static void inode_go_sync(struct gfs2_glock *gl)
{
- int meta = (flags & DIO_METADATA);
- int data = (flags & DIO_DATA);
+ struct gfs2_inode *ip = gl->gl_object;
+
+ if (ip && !S_ISREG(ip->i_inode.i_mode))
+ ip = NULL;
if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
- if (meta && data) {
- gfs2_page_writeback(gl);
- gfs2_log_flush(gl->gl_sbd, gl);
- gfs2_meta_sync(gl);
- gfs2_page_wait(gl);
- clear_bit(GLF_DIRTY, &gl->gl_flags);
- } else if (meta) {
- gfs2_log_flush(gl->gl_sbd, gl);
- gfs2_meta_sync(gl);
- } else if (data) {
- gfs2_page_writeback(gl);
- gfs2_page_wait(gl);
+ gfs2_log_flush(gl->gl_sbd, gl);
+ if (ip)
+ filemap_fdatawrite(ip->i_inode.i_mapping);
+ gfs2_meta_sync(gl);
+ if (ip) {
+ struct address_space *mapping = ip->i_inode.i_mapping;
+ int error = filemap_fdatawait(mapping);
+ if (error == -ENOSPC)
+ set_bit(AS_ENOSPC, &mapping->flags);
+ else if (error)
+ set_bit(AS_EIO, &mapping->flags);
}
- if (flags & DIO_RELEASE)
- gfs2_ail_empty_gl(gl);
+ clear_bit(GLF_DIRTY, &gl->gl_flags);
+ gfs2_ail_empty_gl(gl);
}
}
@@ -301,15 +232,20 @@ static void inode_go_sync(struct gfs2_glock *gl, int flags)
static void inode_go_inval(struct gfs2_glock *gl, int flags)
{
+ struct gfs2_inode *ip = gl->gl_object;
int meta = (flags & DIO_METADATA);
- int data = (flags & DIO_DATA);
if (meta) {
gfs2_meta_inval(gl);
- gl->gl_vn++;
+ if (ip)
+ set_bit(GIF_INVALID, &ip->i_flags);
+ }
+
+ if (ip && S_ISREG(ip->i_inode.i_mode)) {
+ truncate_inode_pages(ip->i_inode.i_mapping, 0);
+ gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !ip->i_inode.i_mapping->nrpages);
+ clear_bit(GIF_PAGED, &ip->i_flags);
}
- if (data)
- gfs2_page_inval(gl);
}
/**
@@ -351,11 +287,10 @@ static int inode_go_lock(struct gfs2_holder *gh)
if (!ip)
return 0;
- if (ip->i_vn != gl->gl_vn) {
+ if (test_bit(GIF_INVALID, &ip->i_flags)) {
error = gfs2_inode_refresh(ip);
if (error)
return error;
- gfs2_inode_attr_in(ip);
}
if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
@@ -379,11 +314,8 @@ static void inode_go_unlock(struct gfs2_holder *gh)
struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_inode *ip = gl->gl_object;
- if (ip == NULL)
- return;
- if (test_bit(GLF_DIRTY, &gl->gl_flags))
- gfs2_inode_attr_in(ip);
- gfs2_meta_cache_flush(ip);
+ if (ip)
+ gfs2_meta_cache_flush(ip);
}
/**
@@ -491,13 +423,13 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl)
struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
struct gfs2_glock *j_gl = ip->i_gl;
- struct gfs2_log_header head;
+ struct gfs2_log_header_host head;
int error;
if (gl->gl_state != LM_ST_UNLOCKED &&
test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
gfs2_meta_cache_flush(GFS2_I(sdp->sd_jdesc->jd_inode));
- j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
+ j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
error = gfs2_find_jhead(sdp->sd_jdesc, &head);
if (error)
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 118dc693d11..734421edae8 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -14,8 +14,6 @@
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
-#define DIO_DATA 0x00000040
-#define DIO_RELEASE 0x00000080
#define DIO_ALL 0x00000100
struct gfs2_log_operations;
@@ -41,7 +39,7 @@ struct gfs2_log_operations {
void (*lo_before_commit) (struct gfs2_sbd *sdp);
void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
void (*lo_before_scan) (struct gfs2_jdesc *jd,
- struct gfs2_log_header *head, int pass);
+ struct gfs2_log_header_host *head, int pass);
int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
struct gfs2_log_descriptor *ld, __be64 *ptr,
int pass);
@@ -67,8 +65,8 @@ struct gfs2_rgrpd {
struct list_head rd_list_mru;
struct list_head rd_recent; /* Recently used rgrps */
struct gfs2_glock *rd_gl; /* Glock for this rgrp */
- struct gfs2_rindex rd_ri;
- struct gfs2_rgrp rd_rg;
+ struct gfs2_rindex_host rd_ri;
+ struct gfs2_rgrp_host rd_rg;
u64 rd_rg_vn;
struct gfs2_bitmap *rd_bits;
unsigned int rd_bh_count;
@@ -103,18 +101,17 @@ struct gfs2_bufdata {
};
struct gfs2_glock_operations {
- void (*go_xmote_th) (struct gfs2_glock * gl, unsigned int state,
- int flags);
- void (*go_xmote_bh) (struct gfs2_glock * gl);
- void (*go_drop_th) (struct gfs2_glock * gl);
- void (*go_drop_bh) (struct gfs2_glock * gl);
- void (*go_sync) (struct gfs2_glock * gl, int flags);
- void (*go_inval) (struct gfs2_glock * gl, int flags);
- int (*go_demote_ok) (struct gfs2_glock * gl);
- int (*go_lock) (struct gfs2_holder * gh);
- void (*go_unlock) (struct gfs2_holder * gh);
- void (*go_callback) (struct gfs2_glock * gl, unsigned int state);
- void (*go_greedy) (struct gfs2_glock * gl);
+ void (*go_xmote_th) (struct gfs2_glock *gl, unsigned int state, int flags);
+ void (*go_xmote_bh) (struct gfs2_glock *gl);
+ void (*go_drop_th) (struct gfs2_glock *gl);
+ void (*go_drop_bh) (struct gfs2_glock *gl);
+ void (*go_sync) (struct gfs2_glock *gl);
+ void (*go_inval) (struct gfs2_glock *gl, int flags);
+ int (*go_demote_ok) (struct gfs2_glock *gl);
+ int (*go_lock) (struct gfs2_holder *gh);
+ void (*go_unlock) (struct gfs2_holder *gh);
+ void (*go_callback) (struct gfs2_glock *gl, unsigned int state);
+ void (*go_greedy) (struct gfs2_glock *gl);
const int go_type;
};
@@ -217,6 +214,7 @@ struct gfs2_alloc {
};
enum {
+ GIF_INVALID = 0,
GIF_QD_LOCKED = 1,
GIF_PAGED = 2,
GIF_SW_PAGED = 3,
@@ -224,12 +222,11 @@ enum {
struct gfs2_inode {
struct inode i_inode;
- struct gfs2_inum i_num;
+ struct gfs2_inum_host i_num;
unsigned long i_flags; /* GIF_... */
- u64 i_vn;
- struct gfs2_dinode i_di; /* To be replaced by ref to block */
+ struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
struct gfs2_glock *i_gl; /* Move into i_gh? */
struct gfs2_holder i_iopen_gh;
@@ -450,7 +447,7 @@ struct gfs2_sbd {
struct super_block *sd_vfs_meta;
struct kobject sd_kobj;
unsigned long sd_flags; /* SDF_... */
- struct gfs2_sb sd_sb;
+ struct gfs2_sb_host sd_sb;
/* Constants computed on mount */
@@ -503,8 +500,8 @@ struct gfs2_sbd {
spinlock_t sd_statfs_spin;
struct mutex sd_statfs_mutex;
- struct gfs2_statfs_change sd_statfs_master;
- struct gfs2_statfs_change sd_statfs_local;
+ struct gfs2_statfs_change_host sd_statfs_master;
+ struct gfs2_statfs_change_host sd_statfs_local;
unsigned long sd_statfs_sync_time;
/* Resource group stuff */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index d470e5286ec..d122074c45e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -38,83 +38,12 @@
#include "trans.h"
#include "util.h"
-/**
- * gfs2_inode_attr_in - Copy attributes from the dinode into the VFS inode
- * @ip: The GFS2 inode (with embedded disk inode data)
- * @inode: The Linux VFS inode
- *
- */
-
-void gfs2_inode_attr_in(struct gfs2_inode *ip)
-{
- struct inode *inode = &ip->i_inode;
- struct gfs2_dinode *di = &ip->i_di;
-
- inode->i_ino = ip->i_num.no_addr;
-
- switch (di->di_mode & S_IFMT) {
- case S_IFBLK:
- case S_IFCHR:
- inode->i_rdev = MKDEV(di->di_major, di->di_minor);
- break;
- default:
- inode->i_rdev = 0;
- break;
- };
-
- inode->i_mode = di->di_mode;
- inode->i_nlink = di->di_nlink;
- inode->i_uid = di->di_uid;
- inode->i_gid = di->di_gid;
- i_size_write(inode, di->di_size);
- inode->i_atime.tv_sec = di->di_atime;
- inode->i_mtime.tv_sec = di->di_mtime;
- inode->i_ctime.tv_sec = di->di_ctime;
- inode->i_atime.tv_nsec = 0;
- inode->i_mtime.tv_nsec = 0;
- inode->i_ctime.tv_nsec = 0;
- inode->i_blocks = di->di_blocks <<
- (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
-
- if (di->di_flags & GFS2_DIF_IMMUTABLE)
- inode->i_flags |= S_IMMUTABLE;
- else
- inode->i_flags &= ~S_IMMUTABLE;
-
- if (di->di_flags & GFS2_DIF_APPENDONLY)
- inode->i_flags |= S_APPEND;
- else
- inode->i_flags &= ~S_APPEND;
-}
-
-/**
- * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode
- * @ip: The GFS2 inode
- *
- * Only copy out the attributes that we want the VFS layer
- * to be able to modify.
- */
-
-void gfs2_inode_attr_out(struct gfs2_inode *ip)
-{
- struct inode *inode = &ip->i_inode;
- struct gfs2_dinode *di = &ip->i_di;
- gfs2_assert_withdraw(GFS2_SB(inode),
- (di->di_mode & S_IFMT) == (inode->i_mode & S_IFMT));
- di->di_mode = inode->i_mode;
- di->di_uid = inode->i_uid;
- di->di_gid = inode->i_gid;
- di->di_atime = inode->i_atime.tv_sec;
- di->di_mtime = inode->i_mtime.tv_sec;
- di->di_ctime = inode->i_ctime.tv_sec;
-}
-
static int iget_test(struct inode *inode, void *opaque)
{
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_inum *inum = opaque;
+ struct gfs2_inum_host *inum = opaque;
- if (ip && ip->i_num.no_addr == inum->no_addr)
+ if (ip->i_num.no_addr == inum->no_addr)
return 1;
return 0;
@@ -123,19 +52,20 @@ static int iget_test(struct inode *inode, void *opaque)
static int iget_set(struct inode *inode, void *opaque)
{
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_inum *inum = opaque;
+ struct gfs2_inum_host *inum = opaque;
ip->i_num = *inum;
+ inode->i_ino = inum->no_addr;
return 0;
}
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum)
+struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum)
{
return ilookup5(sb, (unsigned long)inum->no_formal_ino,
iget_test, inum);
}
-static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
+static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum_host *inum)
{
return iget5_locked(sb, (unsigned long)inum->no_formal_ino,
iget_test, iget_set, inum);
@@ -150,7 +80,7 @@ static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
* Returns: A VFS inode, or an error
*/
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned int type)
+struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned int type)
{
struct inode *inode = gfs2_iget(sb, inum);
struct gfs2_inode *ip = GFS2_I(inode);
@@ -188,7 +118,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum,
if (unlikely(error))
goto fail_put;
- ip->i_vn = ip->i_gl->gl_vn - 1;
+ set_bit(GIF_INVALID, &ip->i_flags);
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (unlikely(error))
goto fail_iopen;
@@ -208,6 +138,63 @@ fail:
return ERR_PTR(error);
}
+static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
+{
+ struct gfs2_dinode_host *di = &ip->i_di;
+ const struct gfs2_dinode *str = buf;
+
+ if (ip->i_num.no_addr != be64_to_cpu(str->di_num.no_addr)) {
+ if (gfs2_consist_inode(ip))
+ gfs2_dinode_print(ip);
+ return -EIO;
+ }
+ if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino))
+ return -ESTALE;
+
+ ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
+ ip->i_inode.i_rdev = 0;
+ switch (ip->i_inode.i_mode & S_IFMT) {
+ case S_IFBLK:
+ case S_IFCHR:
+ ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
+ be32_to_cpu(str->di_minor));
+ break;
+ };
+
+ ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
+ ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
+ /*
+ * We will need to review setting the nlink count here in the
+ * light of the forthcoming ro bind mount work. This is a reminder
+ * to do that.
+ */
+ ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
+ di->di_size = be64_to_cpu(str->di_size);
+ i_size_write(&ip->i_inode, di->di_size);
+ di->di_blocks = be64_to_cpu(str->di_blocks);
+ gfs2_set_inode_blocks(&ip->i_inode);
+ ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
+ ip->i_inode.i_atime.tv_nsec = 0;
+ ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
+ ip->i_inode.i_mtime.tv_nsec = 0;
+ ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
+ ip->i_inode.i_ctime.tv_nsec = 0;
+
+ di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
+ di->di_goal_data = be64_to_cpu(str->di_goal_data);
+ di->di_generation = be64_to_cpu(str->di_generation);
+
+ di->di_flags = be32_to_cpu(str->di_flags);
+ gfs2_set_inode_flags(&ip->i_inode);
+ di->di_height = be16_to_cpu(str->di_height);
+
+ di->di_depth = be16_to_cpu(str->di_depth);
+ di->di_entries = be32_to_cpu(str->di_entries);
+
+ di->di_eattr = be64_to_cpu(str->di_eattr);
+ return 0;
+}
+
/**
* gfs2_inode_refresh - Refresh the incore copy of the dinode
* @ip: The GFS2 inode
@@ -229,21 +216,11 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
return -EIO;
}
- gfs2_dinode_in(&ip->i_di, dibh->b_data);
-
+ error = gfs2_dinode_in(ip, dibh->b_data);
brelse(dibh);
+ clear_bit(GIF_INVALID, &ip->i_flags);
- if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
- if (gfs2_consist_inode(ip))
- gfs2_dinode_print(&ip->i_di);
- return -EIO;
- }
- if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino)
- return -ESTALE;
-
- ip->i_vn = ip->i_gl->gl_vn;
-
- return 0;
+ return error;
}
int gfs2_dinode_dealloc(struct gfs2_inode *ip)
@@ -255,7 +232,7 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
if (ip->i_di.di_blocks != 1) {
if (gfs2_consist_inode(ip))
- gfs2_dinode_print(&ip->i_di);
+ gfs2_dinode_print(ip);
return -EIO;
}
@@ -318,14 +295,14 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
u32 nlink;
int error;
- BUG_ON(ip->i_di.di_nlink != ip->i_inode.i_nlink);
- nlink = ip->i_di.di_nlink + diff;
+ BUG_ON(diff != 1 && diff != -1);
+ nlink = ip->i_inode.i_nlink + diff;
/* If we are reducing the nlink count, but the new value ends up being
bigger than the old one, we must have underflowed. */
- if (diff < 0 && nlink > ip->i_di.di_nlink) {
+ if (diff < 0 && nlink > ip->i_inode.i_nlink) {
if (gfs2_consist_inode(ip))
- gfs2_dinode_print(&ip->i_di);
+ gfs2_dinode_print(ip);
return -EIO;
}
@@ -333,16 +310,19 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
if (error)
return error;
- ip->i_di.di_nlink = nlink;
- ip->i_di.di_ctime = get_seconds();
- ip->i_inode.i_nlink = nlink;
+ if (diff > 0)
+ inc_nlink(&ip->i_inode);
+ else
+ drop_nlink(&ip->i_inode);
+
+ ip->i_inode.i_ctime.tv_sec = get_seconds();
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
mark_inode_dirty(&ip->i_inode);
- if (ip->i_di.di_nlink == 0) {
+ if (ip->i_inode.i_nlink == 0) {
struct gfs2_rgrpd *rgd;
struct gfs2_holder ri_gh, rg_gh;
@@ -357,7 +337,6 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
if (error)
goto out_norgrp;
- clear_nlink(&ip->i_inode);
gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
gfs2_glock_dq_uninit(&rg_gh);
out_norgrp:
@@ -394,7 +373,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
struct super_block *sb = dir->i_sb;
struct gfs2_inode *dip = GFS2_I(dir);
struct gfs2_holder d_gh;
- struct gfs2_inum inum;
+ struct gfs2_inum_host inum;
unsigned int type;
int error = 0;
struct inode *inode = NULL;
@@ -436,7 +415,7 @@ static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
{
struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
struct buffer_head *bh;
- struct gfs2_inum_range ir;
+ struct gfs2_inum_range_host ir;
int error;
error = gfs2_trans_begin(sdp, RES_DINODE, 0);
@@ -479,7 +458,7 @@ static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino)
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode);
struct gfs2_holder gh;
struct buffer_head *bh;
- struct gfs2_inum_range ir;
+ struct gfs2_inum_range_host ir;
int error;
error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
@@ -500,21 +479,22 @@ static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino)
if (!ir.ir_length) {
struct buffer_head *m_bh;
u64 x, y;
+ __be64 z;
error = gfs2_meta_inode_buffer(m_ip, &m_bh);
if (error)
goto out_brelse;
- x = *(u64 *)(m_bh->b_data + sizeof(struct gfs2_dinode));
- x = y = be64_to_cpu(x);
+ z = *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode));
+ x = y = be64_to_cpu(z);
ir.ir_start = x;
ir.ir_length = GFS2_INUM_QUANTUM;
x += GFS2_INUM_QUANTUM;
if (x < y)
gfs2_consist_inode(m_ip);
- x = cpu_to_be64(x);
+ z = cpu_to_be64(x);
gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
- *(u64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x;
+ *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = z;
brelse(m_bh);
}
@@ -567,7 +547,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
return error;
/* Don't create entries in an unlinked directory */
- if (!dip->i_di.di_nlink)
+ if (!dip->i_inode.i_nlink)
return -EPERM;
error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL);
@@ -583,7 +563,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
if (dip->i_di.di_entries == (u32)-1)
return -EFBIG;
- if (S_ISDIR(mode) && dip->i_di.di_nlink == (u32)-1)
+ if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1)
return -EMLINK;
return 0;
@@ -593,24 +573,24 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
unsigned int *uid, unsigned int *gid)
{
if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
- (dip->i_di.di_mode & S_ISUID) && dip->i_di.di_uid) {
+ (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) {
if (S_ISDIR(*mode))
*mode |= S_ISUID;
- else if (dip->i_di.di_uid != current->fsuid)
+ else if (dip->i_inode.i_uid != current->fsuid)
*mode &= ~07111;
- *uid = dip->i_di.di_uid;
+ *uid = dip->i_inode.i_uid;
} else
*uid = current->fsuid;
- if (dip->i_di.di_mode & S_ISGID) {
+ if (dip->i_inode.i_mode & S_ISGID) {
if (S_ISDIR(*mode))
*mode |= S_ISGID;
- *gid = dip->i_di.di_gid;
+ *gid = dip->i_inode.i_gid;
} else
*gid = current->fsgid;
}
-static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum *inum,
+static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
u64 *generation)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
@@ -650,9 +630,9 @@ out:
*/
static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
- const struct gfs2_inum *inum, unsigned int mode,
+ const struct gfs2_inum_host *inum, unsigned int mode,
unsigned int uid, unsigned int gid,
- const u64 *generation)
+ const u64 *generation, dev_t dev)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_dinode *di;
@@ -669,14 +649,15 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
di->di_mode = cpu_to_be32(mode);
di->di_uid = cpu_to_be32(uid);
di->di_gid = cpu_to_be32(gid);
- di->di_nlink = cpu_to_be32(0);
- di->di_size = cpu_to_be64(0);
+ di->di_nlink = 0;
+ di->di_size = 0;
di->di_blocks = cpu_to_be64(1);
di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
- di->di_major = di->di_minor = cpu_to_be32(0);
+ di->di_major = cpu_to_be32(MAJOR(dev));
+ di->di_minor = cpu_to_be32(MINOR(dev));
di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
di->di_generation = cpu_to_be64(*generation);
- di->di_flags = cpu_to_be32(0);
+ di->di_flags = 0;
if (S_ISREG(mode)) {
if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
@@ -693,22 +674,22 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
}
di->__pad1 = 0;
- di->di_payload_format = cpu_to_be32(0);
- di->di_height = cpu_to_be32(0);
+ di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0);
+ di->di_height = 0;
di->__pad2 = 0;
di->__pad3 = 0;
- di->di_depth = cpu_to_be16(0);
- di->di_entries = cpu_to_be32(0);
+ di->di_depth = 0;
+ di->di_entries = 0;
memset(&di->__pad4, 0, sizeof(di->__pad4));
- di->di_eattr = cpu_to_be64(0);
+ di->di_eattr = 0;
memset(&di->di_reserved, 0, sizeof(di->di_reserved));
brelse(dibh);
}
static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
- unsigned int mode, const struct gfs2_inum *inum,
- const u64 *generation)
+ unsigned int mode, const struct gfs2_inum_host *inum,
+ const u64 *generation, dev_t dev)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
unsigned int uid, gid;
@@ -729,7 +710,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
if (error)
goto out_quota;
- init_dinode(dip, gl, inum, mode, uid, gid, generation);
+ init_dinode(dip, gl, inum, mode, uid, gid, generation, dev);
gfs2_quota_change(dip, +1, uid, gid);
gfs2_trans_end(sdp);
@@ -759,8 +740,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
if (alloc_required < 0)
goto fail;
if (alloc_required) {
- error = gfs2_quota_check(dip, dip->i_di.di_uid,
- dip->i_di.di_gid);
+ error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid);
if (error)
goto fail_quota_locks;
@@ -782,16 +762,16 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
goto fail_quota_locks;
}
- error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_di.di_mode));
+ error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_inode.i_mode));
if (error)
goto fail_end_trans;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
goto fail_end_trans;
- ip->i_di.di_nlink = 1;
+ ip->i_inode.i_nlink = 1;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
return 0;
@@ -860,13 +840,13 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
*/
struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
- unsigned int mode)
+ unsigned int mode, dev_t dev)
{
struct inode *inode;
struct gfs2_inode *dip = ghs->gh_gl->gl_object;
struct inode *dir = &dip->i_inode;
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
- struct gfs2_inum inum;
+ struct gfs2_inum_host inum;
int error;
u64 generation;
@@ -890,35 +870,12 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
if (error)
goto fail_gunlock;
- if (inum.no_addr < dip->i_num.no_addr) {
- gfs2_glock_dq(ghs);
-
- error = gfs2_glock_nq_num(sdp, inum.no_addr,
- &gfs2_inode_glops, LM_ST_EXCLUSIVE,
- GL_SKIP, ghs + 1);
- if (error) {
- return ERR_PTR(error);
- }
-
- gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
- error = gfs2_glock_nq(ghs);
- if (error) {
- gfs2_glock_dq_uninit(ghs + 1);
- return ERR_PTR(error);
- }
-
- error = create_ok(dip, name, mode);
- if (error)
- goto fail_gunlock2;
- } else {
- error = gfs2_glock_nq_num(sdp, inum.no_addr,
- &gfs2_inode_glops, LM_ST_EXCLUSIVE,
- GL_SKIP, ghs + 1);
- if (error)
- goto fail_gunlock;
- }
+ error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops,
+ LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
+ if (error)
+ goto fail_gunlock;
- error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation);
+ error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev);
if (error)
goto fail_gunlock2;
@@ -975,7 +932,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
if (ip->i_di.di_entries != 2) {
if (gfs2_consist_inode(ip))
- gfs2_dinode_print(&ip->i_di);
+ gfs2_dinode_print(ip);
return -EIO;
}
@@ -997,7 +954,12 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
if (error)
return error;
- error = gfs2_change_nlink(ip, -2);
+ /* It looks odd, but it really should be done twice */
+ error = gfs2_change_nlink(ip, -1);
+ if (error)
+ return error;
+
+ error = gfs2_change_nlink(ip, -1);
if (error)
return error;
@@ -1018,16 +980,16 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
struct gfs2_inode *ip)
{
- struct gfs2_inum inum;
+ struct gfs2_inum_host inum;
unsigned int type;
int error;
if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
return -EPERM;
- if ((dip->i_di.di_mode & S_ISVTX) &&
- dip->i_di.di_uid != current->fsuid &&
- ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER))
+ if ((dip->i_inode.i_mode & S_ISVTX) &&
+ dip->i_inode.i_uid != current->fsuid &&
+ ip->i_inode.i_uid != current->fsuid && !capable(CAP_FOWNER))
return -EPERM;
if (IS_APPEND(&dip->i_inode))
@@ -1044,7 +1006,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
if (!gfs2_inum_equal(&inum, &ip->i_num))
return -ENOENT;
- if (IF2DT(ip->i_di.di_mode) != type) {
+ if (IF2DT(ip->i_inode.i_mode) != type) {
gfs2_consist_inode(dip);
return -EIO;
}
@@ -1194,7 +1156,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
return 0;
curtime = get_seconds();
- if (curtime - ip->i_di.di_atime >= quantum) {
+ if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
gfs2_glock_dq(gh);
gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
gh);
@@ -1206,7 +1168,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
trying to get exclusive lock. */
curtime = get_seconds();
- if (curtime - ip->i_di.di_atime >= quantum) {
+ if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
struct buffer_head *dibh;
struct gfs2_dinode *di;
@@ -1220,11 +1182,11 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
if (error)
goto fail_end_trans;
- ip->i_di.di_atime = curtime;
+ ip->i_inode.i_atime.tv_sec = curtime;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
di = (struct gfs2_dinode *)dibh->b_data;
- di->di_atime = cpu_to_be64(ip->i_di.di_atime);
+ di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
brelse(dibh);
gfs2_trans_end(sdp);
@@ -1249,92 +1211,6 @@ fail:
return error;
}
-/**
- * glock_compare_atime - Compare two struct gfs2_glock structures for sort
- * @arg_a: the first structure
- * @arg_b: the second structure
- *
- * Returns: 1 if A > B
- * -1 if A < B
- * 0 if A == B
- */
-
-static int glock_compare_atime(const void *arg_a, const void *arg_b)
-{
- const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
- const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
- const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
- const struct lm_lockname *b = &gh_b->gh_gl->gl_name;
-
- if (a->ln_number > b->ln_number)
- return 1;
- if (a->ln_number < b->ln_number)
- return -1;
- if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE)
- return 1;
- if (gh_a->gh_state == LM_ST_SHARED && (gh_b->gh_flags & GL_ATIME))
- return 1;
-
- return 0;
-}
-
-/**
- * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an
- * atime update
- * @num_gh: the number of structures
- * @ghs: an array of struct gfs2_holder structures
- *
- * Returns: 0 on success (all glocks acquired),
- * errno on failure (no glocks acquired)
- */
-
-int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs)
-{
- struct gfs2_holder **p;
- unsigned int x;
- int error = 0;
-
- if (!num_gh)
- return 0;
-
- if (num_gh == 1) {
- ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
- if (ghs->gh_flags & GL_ATIME)
- error = gfs2_glock_nq_atime(ghs);
- else
- error = gfs2_glock_nq(ghs);
- return error;
- }
-
- p = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
-
- for (x = 0; x < num_gh; x++)
- p[x] = &ghs[x];
-
- sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime,NULL);
-
- for (x = 0; x < num_gh; x++) {
- p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
-
- if (p[x]->gh_flags & GL_ATIME)
- error = gfs2_glock_nq_atime(p[x]);
- else
- error = gfs2_glock_nq(p[x]);
-
- if (error) {
- while (x--)
- gfs2_glock_dq(p[x]);
- break;
- }
- }
-
- kfree(p);
- return error;
-}
-
-
static int
__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
{
@@ -1345,10 +1221,8 @@ __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
if (!error) {
error = inode_setattr(&ip->i_inode, attr);
gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
- gfs2_inode_attr_out(ip);
-
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
}
return error;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index f5d86176057..b57f448b15b 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -22,13 +22,19 @@ static inline int gfs2_is_jdata(struct gfs2_inode *ip)
static inline int gfs2_is_dir(struct gfs2_inode *ip)
{
- return S_ISDIR(ip->i_di.di_mode);
+ return S_ISDIR(ip->i_inode.i_mode);
+}
+
+static inline void gfs2_set_inode_blocks(struct inode *inode)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ inode->i_blocks = ip->i_di.di_blocks <<
+ (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
}
void gfs2_inode_attr_in(struct gfs2_inode *ip);
-void gfs2_inode_attr_out(struct gfs2_inode *ip);
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned type);
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum);
+struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type);
+struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum);
int gfs2_inode_refresh(struct gfs2_inode *ip);
@@ -37,19 +43,15 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
int is_root, struct nameidata *nd);
struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
- unsigned int mode);
+ unsigned int mode, dev_t dev);
int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
struct gfs2_inode *ip);
int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
struct gfs2_inode *ip);
int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
-
int gfs2_glock_nq_atime(struct gfs2_holder *gh);
-int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs);
-
int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
-
struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
#endif /* __INODE_DOT_H__ */
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 0cace3da9db..291415ddfe5 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -15,6 +15,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/lm_interface.h>
+#include <linux/delay.h>
#include "gfs2.h"
#include "incore.h"
@@ -142,7 +143,7 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
return list_empty(&ai->ai_ail1_list);
}
-void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
+static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
{
struct list_head *head = &sdp->sd_ail1_list;
u64 sync_gen;
@@ -261,6 +262,12 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
* @sdp: The GFS2 superblock
* @blks: The number of blocks to reserve
*
+ * Note that we never give out the last 6 blocks of the journal. That's
+ * due to the fact that there are a small number of header blocks
+ * associated with each log flush. The exact number can't be known until
+ * flush time, so we ensure that we have just enough free blocks at all
+ * times to avoid running out during a log flush.
+ *
* Returns: errno
*/
@@ -274,7 +281,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
mutex_lock(&sdp->sd_log_reserve_mutex);
gfs2_log_lock(sdp);
- while(sdp->sd_log_blks_free <= blks) {
+ while(sdp->sd_log_blks_free <= (blks + 6)) {
gfs2_log_unlock(sdp);
gfs2_ail1_empty(sdp, 0);
gfs2_log_flush(sdp, NULL);
@@ -319,7 +326,8 @@ static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
bh_map.b_size = 1 << inode->i_blkbits;
error = gfs2_block_map(inode, lbn, 0, &bh_map);
if (error || !bh_map.b_blocknr)
- printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, bh_map.b_blocknr, lbn);
+ printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error,
+ (unsigned long long)bh_map.b_blocknr, lbn);
gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr);
return bh_map.b_blocknr;
@@ -643,12 +651,9 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
up_read(&sdp->sd_log_flush_lock);
gfs2_log_lock(sdp);
- if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) {
- gfs2_log_unlock(sdp);
- gfs2_log_flush(sdp, NULL);
- } else {
- gfs2_log_unlock(sdp);
- }
+ if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks))
+ wake_up_process(sdp->sd_logd_process);
+ gfs2_log_unlock(sdp);
}
/**
@@ -686,3 +691,21 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
up_write(&sdp->sd_log_flush_lock);
}
+
+/**
+ * gfs2_meta_syncfs - sync all the buffers in a filesystem
+ * @sdp: the filesystem
+ *
+ */
+
+void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
+{
+ gfs2_log_flush(sdp, NULL);
+ for (;;) {
+ gfs2_ail1_start(sdp, DIO_ALL);
+ if (gfs2_ail1_empty(sdp, DIO_ALL))
+ break;
+ msleep(10);
+ }
+}
+
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 7f5737d5561..8e7aa0f2910 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -48,7 +48,6 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
unsigned int ssize);
-void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags);
int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
@@ -61,5 +60,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
void gfs2_log_shutdown(struct gfs2_sbd *sdp);
+void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
#endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index ab6d1115f95..4d7f94d8c7b 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -182,7 +182,7 @@ static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
}
static void buf_lo_before_scan(struct gfs2_jdesc *jd,
- struct gfs2_log_header *head, int pass)
+ struct gfs2_log_header_host *head, int pass)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
@@ -328,7 +328,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
}
static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
- struct gfs2_log_header *head, int pass)
+ struct gfs2_log_header_host *head, int pass)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
@@ -509,7 +509,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
{
LIST_HEAD(started);
struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
- struct buffer_head *bh = NULL;
+ struct buffer_head *bh = NULL,*bh1 = NULL;
unsigned int offset = sizeof(struct gfs2_log_descriptor);
struct gfs2_log_descriptor *ld;
unsigned int limit;
@@ -537,8 +537,13 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
list_for_each_entry_safe_continue(bd1, bdt,
&sdp->sd_log_le_databuf,
bd_le.le_list) {
+ /* store off the buffer head in a local ptr since
+ * gfs2_bufdata might change when we drop the log lock
+ */
+ bh1 = bd1->bd_bh;
+
/* An ordered write buffer */
- if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) {
+ if (bh1 && !buffer_pinned(bh1)) {
list_move(&bd1->bd_le.le_list, &started);
if (bd1 == bd2) {
bd2 = NULL;
@@ -547,20 +552,21 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
bd_le.le_list);
}
total_dbuf--;
- if (bd1->bd_bh) {
- get_bh(bd1->bd_bh);
- if (buffer_dirty(bd1->bd_bh)) {
+ if (bh1) {
+ if (buffer_dirty(bh1)) {
+ get_bh(bh1);
+
gfs2_log_unlock(sdp);
- wait_on_buffer(bd1->bd_bh);
- ll_rw_block(WRITE, 1,
- &bd1->bd_bh);
+
+ ll_rw_block(SWRITE, 1, &bh1);
+ brelse(bh1);
+
gfs2_log_lock(sdp);
}
- brelse(bd1->bd_bh);
continue;
}
continue;
- } else if (bd1->bd_bh) { /* A journaled buffer */
+ } else if (bh1) { /* A journaled buffer */
int magic;
gfs2_log_unlock(sdp);
if (!bh) {
@@ -582,16 +588,16 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
ld->ld_data2 = cpu_to_be32(0);
memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
}
- magic = gfs2_check_magic(bd1->bd_bh);
- *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
+ magic = gfs2_check_magic(bh1);
+ *ptr++ = cpu_to_be64(bh1->b_blocknr);
*ptr++ = cpu_to_be64((__u64)magic);
- clear_buffer_escaped(bd1->bd_bh);
+ clear_buffer_escaped(bh1);
if (unlikely(magic != 0))
- set_buffer_escaped(bd1->bd_bh);
+ set_buffer_escaped(bh1);
gfs2_log_lock(sdp);
if (n++ > num)
break;
- } else if (!bd1->bd_bh) {
+ } else if (!bh1) {
total_dbuf--;
sdp->sd_log_num_databuf--;
list_del_init(&bd1->bd_le.le_list);
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 5839c05ae6b..965bc65c7c6 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -60,7 +60,7 @@ static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
}
static inline void lops_before_scan(struct gfs2_jdesc *jd,
- struct gfs2_log_header *head,
+ struct gfs2_log_header_host *head,
unsigned int pass)
{
int x;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 3912d6a4b1e..0e34d991897 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -127,17 +127,17 @@ void gfs2_meta_sync(struct gfs2_glock *gl)
/**
* getbuf - Get a buffer with a given address space
- * @sdp: the filesystem
- * @aspace: the address space
+ * @gl: the glock
* @blkno: the block number (filesystem scope)
* @create: 1 if the buffer should be created
*
* Returns: the buffer
*/
-static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
- u64 blkno, int create)
+static struct buffer_head *getbuf(struct gfs2_glock *gl, u64 blkno, int create)
{
+ struct address_space *mapping = gl->gl_aspace->i_mapping;
+ struct gfs2_sbd *sdp = gl->gl_sbd;
struct page *page;
struct buffer_head *bh;
unsigned int shift;
@@ -150,13 +150,13 @@ static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
if (create) {
for (;;) {
- page = grab_cache_page(aspace->i_mapping, index);
+ page = grab_cache_page(mapping, index);
if (page)
break;
yield();
}
} else {
- page = find_lock_page(aspace->i_mapping, index);
+ page = find_lock_page(mapping, index);
if (!page)
return NULL;
}
@@ -202,7 +202,7 @@ static void meta_prep_new(struct buffer_head *bh)
struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
{
struct buffer_head *bh;
- bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
+ bh = getbuf(gl, blkno, CREATE);
meta_prep_new(bh);
return bh;
}
@@ -220,7 +220,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
struct buffer_head **bhp)
{
- *bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
+ *bhp = getbuf(gl, blkno, CREATE);
if (!buffer_uptodate(*bhp))
ll_rw_block(READ_META, 1, bhp);
if (flags & DIO_WAIT) {
@@ -379,11 +379,10 @@ void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct inode *aspace = ip->i_gl->gl_aspace;
struct buffer_head *bh;
while (blen) {
- bh = getbuf(sdp, aspace, bstart, NO_CREATE);
+ bh = getbuf(ip->i_gl, bstart, NO_CREATE);
if (bh) {
struct gfs2_bufdata *bd = bh->b_private;
@@ -472,6 +471,9 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height;
int in_cache = 0;
+ BUG_ON(!gl);
+ BUG_ON(!sdp);
+
spin_lock(&ip->i_spin);
if (*bh_slot && (*bh_slot)->b_blocknr == num) {
bh = *bh_slot;
@@ -481,7 +483,7 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
spin_unlock(&ip->i_spin);
if (!bh)
- bh = getbuf(gl->gl_sbd, gl->gl_aspace, num, CREATE);
+ bh = getbuf(gl, num, CREATE);
if (!bh)
return -ENOBUFS;
@@ -532,7 +534,6 @@ err:
struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
- struct inode *aspace = gl->gl_aspace;
struct buffer_head *first_bh, *bh;
u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
sdp->sd_sb.sb_bsize_shift;
@@ -544,7 +545,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
if (extlen > max_ra)
extlen = max_ra;
- first_bh = getbuf(sdp, aspace, dblock, CREATE);
+ first_bh = getbuf(gl, dblock, CREATE);
if (buffer_uptodate(first_bh))
goto out;
@@ -555,7 +556,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
extlen--;
while (extlen) {
- bh = getbuf(sdp, aspace, dblock, CREATE);
+ bh = getbuf(gl, dblock, CREATE);
if (!buffer_uptodate(bh) && !buffer_locked(bh))
ll_rw_block(READA, 1, &bh);
@@ -571,20 +572,3 @@ out:
return first_bh;
}
-/**
- * gfs2_meta_syncfs - sync all the buffers in a filesystem
- * @sdp: the filesystem
- *
- */
-
-void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
-{
- gfs2_log_flush(sdp, NULL);
- for (;;) {
- gfs2_ail1_start(sdp, DIO_ALL);
- if (gfs2_ail1_empty(sdp, DIO_ALL))
- break;
- msleep(10);
- }
-}
-
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 3ec939e20df..e037425bc04 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -67,7 +67,6 @@ static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
}
struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
-void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
#define buffer_busy(bh) \
((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 1025960b0e6..f2495f1e21a 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -15,6 +15,8 @@
#include "gfs2.h"
#include <linux/gfs2_ondisk.h>
+#include <linux/lm_interface.h>
+#include "incore.h"
#define pv(struct, member, fmt) printk(KERN_INFO " "#member" = "fmt"\n", \
struct->member);
@@ -32,7 +34,7 @@
* first arg: the cpu-order structure
*/
-void gfs2_inum_in(struct gfs2_inum *no, const void *buf)
+void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf)
{
const struct gfs2_inum *str = buf;
@@ -40,7 +42,7 @@ void gfs2_inum_in(struct gfs2_inum *no, const void *buf)
no->no_addr = be64_to_cpu(str->no_addr);
}
-void gfs2_inum_out(const struct gfs2_inum *no, void *buf)
+void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf)
{
struct gfs2_inum *str = buf;
@@ -48,13 +50,13 @@ void gfs2_inum_out(const struct gfs2_inum *no, void *buf)
str->no_addr = cpu_to_be64(no->no_addr);
}
-static void gfs2_inum_print(const struct gfs2_inum *no)
+static void gfs2_inum_print(const struct gfs2_inum_host *no)
{
printk(KERN_INFO " no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
printk(KERN_INFO " no_addr = %llu\n", (unsigned long long)no->no_addr);
}
-static void gfs2_meta_header_in(struct gfs2_meta_header *mh, const void *buf)
+static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
{
const struct gfs2_meta_header *str = buf;
@@ -63,23 +65,7 @@ static void gfs2_meta_header_in(struct gfs2_meta_header *mh, const void *buf)
mh->mh_format = be32_to_cpu(str->mh_format);
}
-static void gfs2_meta_header_out(const struct gfs2_meta_header *mh, void *buf)
-{
- struct gfs2_meta_header *str = buf;
-
- str->mh_magic = cpu_to_be32(mh->mh_magic);
- str->mh_type = cpu_to_be32(mh->mh_type);
- str->mh_format = cpu_to_be32(mh->mh_format);
-}
-
-static void gfs2_meta_header_print(const struct gfs2_meta_header *mh)
-{
- pv(mh, mh_magic, "0x%.8X");
- pv(mh, mh_type, "%u");
- pv(mh, mh_format, "%u");
-}
-
-void gfs2_sb_in(struct gfs2_sb *sb, const void *buf)
+void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
{
const struct gfs2_sb *str = buf;
@@ -97,7 +83,7 @@ void gfs2_sb_in(struct gfs2_sb *sb, const void *buf)
memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
}
-void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf)
+void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf)
{
const struct gfs2_rindex *str = buf;
@@ -109,7 +95,7 @@ void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf)
}
-void gfs2_rindex_print(const struct gfs2_rindex *ri)
+void gfs2_rindex_print(const struct gfs2_rindex_host *ri)
{
printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
pv(ri, ri_length, "%u");
@@ -120,22 +106,20 @@ void gfs2_rindex_print(const struct gfs2_rindex *ri)
pv(ri, ri_bitbytes, "%u");
}
-void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf)
+void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
{
const struct gfs2_rgrp *str = buf;
- gfs2_meta_header_in(&rg->rg_header, buf);
rg->rg_flags = be32_to_cpu(str->rg_flags);
rg->rg_free = be32_to_cpu(str->rg_free);
rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
}
-void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf)
+void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
{
struct gfs2_rgrp *str = buf;
- gfs2_meta_header_out(&rg->rg_header, buf);
str->rg_flags = cpu_to_be32(rg->rg_flags);
str->rg_free = cpu_to_be32(rg->rg_free);
str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
@@ -144,7 +128,7 @@ void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf)
memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
}
-void gfs2_quota_in(struct gfs2_quota *qu, const void *buf)
+void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
{
const struct gfs2_quota *str = buf;
@@ -153,96 +137,56 @@ void gfs2_quota_in(struct gfs2_quota *qu, const void *buf)
qu->qu_value = be64_to_cpu(str->qu_value);
}
-void gfs2_dinode_in(struct gfs2_dinode *di, const void *buf)
-{
- const struct gfs2_dinode *str = buf;
-
- gfs2_meta_header_in(&di->di_header, buf);
- gfs2_inum_in(&di->di_num, &str->di_num);
-
- di->di_mode = be32_to_cpu(str->di_mode);
- di->di_uid = be32_to_cpu(str->di_uid);
- di->di_gid = be32_to_cpu(str->di_gid);
- di->di_nlink = be32_to_cpu(str->di_nlink);
- di->di_size = be64_to_cpu(str->di_size);
- di->di_blocks = be64_to_cpu(str->di_blocks);
- di->di_atime = be64_to_cpu(str->di_atime);
- di->di_mtime = be64_to_cpu(str->di_mtime);
- di->di_ctime = be64_to_cpu(str->di_ctime);
- di->di_major = be32_to_cpu(str->di_major);
- di->di_minor = be32_to_cpu(str->di_minor);
-
- di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
- di->di_goal_data = be64_to_cpu(str->di_goal_data);
- di->di_generation = be64_to_cpu(str->di_generation);
-
- di->di_flags = be32_to_cpu(str->di_flags);
- di->di_payload_format = be32_to_cpu(str->di_payload_format);
- di->di_height = be16_to_cpu(str->di_height);
-
- di->di_depth = be16_to_cpu(str->di_depth);
- di->di_entries = be32_to_cpu(str->di_entries);
-
- di->di_eattr = be64_to_cpu(str->di_eattr);
-
-}
-
-void gfs2_dinode_out(const struct gfs2_dinode *di, void *buf)
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
{
+ const struct gfs2_dinode_host *di = &ip->i_di;
struct gfs2_dinode *str = buf;
- gfs2_meta_header_out(&di->di_header, buf);
- gfs2_inum_out(&di->di_num, (char *)&str->di_num);
+ str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+ str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
+ str->di_header.__pad0 = 0;
+ str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
+ str->di_header.__pad1 = 0;
- str->di_mode = cpu_to_be32(di->di_mode);
- str->di_uid = cpu_to_be32(di->di_uid);
- str->di_gid = cpu_to_be32(di->di_gid);
- str->di_nlink = cpu_to_be32(di->di_nlink);
+ gfs2_inum_out(&ip->i_num, &str->di_num);
+
+ str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
+ str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
+ str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
+ str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
str->di_size = cpu_to_be64(di->di_size);
str->di_blocks = cpu_to_be64(di->di_blocks);
- str->di_atime = cpu_to_be64(di->di_atime);
- str->di_mtime = cpu_to_be64(di->di_mtime);
- str->di_ctime = cpu_to_be64(di->di_ctime);
- str->di_major = cpu_to_be32(di->di_major);
- str->di_minor = cpu_to_be32(di->di_minor);
+ str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+ str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
+ str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
str->di_goal_data = cpu_to_be64(di->di_goal_data);
str->di_generation = cpu_to_be64(di->di_generation);
str->di_flags = cpu_to_be32(di->di_flags);
- str->di_payload_format = cpu_to_be32(di->di_payload_format);
str->di_height = cpu_to_be16(di->di_height);
-
+ str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
+ !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
+ GFS2_FORMAT_DE : 0);
str->di_depth = cpu_to_be16(di->di_depth);
str->di_entries = cpu_to_be32(di->di_entries);
str->di_eattr = cpu_to_be64(di->di_eattr);
-
}
-void gfs2_dinode_print(const struct gfs2_dinode *di)
+void gfs2_dinode_print(const struct gfs2_inode *ip)
{
- gfs2_meta_header_print(&di->di_header);
- gfs2_inum_print(&di->di_num);
+ const struct gfs2_dinode_host *di = &ip->i_di;
+
+ gfs2_inum_print(&ip->i_num);
- pv(di, di_mode, "0%o");
- pv(di, di_uid, "%u");
- pv(di, di_gid, "%u");
- pv(di, di_nlink, "%u");
printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size);
printk(KERN_INFO " di_blocks = %llu\n", (unsigned long long)di->di_blocks);
- printk(KERN_INFO " di_atime = %lld\n", (long long)di->di_atime);
- printk(KERN_INFO " di_mtime = %lld\n", (long long)di->di_mtime);
- printk(KERN_INFO " di_ctime = %lld\n", (long long)di->di_ctime);
- pv(di, di_major, "%u");
- pv(di, di_minor, "%u");
-
printk(KERN_INFO " di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
printk(KERN_INFO " di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
pv(di, di_flags, "0x%.8X");
- pv(di, di_payload_format, "%u");
pv(di, di_height, "%u");
pv(di, di_depth, "%u");
@@ -251,7 +195,7 @@ void gfs2_dinode_print(const struct gfs2_dinode *di)
printk(KERN_INFO " di_eattr = %llu\n", (unsigned long long)di->di_eattr);
}
-void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf)
+void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
{
const struct gfs2_log_header *str = buf;
@@ -263,7 +207,7 @@ void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf)
lh->lh_hash = be32_to_cpu(str->lh_hash);
}
-void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf)
+void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
{
const struct gfs2_inum_range *str = buf;
@@ -271,7 +215,7 @@ void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf)
ir->ir_length = be64_to_cpu(str->ir_length);
}
-void gfs2_inum_range_out(const struct gfs2_inum_range *ir, void *buf)
+void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
{
struct gfs2_inum_range *str = buf;
@@ -279,7 +223,7 @@ void gfs2_inum_range_out(const struct gfs2_inum_range *ir, void *buf)
str->ir_length = cpu_to_be64(ir->ir_length);
}
-void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf)
+void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
{
const struct gfs2_statfs_change *str = buf;
@@ -288,7 +232,7 @@ void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf)
sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
}
-void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf)
+void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
{
struct gfs2_statfs_change *str = buf;
@@ -297,7 +241,7 @@ void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf)
str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
}
-void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf)
+void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
{
const struct gfs2_quota_change *str = buf;
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 015640b3f12..d8d69a72a10 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -156,19 +156,6 @@ out_ignore:
return 0;
}
-static int zero_readpage(struct page *page)
-{
- void *kaddr;
-
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr, 0, PAGE_CACHE_SIZE);
- kunmap_atomic(kaddr, KM_USER0);
-
- SetPageUptodate(page);
-
- return 0;
-}
-
/**
* stuffed_readpage - Fill in a Linux page with stuffed file data
* @ip: the inode
@@ -183,9 +170,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
void *kaddr;
int error;
- /* Only the first page of a stuffed file might contain data */
- if (unlikely(page->index))
- return zero_readpage(page);
+ BUG_ON(page->index);
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
@@ -230,9 +215,9 @@ static int gfs2_readpage(struct file *file, struct page *page)
/* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */
goto skip_lock;
}
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|GL_AOP, &gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
do_unlock = 1;
- error = gfs2_glock_nq_m_atime(1, &gh);
+ error = gfs2_glock_nq_atime(&gh);
if (unlikely(error))
goto out_unlock;
}
@@ -254,6 +239,8 @@ skip_lock:
out:
return error;
out_unlock:
+ if (error == GLR_TRYFAILED)
+ error = AOP_TRUNCATED_PAGE;
unlock_page(page);
if (do_unlock)
gfs2_holder_uninit(&gh);
@@ -293,9 +280,9 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
goto skip_lock;
}
gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
- LM_FLAG_TRY_1CB|GL_ATIME|GL_AOP, &gh);
+ LM_FLAG_TRY_1CB|GL_ATIME, &gh);
do_unlock = 1;
- ret = gfs2_glock_nq_m_atime(1, &gh);
+ ret = gfs2_glock_nq_atime(&gh);
if (ret == GLR_TRYFAILED)
goto out_noerror;
if (unlikely(ret))
@@ -366,10 +353,13 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
unsigned int write_len = to - from;
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|GL_AOP, &ip->i_gh);
- error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
- if (error)
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
+ error = gfs2_glock_nq_atime(&ip->i_gh);
+ if (unlikely(error)) {
+ if (error == GLR_TRYFAILED)
+ error = AOP_TRUNCATED_PAGE;
goto out_uninit;
+ }
gfs2_write_calc_reserv(ip, write_len, &data_blocks, &ind_blocks);
@@ -386,7 +376,7 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
if (error)
goto out_alloc_put;
- error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+ error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
if (error)
goto out_qunlock;
@@ -482,8 +472,10 @@ static int gfs2_commit_write(struct file *file, struct page *page,
SetPageUptodate(page);
- if (inode->i_size < file_size)
+ if (inode->i_size < file_size) {
i_size_write(inode, file_size);
+ mark_inode_dirty(inode);
+ }
} else {
if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
gfs2_is_jdata(ip))
@@ -498,11 +490,6 @@ static int gfs2_commit_write(struct file *file, struct page *page,
di->di_size = cpu_to_be64(inode->i_size);
}
- di->di_mode = cpu_to_be32(inode->i_mode);
- di->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
- di->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
- di->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
-
brelse(dibh);
gfs2_trans_end(sdp);
if (al->al_requested) {
@@ -624,7 +611,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
* on this path. All we need change is atime.
*/
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
- rv = gfs2_glock_nq_m_atime(1, &gh);
+ rv = gfs2_glock_nq_atime(&gh);
if (rv)
goto out;
@@ -737,6 +724,9 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
if (!atomic_read(&aspace->i_writecount))
return 0;
+ if (!(gfp_mask & __GFP_WAIT))
+ return 0;
+
if (time_after_eq(jiffies, t)) {
stuck_releasepage(bh);
/* should we withdraw here? */
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index 00041b1b802..d355899585d 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -43,7 +43,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
struct inode *inode = dentry->d_inode;
struct gfs2_holder d_gh;
struct gfs2_inode *ip;
- struct gfs2_inum inum;
+ struct gfs2_inum_host inum;
unsigned int type;
int error;
@@ -76,7 +76,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
if (!gfs2_inum_equal(&ip->i_num, &inum))
goto invalid_gunlock;
- if (IF2DT(ip->i_di.di_mode) != type) {
+ if (IF2DT(ip->i_inode.i_mode) != type) {
gfs2_consist_inode(dip);
goto fail_gunlock;
}
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 86127d93bd3..b4e7b877531 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -27,15 +27,16 @@
#include "util.h"
static struct dentry *gfs2_decode_fh(struct super_block *sb,
- __u32 *fh,
+ __u32 *p,
int fh_len,
int fh_type,
int (*acceptable)(void *context,
struct dentry *dentry),
void *context)
{
+ __be32 *fh = (__force __be32 *)p;
struct gfs2_fh_obj fh_obj;
- struct gfs2_inum *this, parent;
+ struct gfs2_inum_host *this, parent;
if (fh_type != fh_len)
return NULL;
@@ -65,9 +66,10 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb,
acceptable, context);
}
-static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
+static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
int connectable)
{
+ __be32 *fh = (__force __be32 *)p;
struct inode *inode = dentry->d_inode;
struct super_block *sb = inode->i_sb;
struct gfs2_inode *ip = GFS2_I(inode);
@@ -76,14 +78,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
(connectable && *len < GFS2_LARGE_FH_SIZE))
return 255;
- fh[0] = ip->i_num.no_formal_ino >> 32;
- fh[0] = cpu_to_be32(fh[0]);
- fh[1] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
- fh[1] = cpu_to_be32(fh[1]);
- fh[2] = ip->i_num.no_addr >> 32;
- fh[2] = cpu_to_be32(fh[2]);
- fh[3] = ip->i_num.no_addr & 0xFFFFFFFF;
- fh[3] = cpu_to_be32(fh[3]);
+ fh[0] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
+ fh[1] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
+ fh[2] = cpu_to_be32(ip->i_num.no_addr >> 32);
+ fh[3] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
*len = GFS2_SMALL_FH_SIZE;
if (!connectable || inode == sb->s_root->d_inode)
@@ -95,14 +93,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
igrab(inode);
spin_unlock(&dentry->d_lock);
- fh[4] = ip->i_num.no_formal_ino >> 32;
- fh[4] = cpu_to_be32(fh[4]);
- fh[5] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
- fh[5] = cpu_to_be32(fh[5]);
- fh[6] = ip->i_num.no_addr >> 32;
- fh[6] = cpu_to_be32(fh[6]);
- fh[7] = ip->i_num.no_addr & 0xFFFFFFFF;
- fh[7] = cpu_to_be32(fh[7]);
+ fh[4] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
+ fh[5] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
+ fh[6] = cpu_to_be32(ip->i_num.no_addr >> 32);
+ fh[7] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
fh[8] = cpu_to_be32(inode->i_mode);
fh[9] = 0; /* pad to double word */
@@ -114,12 +108,12 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
}
struct get_name_filldir {
- struct gfs2_inum inum;
+ struct gfs2_inum_host inum;
char *name;
};
static int get_name_filldir(void *opaque, const char *name, unsigned int length,
- u64 offset, struct gfs2_inum *inum,
+ u64 offset, struct gfs2_inum_host *inum,
unsigned int type)
{
struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
@@ -202,7 +196,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj;
- struct gfs2_inum *inum = &fh_obj->this;
+ struct gfs2_inum_host *inum = &fh_obj->this;
struct gfs2_holder i_gh, ri_gh, rgd_gh;
struct gfs2_rgrpd *rgd;
struct inode *inode;
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
index 09aca5046fb..f925a955b3b 100644
--- a/fs/gfs2/ops_export.h
+++ b/fs/gfs2/ops_export.h
@@ -15,7 +15,7 @@
extern struct export_operations gfs2_export_ops;
struct gfs2_fh_obj {
- struct gfs2_inum this;
+ struct gfs2_inum_host this;
__u32 imode;
};
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 3064f133bf3..b3f1e0349ae 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -22,6 +22,7 @@
#include <linux/ext2_fs.h>
#include <linux/crc32.h>
#include <linux/lm_interface.h>
+#include <linux/writeback.h>
#include <asm/uaccess.h>
#include "gfs2.h"
@@ -71,7 +72,7 @@ static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
size = count;
kaddr = kmap(page);
- memcpy(desc->arg.buf, kaddr + offset, size);
+ memcpy(desc->arg.data, kaddr + offset, size);
kunmap(page);
desc->count = count - size;
@@ -86,7 +87,7 @@ int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
struct inode *inode = &ip->i_inode;
read_descriptor_t desc;
desc.written = 0;
- desc.arg.buf = buf;
+ desc.arg.data = buf;
desc.count = size;
desc.error = 0;
do_generic_mapping_read(inode->i_mapping, ra_state,
@@ -139,7 +140,7 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
*/
static int filldir_func(void *opaque, const char *name, unsigned int length,
- u64 offset, struct gfs2_inum *inum,
+ u64 offset, struct gfs2_inum_host *inum,
unsigned int type)
{
struct filldir_reg *fdr = (struct filldir_reg *)opaque;
@@ -253,7 +254,7 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
u32 fsflags;
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
- error = gfs2_glock_nq_m_atime(1, &gh);
+ error = gfs2_glock_nq_atime(&gh);
if (error)
return error;
@@ -266,6 +267,24 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
return error;
}
+void gfs2_set_inode_flags(struct inode *inode)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_dinode_host *di = &ip->i_di;
+ unsigned int flags = inode->i_flags;
+
+ flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+ if (di->di_flags & GFS2_DIF_IMMUTABLE)
+ flags |= S_IMMUTABLE;
+ if (di->di_flags & GFS2_DIF_APPENDONLY)
+ flags |= S_APPEND;
+ if (di->di_flags & GFS2_DIF_NOATIME)
+ flags |= S_NOATIME;
+ if (di->di_flags & GFS2_DIF_SYNC)
+ flags |= S_SYNC;
+ inode->i_flags = flags;
+}
+
/* Flags that can be set by user space */
#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \
GFS2_DIF_DIRECTIO| \
@@ -336,8 +355,9 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
goto out_trans_end;
gfs2_trans_add_bh(ip->i_gl, bh, 1);
ip->i_di.di_flags = new_flags;
- gfs2_dinode_out(&ip->i_di, bh->b_data);
+ gfs2_dinode_out(ip, bh->b_data);
brelse(bh);
+ gfs2_set_inode_flags(inode);
out_trans_end:
gfs2_trans_end(sdp);
out:
@@ -425,7 +445,7 @@ static int gfs2_open(struct inode *inode, struct file *file)
gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
file->private_data = fp;
- if (S_ISREG(ip->i_di.di_mode)) {
+ if (S_ISREG(ip->i_inode.i_mode)) {
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
&i_gh);
if (error)
@@ -484,16 +504,40 @@ static int gfs2_close(struct inode *inode, struct file *file)
* @file: the file that points to the dentry (we ignore this)
* @dentry: the dentry that points to the inode to sync
*
+ * The VFS will flush "normal" data for us. We only need to worry
+ * about metadata here. For journaled data, we just do a log flush
+ * as we can't avoid it. Otherwise we can just bale out if datasync
+ * is set. For stuffed inodes we must flush the log in order to
+ * ensure that all data is on disk.
+ *
+ * The call to write_inode_now() is there to write back metadata and
+ * the inode itself. It does also try and write the data, but that's
+ * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
+ * for us.
+ *
* Returns: errno
*/
static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
{
- struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+ struct inode *inode = dentry->d_inode;
+ int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
+ int ret = 0;
- gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
+ if (gfs2_is_jdata(GFS2_I(inode))) {
+ gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
+ return 0;
+ }
- return 0;
+ if (sync_state != 0) {
+ if (!datasync)
+ ret = write_inode_now(inode, 0);
+
+ if (gfs2_is_stuffed(GFS2_I(inode)))
+ gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
+ }
+
+ return ret;
}
/**
@@ -515,7 +559,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
if (!(fl->fl_flags & FL_POSIX))
return -ENOLCK;
- if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+ if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
return -ENOLCK;
if (sdp->sd_args.ar_localflocks) {
@@ -617,7 +661,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
if (!(fl->fl_flags & FL_FLOCK))
return -ENOLCK;
- if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+ if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
return -ENOLCK;
if (sdp->sd_args.ar_localflocks)
diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h
index ce319f89ec8..7e5d8ec9c84 100644
--- a/fs/gfs2/ops_file.h
+++ b/fs/gfs2/ops_file.h
@@ -17,7 +17,7 @@ extern struct file gfs2_internal_file_sentinel;
extern int gfs2_internal_read(struct gfs2_inode *ip,
struct file_ra_state *ra_state,
char *buf, loff_t *pos, unsigned size);
-
+extern void gfs2_set_inode_flags(struct inode *inode);
extern const struct file_operations gfs2_file_fops;
extern const struct file_operations gfs2_dir_fops;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 882873a6bd6..d14e139d267 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -237,7 +237,7 @@ fail:
}
static struct inode *gfs2_lookup_root(struct super_block *sb,
- struct gfs2_inum *inum)
+ struct gfs2_inum_host *inum)
{
return gfs2_inode_lookup(sb, inum, DT_DIR);
}
@@ -246,7 +246,7 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
{
struct super_block *sb = sdp->sd_vfs;
struct gfs2_holder sb_gh;
- struct gfs2_inum *inum;
+ struct gfs2_inum_host *inum;
struct inode *inode;
int error = 0;
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index ef6e5ed70e9..636dda4c7d3 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -59,7 +59,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
gfs2_holder_init(dip->i_gl, 0, 0, ghs);
for (;;) {
- inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode);
+ inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0);
if (!IS_ERR(inode)) {
gfs2_trans_end(sdp);
if (dip->i_alloc.al_rgd)
@@ -144,7 +144,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
int alloc_required;
int error;
- if (S_ISDIR(ip->i_di.di_mode))
+ if (S_ISDIR(inode->i_mode))
return -EPERM;
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
@@ -169,7 +169,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
}
error = -EINVAL;
- if (!dip->i_di.di_nlink)
+ if (!dip->i_inode.i_nlink)
goto out_gunlock;
error = -EFBIG;
if (dip->i_di.di_entries == (u32)-1)
@@ -178,10 +178,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto out_gunlock;
error = -EINVAL;
- if (!ip->i_di.di_nlink)
+ if (!ip->i_inode.i_nlink)
goto out_gunlock;
error = -EMLINK;
- if (ip->i_di.di_nlink == (u32)-1)
+ if (ip->i_inode.i_nlink == (u32)-1)
goto out_gunlock;
alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
@@ -196,8 +196,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
if (error)
goto out_alloc;
- error = gfs2_quota_check(dip, dip->i_di.di_uid,
- dip->i_di.di_gid);
+ error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid);
if (error)
goto out_gunlock_q;
@@ -220,7 +219,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
}
error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num,
- IF2DT(ip->i_di.di_mode));
+ IF2DT(inode->i_mode));
if (error)
goto out_end_trans;
@@ -326,7 +325,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
gfs2_holder_init(dip->i_gl, 0, 0, ghs);
- inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO);
+ inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0);
if (IS_ERR(inode)) {
gfs2_holder_uninit(ghs);
return PTR_ERR(inode);
@@ -339,7 +338,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!gfs2_assert_withdraw(sdp, !error)) {
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
size);
brelse(dibh);
@@ -379,7 +378,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
gfs2_holder_init(dip->i_gl, 0, 0, ghs);
- inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode);
+ inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0);
if (IS_ERR(inode)) {
gfs2_holder_uninit(ghs);
return PTR_ERR(inode);
@@ -387,10 +386,9 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
ip = ghs[1].gh_gl->gl_object;
- ip->i_di.di_nlink = 2;
+ ip->i_inode.i_nlink = 2;
ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
ip->i_di.di_flags |= GFS2_DIF_JDATA;
- ip->i_di.di_payload_format = GFS2_FORMAT_DE;
ip->i_di.di_entries = 2;
error = gfs2_meta_inode_buffer(ip, &dibh);
@@ -414,7 +412,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
gfs2_inum_out(&dip->i_num, &dent->de_inum);
dent->de_type = cpu_to_be16(DT_DIR);
- gfs2_dinode_out(&ip->i_di, di);
+ gfs2_dinode_out(ip, di);
brelse(dibh);
}
@@ -467,7 +465,7 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
if (ip->i_di.di_entries < 2) {
if (gfs2_consist_inode(ip))
- gfs2_dinode_print(&ip->i_di);
+ gfs2_dinode_print(ip);
error = -EIO;
goto out_gunlock;
}
@@ -504,47 +502,19 @@ out:
static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
dev_t dev)
{
- struct gfs2_inode *dip = GFS2_I(dir), *ip;
+ struct gfs2_inode *dip = GFS2_I(dir);
struct gfs2_sbd *sdp = GFS2_SB(dir);
struct gfs2_holder ghs[2];
struct inode *inode;
- struct buffer_head *dibh;
- u32 major = 0, minor = 0;
- int error;
-
- switch (mode & S_IFMT) {
- case S_IFBLK:
- case S_IFCHR:
- major = MAJOR(dev);
- minor = MINOR(dev);
- break;
- case S_IFIFO:
- case S_IFSOCK:
- break;
- default:
- return -EOPNOTSUPP;
- };
gfs2_holder_init(dip->i_gl, 0, 0, ghs);
- inode = gfs2_createi(ghs, &dentry->d_name, mode);
+ inode = gfs2_createi(ghs, &dentry->d_name, mode, dev);
if (IS_ERR(inode)) {
gfs2_holder_uninit(ghs);
return PTR_ERR(inode);
}
- ip = ghs[1].gh_gl->gl_object;
-
- ip->i_di.di_major = major;
- ip->i_di.di_minor = minor;
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
-
- if (!gfs2_assert_withdraw(sdp, !error)) {
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
- brelse(dibh);
- }
-
gfs2_trans_end(sdp);
if (dip->i_alloc.al_rgd)
gfs2_inplace_release(dip);
@@ -592,11 +562,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
/* Make sure we aren't trying to move a dirctory into it's subdir */
- if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) {
+ if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) {
dir_rename = 1;
- error = gfs2_glock_nq_init(sdp->sd_rename_gl,
- LM_ST_EXCLUSIVE, 0,
+ error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0,
&r_gh);
if (error)
goto out;
@@ -637,10 +606,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
if (error)
goto out_gunlock;
- if (S_ISDIR(nip->i_di.di_mode)) {
+ if (S_ISDIR(nip->i_inode.i_mode)) {
if (nip->i_di.di_entries < 2) {
if (gfs2_consist_inode(nip))
- gfs2_dinode_print(&nip->i_di);
+ gfs2_dinode_print(nip);
error = -EIO;
goto out_gunlock;
}
@@ -666,7 +635,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
};
if (odip != ndip) {
- if (!ndip->i_di.di_nlink) {
+ if (!ndip->i_inode.i_nlink) {
error = -EINVAL;
goto out_gunlock;
}
@@ -674,8 +643,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
error = -EFBIG;
goto out_gunlock;
}
- if (S_ISDIR(ip->i_di.di_mode) &&
- ndip->i_di.di_nlink == (u32)-1) {
+ if (S_ISDIR(ip->i_inode.i_mode) &&
+ ndip->i_inode.i_nlink == (u32)-1) {
error = -EMLINK;
goto out_gunlock;
}
@@ -702,8 +671,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
if (error)
goto out_alloc;
- error = gfs2_quota_check(ndip, ndip->i_di.di_uid,
- ndip->i_di.di_gid);
+ error = gfs2_quota_check(ndip, ndip->i_inode.i_uid, ndip->i_inode.i_gid);
if (error)
goto out_gunlock_q;
@@ -729,7 +697,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
/* Remove the target file, if it exists */
if (nip) {
- if (S_ISDIR(nip->i_di.di_mode))
+ if (S_ISDIR(nip->i_inode.i_mode))
error = gfs2_rmdiri(ndip, &ndentry->d_name, nip);
else {
error = gfs2_dir_del(ndip, &ndentry->d_name);
@@ -760,9 +728,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
goto out_end_trans;
- ip->i_di.di_ctime = get_seconds();
+ ip->i_inode.i_ctime.tv_sec = get_seconds();
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
}
@@ -771,7 +739,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
goto out_end_trans;
error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num,
- IF2DT(ip->i_di.di_mode));
+ IF2DT(ip->i_inode.i_mode));
if (error)
goto out_end_trans;
@@ -867,6 +835,10 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
* @mask:
* @nd: passed from Linux VFS, ignored by us
*
+ * This may be called from the VFS directly, or from within GFS2 with the
+ * inode locked, so we look to see if the glock is already locked and only
+ * lock the glock if that has not already been done.
+ *
* Returns: errno
*/
@@ -875,15 +847,18 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder i_gh;
int error;
+ int unlock = 0;
- if (ip->i_vn == ip->i_gl->gl_vn)
- return generic_permission(inode, mask, gfs2_check_acl);
+ if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) {
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+ if (error)
+ return error;
+ unlock = 1;
+ }
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
- if (!error) {
- error = generic_permission(inode, mask, gfs2_check_acl_locked);
+ error = generic_permission(inode, mask, gfs2_check_acl);
+ if (unlock)
gfs2_glock_dq_uninit(&i_gh);
- }
return error;
}
@@ -914,8 +889,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
u32 ouid, ogid, nuid, ngid;
int error;
- ouid = ip->i_di.di_uid;
- ogid = ip->i_di.di_gid;
+ ouid = inode->i_uid;
+ ogid = inode->i_gid;
nuid = attr->ia_uid;
ngid = attr->ia_gid;
@@ -946,10 +921,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
error = inode_setattr(inode, attr);
gfs2_assert_warn(sdp, !error);
- gfs2_inode_attr_out(ip);
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
@@ -1018,6 +992,12 @@ out:
* @dentry: The dentry to stat
* @stat: The inode's stats
*
+ * This may be called from the VFS directly, or from within GFS2 with the
+ * inode locked, so we look to see if the glock is already locked and only
+ * lock the glock if that has not already been done. Note that it's the NFS
+ * readdirplus operation which causes this to be called (from filldir)
+ * with the glock already held.
+ *
* Returns: errno
*/
@@ -1028,14 +1008,20 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
int error;
+ int unlock = 0;
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
- if (!error) {
- generic_fillattr(inode, stat);
- gfs2_glock_dq_uninit(&gh);
+ if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) {
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+ if (error)
+ return error;
+ unlock = 1;
}
- return error;
+ generic_fillattr(inode, stat);
+ if (unlock) /* gh is only initialised when this call took the glock above */
+ gfs2_glock_dq_uninit(&gh);
+
+ return 0;
}
static int gfs2_setxattr(struct dentry *dentry, const char *name,
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index b47d9598c04..7685b46f934 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -157,7 +157,8 @@ static void gfs2_write_super(struct super_block *sb)
static int gfs2_sync_fs(struct super_block *sb, int wait)
{
sb->s_dirt = 0;
- gfs2_log_flush(sb->s_fs_info, NULL);
+ if (wait)
+ gfs2_log_flush(sb->s_fs_info, NULL);
return 0;
}
@@ -215,7 +216,7 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_inode->i_sb;
struct gfs2_sbd *sdp = sb->s_fs_info;
- struct gfs2_statfs_change sc;
+ struct gfs2_statfs_change_host sc;
int error;
if (gfs2_tune_get(sdp, gt_statfs_slow))
@@ -293,8 +294,6 @@ static void gfs2_clear_inode(struct inode *inode)
*/
if (inode->i_private) {
struct gfs2_inode *ip = GFS2_I(inode);
- gfs2_glock_inode_squish(inode);
- gfs2_assert(inode->i_sb->s_fs_info, ip->i_gl->gl_state == LM_ST_UNLOCKED);
ip->i_gl->gl_object = NULL;
gfs2_glock_schedule_for_reclaim(ip->i_gl);
gfs2_glock_put(ip->i_gl);
@@ -395,7 +394,7 @@ static void gfs2_delete_inode(struct inode *inode)
if (!inode->i_private)
goto out;
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &gh);
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh);
if (unlikely(error)) {
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
goto out;
@@ -407,7 +406,7 @@ static void gfs2_delete_inode(struct inode *inode)
if (error)
goto out_uninit;
- if (S_ISDIR(ip->i_di.di_mode) &&
+ if (S_ISDIR(inode->i_mode) &&
(ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
error = gfs2_dir_exhash_dealloc(ip);
if (error)
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index 5453d2947ab..45a5f11fc39 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -76,7 +76,7 @@ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
if (error)
goto out;
- error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+ error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
if (error)
goto out_gunlock_q;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a3deae7416c..d0db881b55d 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -452,19 +452,19 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
return 0;
- error = qdsb_get(sdp, QUOTA_USER, ip->i_di.di_uid, CREATE, qd);
+ error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, CREATE, qd);
if (error)
goto out;
al->al_qd_num++;
qd++;
- error = qdsb_get(sdp, QUOTA_GROUP, ip->i_di.di_gid, CREATE, qd);
+ error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, CREATE, qd);
if (error)
goto out;
al->al_qd_num++;
qd++;
- if (uid != NO_QUOTA_CHANGE && uid != ip->i_di.di_uid) {
+ if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd);
if (error)
goto out;
@@ -472,7 +472,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
qd++;
}
- if (gid != NO_QUOTA_CHANGE && gid != ip->i_di.di_gid) {
+ if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) {
error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd);
if (error)
goto out;
@@ -539,8 +539,7 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
qc->qc_id = cpu_to_be32(qd->qd_id);
}
- x = qc->qc_change;
- x = be64_to_cpu(x) + change;
+ x = be64_to_cpu(qc->qc_change) + change;
qc->qc_change = cpu_to_be64(x);
spin_lock(&sdp->sd_quota_spin);
@@ -743,7 +742,7 @@ static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
struct gfs2_holder i_gh;
- struct gfs2_quota q;
+ struct gfs2_quota_host q;
char buf[sizeof(struct gfs2_quota)];
struct file_ra_state ra_state;
int error;
@@ -1103,7 +1102,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots;
y++, slot++) {
- struct gfs2_quota_change qc;
+ struct gfs2_quota_change_host qc;
struct gfs2_quota_data *qd;
gfs2_quota_change_in(&qc, bh->b_data +
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 62cd223819b..d0c806b85c8 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -132,10 +132,11 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
*/
static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
- struct gfs2_log_header *head)
+ struct gfs2_log_header_host *head)
{
struct buffer_head *bh;
- struct gfs2_log_header lh;
+ struct gfs2_log_header_host lh;
+ const u32 nothing = 0;
u32 hash;
int error;
@@ -143,11 +144,11 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
if (error)
return error;
- memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));
- lh.lh_hash = 0;
- hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
+ hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) -
+ sizeof(u32));
+ hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
+ hash ^= (u32)~0;
gfs2_log_header_in(&lh, bh->b_data);
-
brelse(bh);
if (lh.lh_header.mh_magic != GFS2_MAGIC ||
@@ -174,7 +175,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
*/
static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
- struct gfs2_log_header *head)
+ struct gfs2_log_header_host *head)
{
unsigned int orig_blk = *blk;
int error;
@@ -205,10 +206,10 @@ static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
* Returns: errno
*/
-static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
unsigned int blk = head->lh_blkno;
- struct gfs2_log_header lh;
+ struct gfs2_log_header_host lh;
int error;
for (;;) {
@@ -245,9 +246,9 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
* Returns: errno
*/
-int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
- struct gfs2_log_header lh_1, lh_m;
+ struct gfs2_log_header_host lh_1, lh_m;
u32 blk_1, blk_2, blk_m;
int error;
@@ -320,7 +321,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
length = be32_to_cpu(ld->ld_length);
if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
- struct gfs2_log_header lh;
+ struct gfs2_log_header_host lh;
error = get_log_header(jd, start, &lh);
if (!error) {
gfs2_replay_incr_blk(sdp, &start);
@@ -363,7 +364,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
* Returns: errno
*/
-static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
@@ -425,7 +426,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
- struct gfs2_log_header head;
+ struct gfs2_log_header_host head;
struct gfs2_holder j_gh, ji_gh, t_gh;
unsigned long t;
int ro = 0;
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index 961feedf4d8..f7235e61c72 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -26,7 +26,7 @@ int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
void gfs2_revoke_clean(struct gfs2_sbd *sdp);
int gfs2_find_jhead(struct gfs2_jdesc *jd,
- struct gfs2_log_header *head);
+ struct gfs2_log_header_host *head);
int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
void gfs2_check_journals(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index b261385c006..ff0846528d5 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -253,7 +253,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
}
-static inline int rgrp_contains_block(struct gfs2_rindex *ri, u64 block)
+static inline int rgrp_contains_block(struct gfs2_rindex_host *ri, u64 block)
{
u64 first = ri->ri_data0;
u64 last = first + ri->ri_data;
@@ -1217,7 +1217,7 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip)
al->al_alloced++;
gfs2_statfs_change(sdp, 0, -1, 0);
- gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
+ gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid);
spin_lock(&sdp->sd_rindex_spin);
rgd->rd_free_clone--;
@@ -1261,7 +1261,7 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip)
al->al_alloced++;
gfs2_statfs_change(sdp, 0, -1, 0);
- gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
+ gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid);
gfs2_trans_add_unrevoke(sdp, block);
spin_lock(&sdp->sd_rindex_spin);
@@ -1337,8 +1337,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
gfs2_trans_add_rg(rgd);
gfs2_statfs_change(sdp, 0, +blen, 0);
- gfs2_quota_change(ip, -(s64)blen,
- ip->i_di.di_uid, ip->i_di.di_gid);
+ gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
}
/**
@@ -1366,7 +1365,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
gfs2_trans_add_rg(rgd);
gfs2_statfs_change(sdp, 0, +blen, 0);
- gfs2_quota_change(ip, -(s64)blen, ip->i_di.di_uid, ip->i_di.di_gid);
+ gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
gfs2_meta_wipe(ip, bstart, blen);
}
@@ -1411,7 +1410,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
- gfs2_quota_change(ip, -1, ip->i_di.di_uid, ip->i_di.di_gid);
+ gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 6a78b1b32e2..43a24f2e590 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -97,7 +97,7 @@ void gfs2_tune_init(struct gfs2_tune *gt)
* changed.
*/
-int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent)
+int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
{
unsigned int x;
@@ -180,6 +180,24 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
return 0;
}
+/**
+ * gfs2_read_super - Read the gfs2 super block from disk
+ * @sb: The VFS super block
+ * @sector: The location of the super block
+ *
+ * This uses the bio functions to read the super block from disk
+ * because we want to be 100% sure that we never read cached data.
+ * A super block is read only twice during each GFS2 mount and is
+ * never written to by the filesystem. The first time it's read, no
+ * locks are held, and the only details which are looked at are those
+ * relating to the locking protocol. Once locking is up and working,
+ * the sb is read again under the lock to establish the location of
+ * the master directory (contains pointers to journals etc) and the
+ * root directory.
+ *
+ * Returns: A page containing the sb or NULL
+ */
+
struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
{
struct page *page;
@@ -199,7 +217,7 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
return NULL;
}
- bio->bi_sector = sector;
+ bio->bi_sector = sector * (sb->s_blocksize >> 9);
bio->bi_bdev = sb->s_bdev;
bio_add_page(bio, page, PAGE_SIZE, 0);
@@ -508,7 +526,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
struct gfs2_glock *j_gl = ip->i_gl;
struct gfs2_holder t_gh;
- struct gfs2_log_header head;
+ struct gfs2_log_header_host head;
int error;
error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
@@ -517,7 +535,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
return error;
gfs2_meta_cache_flush(ip);
- j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
+ j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
error = gfs2_find_jhead(sdp->sd_jdesc, &head);
if (error)
@@ -587,9 +605,9 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
int gfs2_statfs_init(struct gfs2_sbd *sdp)
{
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
- struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
+ struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
- struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+ struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
struct buffer_head *m_bh, *l_bh;
struct gfs2_holder gh;
int error;
@@ -634,7 +652,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
s64 dinodes)
{
struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
- struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+ struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
struct buffer_head *l_bh;
int error;
@@ -660,8 +678,8 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp)
{
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
- struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
- struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+ struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+ struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
struct gfs2_holder gh;
struct buffer_head *m_bh, *l_bh;
int error;
@@ -727,10 +745,10 @@ out:
* Returns: errno
*/
-int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
+int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
{
- struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
- struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+ struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+ struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
spin_lock(&sdp->sd_statfs_spin);
@@ -760,7 +778,7 @@ int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
*/
static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
- struct gfs2_statfs_change *sc)
+ struct gfs2_statfs_change_host *sc)
{
gfs2_rgrp_verify(rgd);
sc->sc_total += rgd->rd_ri.ri_data;
@@ -782,7 +800,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
* Returns: errno
*/
-int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
+int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
{
struct gfs2_holder ri_gh;
struct gfs2_rgrpd *rgd_next;
@@ -792,7 +810,7 @@ int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
int done;
int error = 0, err;
- memset(sc, 0, sizeof(struct gfs2_statfs_change));
+ memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
if (!gha)
return -ENOMEM;
@@ -873,7 +891,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
struct gfs2_jdesc *jd;
struct lfcc *lfcc;
LIST_HEAD(list);
- struct gfs2_log_header lh;
+ struct gfs2_log_header_host lh;
int error;
error = gfs2_jindex_hold(sdp, &ji_gh);
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 5bb443ae0f5..e590b2df11d 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -14,7 +14,7 @@
void gfs2_tune_init(struct gfs2_tune *gt);
-int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent);
+int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
struct page *gfs2_read_super(struct super_block *sb, sector_t sector);
@@ -45,8 +45,8 @@ int gfs2_statfs_init(struct gfs2_sbd *sdp);
void gfs2_statfs_change(struct gfs2_sbd *sdp,
s64 total, s64 free, s64 dinodes);
int gfs2_statfs_sync(struct gfs2_sbd *sdp);
-int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
-int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
+int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc);
+int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc);
int gfs2_freeze_fs(struct gfs2_sbd *sdp);
void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 0e0ec988f73..983eaf1e06b 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -426,9 +426,6 @@ static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
} \
TUNE_ATTR_2(name, name##_store)
-TUNE_ATTR(ilimit, 0);
-TUNE_ATTR(ilimit_tries, 0);
-TUNE_ATTR(ilimit_min, 0);
TUNE_ATTR(demote_secs, 0);
TUNE_ATTR(incore_log_blocks, 0);
TUNE_ATTR(log_flush_secs, 0);
@@ -447,7 +444,6 @@ TUNE_ATTR(quota_simul_sync, 1);
TUNE_ATTR(quota_cache_secs, 1);
TUNE_ATTR(max_atomic_write, 1);
TUNE_ATTR(stall_secs, 1);
-TUNE_ATTR(entries_per_readdir, 1);
TUNE_ATTR(greedy_default, 1);
TUNE_ATTR(greedy_quantum, 1);
TUNE_ATTR(greedy_max, 1);
@@ -459,9 +455,6 @@ TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
static struct attribute *tune_attrs[] = {
- &tune_attr_ilimit.attr,
- &tune_attr_ilimit_tries.attr,
- &tune_attr_ilimit_min.attr,
&tune_attr_demote_secs.attr,
&tune_attr_incore_log_blocks.attr,
&tune_attr_log_flush_secs.attr,
@@ -478,7 +471,6 @@ static struct attribute *tune_attrs[] = {
&tune_attr_quota_cache_secs.attr,
&tune_attr_max_atomic_write.attr,
&tune_attr_stall_secs.attr,
- &tune_attr_entries_per_readdir.attr,
&tune_attr_greedy_default.attr,
&tune_attr_greedy_quantum.attr,
&tune_attr_greedy_max.attr,
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 7984dcf89ad..28938a46cf4 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -83,8 +83,7 @@ static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
char *file, unsigned int line)
{
struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
- u32 magic = mh->mh_magic;
- magic = be32_to_cpu(magic);
+ u32 magic = be32_to_cpu(mh->mh_magic);
if (unlikely(magic != GFS2_MAGIC))
return gfs2_meta_check_ii(sdp, bh, "magic number", function,
file, line);
@@ -107,9 +106,8 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
char *file, unsigned int line)
{
struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
- u32 magic = mh->mh_magic;
+ u32 magic = be32_to_cpu(mh->mh_magic);
u16 t = be32_to_cpu(mh->mh_type);
- magic = be32_to_cpu(magic);
if (unlikely(magic != GFS2_MAGIC))
return gfs2_meta_check_ii(sdp, bh, "magic number", function,
file, line);
diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h
index a7ae7c177ca..8b7e4c1e32a 100644
--- a/include/linux/gfs2_ondisk.h
+++ b/include/linux/gfs2_ondisk.h
@@ -54,8 +54,13 @@ struct gfs2_inum {
__be64 no_addr;
};
-static inline int gfs2_inum_equal(const struct gfs2_inum *ino1,
- const struct gfs2_inum *ino2)
+struct gfs2_inum_host {
+ __u64 no_formal_ino;
+ __u64 no_addr;
+};
+
+static inline int gfs2_inum_equal(const struct gfs2_inum_host *ino1,
+ const struct gfs2_inum_host *ino2)
{
return ino1->no_formal_ino == ino2->no_formal_ino &&
ino1->no_addr == ino2->no_addr;
@@ -89,6 +94,12 @@ struct gfs2_meta_header {
__be32 __pad1; /* Was incarnation number in gfs1 */
};
+struct gfs2_meta_header_host {
+ __u32 mh_magic;
+ __u32 mh_type;
+ __u32 mh_format;
+};
+
/*
* super-block structure
*
@@ -128,6 +139,23 @@ struct gfs2_sb {
/* In gfs1, quota and license dinodes followed */
};
+struct gfs2_sb_host {
+ struct gfs2_meta_header_host sb_header;
+
+ __u32 sb_fs_format;
+ __u32 sb_multihost_format;
+
+ __u32 sb_bsize;
+ __u32 sb_bsize_shift;
+
+ struct gfs2_inum_host sb_master_dir; /* Was jindex dinode in gfs1 */
+ struct gfs2_inum_host sb_root_dir;
+
+ char sb_lockproto[GFS2_LOCKNAME_LEN];
+ char sb_locktable[GFS2_LOCKNAME_LEN];
+ /* In gfs1, quota and license dinodes followed */
+};
+
/*
* resource index structure
*/
@@ -145,6 +173,14 @@ struct gfs2_rindex {
__u8 ri_reserved[64];
};
+struct gfs2_rindex_host {
+ __u64 ri_addr; /* grp block disk address */
+ __u64 ri_data0; /* first data location */
+ __u32 ri_length; /* length of rgrp header in fs blocks */
+ __u32 ri_data; /* num of data blocks in rgrp */
+ __u32 ri_bitbytes; /* number of bytes in data bitmaps */
+};
+
/*
* resource group header structure
*/
@@ -176,6 +212,13 @@ struct gfs2_rgrp {
__u8 rg_reserved[80]; /* Several fields from gfs1 now reserved */
};
+struct gfs2_rgrp_host {
+ __u32 rg_flags;
+ __u32 rg_free;
+ __u32 rg_dinodes;
+ __u64 rg_igeneration;
+};
+
/*
* quota structure
*/
@@ -187,6 +230,12 @@ struct gfs2_quota {
__u8 qu_reserved[64];
};
+struct gfs2_quota_host {
+ __u64 qu_limit;
+ __u64 qu_warn;
+ __u64 qu_value;
+};
+
/*
* dinode structure
*/
@@ -270,6 +319,27 @@ struct gfs2_dinode {
__u8 di_reserved[56];
};
+struct gfs2_dinode_host {
+ __u64 di_size; /* number of bytes in file */
+ __u64 di_blocks; /* number of blocks in file */
+
+ /* This section varies from gfs1. Padding added to align with
+ * remainder of dinode
+ */
+ __u64 di_goal_meta; /* rgrp to alloc from next */
+ __u64 di_goal_data; /* data block goal */
+ __u64 di_generation; /* generation number for NFS */
+
+ __u32 di_flags; /* GFS2_DIF_... */
+ __u16 di_height; /* height of metadata */
+
+ /* These only apply to directories */
+ __u16 di_depth; /* Number of bits in the table */
+ __u32 di_entries; /* The number of entries in the directory */
+
+ __u64 di_eattr; /* extended attribute block number */
+};
+
/*
* directory structure - many of these per directory file
*/
@@ -344,6 +414,16 @@ struct gfs2_log_header {
__be32 lh_hash;
};
+struct gfs2_log_header_host {
+ struct gfs2_meta_header_host lh_header;
+
+ __u64 lh_sequence; /* Sequence number of this transaction */
+ __u32 lh_flags; /* GFS2_LOG_HEAD_... */
+ __u32 lh_tail; /* Block number of log tail */
+ __u32 lh_blkno;
+ __u32 lh_hash;
+};
+
/*
* Log type descriptor
*/
@@ -384,6 +464,11 @@ struct gfs2_inum_range {
__be64 ir_length;
};
+struct gfs2_inum_range_host {
+ __u64 ir_start;
+ __u64 ir_length;
+};
+
/*
* Statfs change
* Describes an change to the pool of free and allocated
@@ -396,6 +481,12 @@ struct gfs2_statfs_change {
__be64 sc_dinodes;
};
+struct gfs2_statfs_change_host {
+ __u64 sc_total;
+ __u64 sc_free;
+ __u64 sc_dinodes;
+};
+
/*
* Quota change
* Describes an allocation change for a particular
@@ -410,33 +501,38 @@ struct gfs2_quota_change {
__be32 qc_id;
};
+struct gfs2_quota_change_host {
+ __u64 qc_change;
+ __u32 qc_flags; /* GFS2_QCF_... */
+ __u32 qc_id;
+};
+
#ifdef __KERNEL__
/* Translation functions */
-extern void gfs2_inum_in(struct gfs2_inum *no, const void *buf);
-extern void gfs2_inum_out(const struct gfs2_inum *no, void *buf);
-extern void gfs2_sb_in(struct gfs2_sb *sb, const void *buf);
-extern void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf);
-extern void gfs2_rindex_out(const struct gfs2_rindex *ri, void *buf);
-extern void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf);
-extern void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf);
-extern void gfs2_quota_in(struct gfs2_quota *qu, const void *buf);
-extern void gfs2_quota_out(const struct gfs2_quota *qu, void *buf);
-extern void gfs2_dinode_in(struct gfs2_dinode *di, const void *buf);
-extern void gfs2_dinode_out(const struct gfs2_dinode *di, void *buf);
+extern void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf);
+extern void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf);
+extern void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf);
+extern void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf);
+extern void gfs2_rindex_out(const struct gfs2_rindex_host *ri, void *buf);
+extern void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf);
+extern void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf);
+extern void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf);
+struct gfs2_inode;
+extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, const void *buf);
extern void gfs2_ea_header_out(const struct gfs2_ea_header *ea, void *buf);
-extern void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf);
-extern void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf);
-extern void gfs2_inum_range_out(const struct gfs2_inum_range *ir, void *buf);
-extern void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf);
-extern void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf);
-extern void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf);
+extern void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf);
+extern void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf);
+extern void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf);
+extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf);
+extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf);
+extern void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf);
/* Printing functions */
-extern void gfs2_rindex_print(const struct gfs2_rindex *ri);
-extern void gfs2_dinode_print(const struct gfs2_dinode *di);
+extern void gfs2_rindex_print(const struct gfs2_rindex_host *ri);
+extern void gfs2_dinode_print(const struct gfs2_inode *ip);
#endif /* __KERNEL__ */