Diffstat (limited to 'fs'): 57 files changed, 4543 insertions, 2238 deletions
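The recurring pattern in this series is credit-based flow control for async I/O: a caller reserves send credits sized to the operation via the new wait_mtu_credits op, records them on the request (wdata->credits / rdata->credits), and on any failure path hands them back and wakes waiters through add_credits_and_wake_if(). Below is a minimal standalone sketch of that call pattern, assuming simplified stand-in types; the struct layouts and helper bodies here are invented for illustration and are not the kernel definitions, only the flow mirrors the patch.

/*
 * Illustrative sketch only: simplified stand-ins for the CIFS credit
 * flow introduced in this series. All types and helpers here are
 * hypothetical; only the reserve/attach/return pattern follows the diff.
 */
struct server {
        int (*wait_mtu_credits)(struct server *, unsigned int size,
                                unsigned int *num, unsigned int *credits);
        void (*add_credits)(struct server *, unsigned int add, int optype);
};

/* stand-in for wake_up(&server->request_q) */
static void wake_waiters(struct server *s) { (void)s; }

/* mirrors add_credits_and_wake_if() added to cifsglob.h */
static void add_credits_and_wake_if(struct server *s, unsigned int add,
                                    int optype)
{
        if (add) {
                s->add_credits(s, add, optype);
                wake_waiters(s);
        }
}

struct writedata {
        unsigned int bytes;
        unsigned int credits;   /* credits charged to this request */
};

static int send_async_write(struct server *s, struct writedata *wd,
                            unsigned int wsize)
{
        int rc;
        unsigned int size, credits;

        /* reserve credits sized to the requested write */
        rc = s->wait_mtu_credits(s, wsize, &size, &credits);
        if (rc)
                return rc;

        wd->bytes = size;
        wd->credits = credits;  /* consumed when the response arrives */

        rc = 0; /* server->ops->async_writev(...) would be issued here */
        if (rc) /* failure: return the credits and wake other waiters */
                add_credits_and_wake_if(s, wd->credits, 0);
        return rc;
}

This is why every error path in the reworked cifs_writepages(), cifs_write_from_iter(), cifs_send_async_read() and cifs_readpages() in the diff calls add_credits_and_wake_if() before bailing out or retrying on -EAGAIN: the reserved credits must flow back to server->request_q so other waiters are not starved.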
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index f3ac4154cbb..44ec72684df 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -213,7 +213,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) tcon->nativeFileSystem); } seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x" - "\n\tPathComponentMax: %d Status: 0x%d", + "\n\tPathComponentMax: %d Status: %d", le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), le32_to_cpu(tcon->fsAttrInfo.Attributes), le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 70f178a7c75..56048026333 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -136,5 +136,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.03" +#define CIFS_VERSION "2.04" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index de6aed8c78e..0012e1e291d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -404,6 +404,11 @@ struct smb_version_operations { const struct cifs_fid *, u32 *); int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *, int); + /* writepages retry size */ + unsigned int (*wp_retry_size)(struct inode *); + /* get mtu credits */ + int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int, + unsigned int *, unsigned int *); }; struct smb_version_values { @@ -640,6 +645,16 @@ add_credits(struct TCP_Server_Info *server, const unsigned int add, } static inline void +add_credits_and_wake_if(struct TCP_Server_Info *server, const unsigned int add, + const int optype) +{ + if (add) { + server->ops->add_credits(server, add, optype); + wake_up(&server->request_q); + } +} + +static inline void set_credits(struct TCP_Server_Info *server, const int val) { server->ops->set_credits(server, val); @@ -1044,6 +1059,7 @@ struct cifs_readdata { struct address_space *mapping; __u64 offset; unsigned int bytes; + unsigned int got_bytes; pid_t pid; int result; struct work_struct work; @@ -1053,6 +1069,7 @@ struct cifs_readdata { struct kvec iov; unsigned int pagesz; unsigned int tailsz; + unsigned int credits; unsigned int nr_pages; struct page *pages[]; }; @@ -1073,6 +1090,7 @@ struct cifs_writedata { int result; unsigned int pagesz; unsigned int tailsz; + unsigned int credits; unsigned int nr_pages; struct page *pages[]; }; @@ -1398,6 +1416,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, #define CIFS_OBREAK_OP 0x0100 /* oplock break request */ #define CIFS_NEG_OP 0x0200 /* negotiate request */ #define CIFS_OP_MASK 0x0380 /* mask request type */ +#define CIFS_HAS_CREDITS 0x0400 /* already has credits */ /* Security Flags: indicate type of session setup needed */ #define CIFSSEC_MAY_SIGN 0x00001 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index ca7980a1e30..c31ce98c170 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -36,6 +36,7 @@ extern struct smb_hdr *cifs_buf_get(void); extern void cifs_buf_release(void *); extern struct smb_hdr *cifs_small_buf_get(void); extern void cifs_small_buf_release(void *); +extern void free_rsp_buf(int, void *); extern void cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, struct kvec *iov); extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *, @@ -89,6 +90,9 @@ extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *, struct smb_rqst *); extern int cifs_check_receive(struct 
mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); +extern int cifs_wait_mtu_credits(struct TCP_Server_Info *server, + unsigned int size, unsigned int *num, + unsigned int *credits); extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, struct kvec *, int /* nvec to send */, int * /* type of buf returned */ , const int flags); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 6ce4e0954b9..66f65001a6d 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -196,10 +196,6 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) if (rc) goto out; - /* - * FIXME: check if wsize needs updated due to negotiated smb buffer - * size shrinking - */ atomic_inc(&tconInfoReconnectCount); /* tell server Unix caps we support */ @@ -1517,7 +1513,6 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return length; server->total_read += length; - rdata->bytes = length; cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n", server->total_read, buflen, data_len); @@ -1560,12 +1555,18 @@ cifs_readv_callback(struct mid_q_entry *mid) rc); } /* FIXME: should this be counted toward the initiating task? */ - task_io_account_read(rdata->bytes); - cifs_stats_bytes_read(tcon, rdata->bytes); + task_io_account_read(rdata->got_bytes); + cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_REQUEST_SUBMITTED: case MID_RETRY_NEEDED: rdata->result = -EAGAIN; + if (server->sign && rdata->got_bytes) + /* reset bytes number since we can not check a sign */ + rdata->got_bytes = 0; + /* FIXME: should this be counted toward the initiating task? */ + task_io_account_read(rdata->got_bytes); + cifs_stats_bytes_read(tcon, rdata->got_bytes); break; default: rdata->result = -EIO; @@ -1734,10 +1735,7 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms, /* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ if (*buf) { - if (resp_buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(iov[0].iov_base); - else if (resp_buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(iov[0].iov_base); + free_rsp_buf(resp_buf_type, iov[0].iov_base); } else if (resp_buf_type != CIFS_NO_BUFFER) { /* return buffer to caller to free */ *buf = iov[0].iov_base; @@ -1899,28 +1897,80 @@ cifs_writedata_release(struct kref *refcount) static void cifs_writev_requeue(struct cifs_writedata *wdata) { - int i, rc; + int i, rc = 0; struct inode *inode = wdata->cfile->dentry->d_inode; struct TCP_Server_Info *server; + unsigned int rest_len; - for (i = 0; i < wdata->nr_pages; i++) { - lock_page(wdata->pages[i]); - clear_page_dirty_for_io(wdata->pages[i]); - } - + server = tlink_tcon(wdata->cfile->tlink)->ses->server; + i = 0; + rest_len = wdata->bytes; do { - server = tlink_tcon(wdata->cfile->tlink)->ses->server; - rc = server->ops->async_writev(wdata, cifs_writedata_release); - } while (rc == -EAGAIN); + struct cifs_writedata *wdata2; + unsigned int j, nr_pages, wsize, tailsz, cur_len; + + wsize = server->ops->wp_retry_size(inode); + if (wsize < rest_len) { + nr_pages = wsize / PAGE_CACHE_SIZE; + if (!nr_pages) { + rc = -ENOTSUPP; + break; + } + cur_len = nr_pages * PAGE_CACHE_SIZE; + tailsz = PAGE_CACHE_SIZE; + } else { + nr_pages = DIV_ROUND_UP(rest_len, PAGE_CACHE_SIZE); + cur_len = rest_len; + tailsz = rest_len - (nr_pages - 1) * PAGE_CACHE_SIZE; + } - for (i = 0; i < wdata->nr_pages; i++) { - unlock_page(wdata->pages[i]); - if (rc != 0) { - SetPageError(wdata->pages[i]); - end_page_writeback(wdata->pages[i]); - 
page_cache_release(wdata->pages[i]); + wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete); + if (!wdata2) { + rc = -ENOMEM; + break; } - } + + for (j = 0; j < nr_pages; j++) { + wdata2->pages[j] = wdata->pages[i + j]; + lock_page(wdata2->pages[j]); + clear_page_dirty_for_io(wdata2->pages[j]); + } + + wdata2->sync_mode = wdata->sync_mode; + wdata2->nr_pages = nr_pages; + wdata2->offset = page_offset(wdata2->pages[0]); + wdata2->pagesz = PAGE_CACHE_SIZE; + wdata2->tailsz = tailsz; + wdata2->bytes = cur_len; + + wdata2->cfile = find_writable_file(CIFS_I(inode), false); + if (!wdata2->cfile) { + cifs_dbg(VFS, "No writable handles for inode\n"); + rc = -EBADF; + break; + } + wdata2->pid = wdata2->cfile->pid; + rc = server->ops->async_writev(wdata2, cifs_writedata_release); + + for (j = 0; j < nr_pages; j++) { + unlock_page(wdata2->pages[j]); + if (rc != 0 && rc != -EAGAIN) { + SetPageError(wdata2->pages[j]); + end_page_writeback(wdata2->pages[j]); + page_cache_release(wdata2->pages[j]); + } + } + + if (rc) { + kref_put(&wdata2->refcount, cifs_writedata_release); + if (rc == -EAGAIN) + continue; + break; + } + + rest_len -= cur_len; + i += nr_pages; + } while (i < wdata->nr_pages); mapping_set_error(inode->i_mapping, rc); kref_put(&wdata->refcount, cifs_writedata_release); @@ -2203,10 +2253,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, } /* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ - if (resp_buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(iov[0].iov_base); - else if (resp_buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(iov[0].iov_base); + free_rsp_buf(resp_buf_type, iov[0].iov_base); /* Note: On -EAGAIN error only caller can retry on handle based calls since file handle passed in no longer valid */ @@ -2451,10 +2498,7 @@ plk_err_exit: if (pSMB) cifs_small_buf_release(pSMB); - if (resp_buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(iov[0].iov_base); - else if (resp_buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(iov[0].iov_base); + free_rsp_buf(resp_buf_type, iov[0].iov_base); /* Note: On -EAGAIN error only caller can retry on handle based calls since file handle passed in no longer valid */ @@ -3838,10 +3882,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, } } qsec_out: - if (buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(iov[0].iov_base); - else if (buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(iov[0].iov_base); + free_rsp_buf(buf_type, iov[0].iov_base); /* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ return rc; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index b98366f21f9..03ed8a09581 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -557,7 +557,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, try_to_freeze(); if (server_unresponsive(server)) { - total_read = -EAGAIN; + total_read = -ECONNABORTED; break; } @@ -571,7 +571,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, break; } else if (server->tcpStatus == CifsNeedReconnect) { cifs_reconnect(server); - total_read = -EAGAIN; + total_read = -ECONNABORTED; break; } else if (length == -ERESTARTSYS || length == -EAGAIN || @@ -588,7 +588,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, cifs_dbg(FYI, "Received no data or error: expecting %d\n" "got %d", to_read, length); cifs_reconnect(server); - total_read = -EAGAIN; + total_read = -ECONNABORTED; break; } } @@ -786,7 
+786,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) cifs_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length); cifs_reconnect(server); wake_up(&server->response_q); - return -EAGAIN; + return -ECONNABORTED; } /* switch to large buffer if too big for a small one */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index b88b1ade4d3..4ab2f79ffa7 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1670,8 +1670,8 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, break; } - len = min((size_t)cifs_sb->wsize, - write_size - total_written); + len = min(server->ops->wp_retry_size(dentry->d_inode), + (unsigned int)write_size - total_written); /* iov[0] is reserved for smb header */ iov[1].iov_base = (char *)write_data + total_written; iov[1].iov_len = len; @@ -1878,15 +1878,163 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) return rc; } +static struct cifs_writedata * +wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping, + pgoff_t end, pgoff_t *index, + unsigned int *found_pages) +{ + unsigned int nr_pages; + struct page **pages; + struct cifs_writedata *wdata; + + wdata = cifs_writedata_alloc((unsigned int)tofind, + cifs_writev_complete); + if (!wdata) + return NULL; + + /* + * find_get_pages_tag seems to return a max of 256 on each + * iteration, so we must call it several times in order to + * fill the array or the wsize is effectively limited to + * 256 * PAGE_CACHE_SIZE. + */ + *found_pages = 0; + pages = wdata->pages; + do { + nr_pages = find_get_pages_tag(mapping, index, + PAGECACHE_TAG_DIRTY, tofind, + pages); + *found_pages += nr_pages; + tofind -= nr_pages; + pages += nr_pages; + } while (nr_pages && tofind && *index <= end); + + return wdata; +} + +static unsigned int +wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages, + struct address_space *mapping, + struct writeback_control *wbc, + pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done) +{ + unsigned int nr_pages = 0, i; + struct page *page; + + for (i = 0; i < found_pages; i++) { + page = wdata->pages[i]; + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + + if (nr_pages == 0) + lock_page(page); + else if (!trylock_page(page)) + break; + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + break; + } + + if (!wbc->range_cyclic && page->index > end) { + *done = true; + unlock_page(page); + break; + } + + if (*next && (page->index != *next)) { + /* Not next consecutive page */ + unlock_page(page); + break; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + break; + } + + /* + * This actually clears the dirty bit in the radix tree. + * See cifs_writepage() for more commentary. 
+ */ + set_page_writeback(page); + if (page_offset(page) >= i_size_read(mapping->host)) { + *done = true; + unlock_page(page); + end_page_writeback(page); + break; + } + + wdata->pages[i] = page; + *next = page->index + 1; + ++nr_pages; + } + + /* reset index to refind any pages skipped */ + if (nr_pages == 0) + *index = wdata->pages[0]->index + 1; + + /* put any pages we aren't going to use */ + for (i = nr_pages; i < found_pages; i++) { + page_cache_release(wdata->pages[i]); + wdata->pages[i] = NULL; + } + + return nr_pages; +} + +static int +wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages, + struct address_space *mapping, struct writeback_control *wbc) +{ + int rc = 0; + struct TCP_Server_Info *server; + unsigned int i; + + wdata->sync_mode = wbc->sync_mode; + wdata->nr_pages = nr_pages; + wdata->offset = page_offset(wdata->pages[0]); + wdata->pagesz = PAGE_CACHE_SIZE; + wdata->tailsz = min(i_size_read(mapping->host) - + page_offset(wdata->pages[nr_pages - 1]), + (loff_t)PAGE_CACHE_SIZE); + wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz; + + if (wdata->cfile != NULL) + cifsFileInfo_put(wdata->cfile); + wdata->cfile = find_writable_file(CIFS_I(mapping->host), false); + if (!wdata->cfile) { + cifs_dbg(VFS, "No writable handles for inode\n"); + rc = -EBADF; + } else { + wdata->pid = wdata->cfile->pid; + server = tlink_tcon(wdata->cfile->tlink)->ses->server; + rc = server->ops->async_writev(wdata, cifs_writedata_release); + } + + for (i = 0; i < nr_pages; ++i) + unlock_page(wdata->pages[i]); + + return rc; +} + static int cifs_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb); + struct TCP_Server_Info *server; bool done = false, scanned = false, range_whole = false; pgoff_t end, index; struct cifs_writedata *wdata; - struct TCP_Server_Info *server; - struct page *page; int rc = 0; /* @@ -1906,152 +2054,50 @@ static int cifs_writepages(struct address_space *mapping, range_whole = true; scanned = true; } + server = cifs_sb_master_tcon(cifs_sb)->ses->server; retry: while (!done && index <= end) { - unsigned int i, nr_pages, found_pages; - pgoff_t next = 0, tofind; - struct page **pages; + unsigned int i, nr_pages, found_pages, wsize, credits; + pgoff_t next = 0, tofind, saved_index = index; + + rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize, + &wsize, &credits); + if (rc) + break; - tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1, - end - index) + 1; + tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1; - wdata = cifs_writedata_alloc((unsigned int)tofind, - cifs_writev_complete); + wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index, + &found_pages); if (!wdata) { rc = -ENOMEM; + add_credits_and_wake_if(server, credits, 0); break; } - /* - * find_get_pages_tag seems to return a max of 256 on each - * iteration, so we must call it several times in order to - * fill the array or the wsize is effectively limited to - * 256 * PAGE_CACHE_SIZE. 
- */ - found_pages = 0; - pages = wdata->pages; - do { - nr_pages = find_get_pages_tag(mapping, &index, - PAGECACHE_TAG_DIRTY, - tofind, pages); - found_pages += nr_pages; - tofind -= nr_pages; - pages += nr_pages; - } while (nr_pages && tofind && index <= end); - if (found_pages == 0) { kref_put(&wdata->refcount, cifs_writedata_release); + add_credits_and_wake_if(server, credits, 0); break; } - nr_pages = 0; - for (i = 0; i < found_pages; i++) { - page = wdata->pages[i]; - /* - * At this point we hold neither mapping->tree_lock nor - * lock on the page itself: the page may be truncated or - * invalidated (changing page->mapping to NULL), or even - * swizzled back from swapper_space to tmpfs file - * mapping - */ - - if (nr_pages == 0) - lock_page(page); - else if (!trylock_page(page)) - break; - - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - break; - } - - if (!wbc->range_cyclic && page->index > end) { - done = true; - unlock_page(page); - break; - } - - if (next && (page->index != next)) { - /* Not next consecutive page */ - unlock_page(page); - break; - } - - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - break; - } - - /* - * This actually clears the dirty bit in the radix tree. - * See cifs_writepage() for more commentary. - */ - set_page_writeback(page); - - if (page_offset(page) >= i_size_read(mapping->host)) { - done = true; - unlock_page(page); - end_page_writeback(page); - break; - } - - wdata->pages[i] = page; - next = page->index + 1; - ++nr_pages; - } - - /* reset index to refind any pages skipped */ - if (nr_pages == 0) - index = wdata->pages[0]->index + 1; - - /* put any pages we aren't going to use */ - for (i = nr_pages; i < found_pages; i++) { - page_cache_release(wdata->pages[i]); - wdata->pages[i] = NULL; - } + nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc, + end, &index, &next, &done); /* nothing to write? 
*/ if (nr_pages == 0) { kref_put(&wdata->refcount, cifs_writedata_release); + add_credits_and_wake_if(server, credits, 0); continue; } - wdata->sync_mode = wbc->sync_mode; - wdata->nr_pages = nr_pages; - wdata->offset = page_offset(wdata->pages[0]); - wdata->pagesz = PAGE_CACHE_SIZE; - wdata->tailsz = - min(i_size_read(mapping->host) - - page_offset(wdata->pages[nr_pages - 1]), - (loff_t)PAGE_CACHE_SIZE); - wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + - wdata->tailsz; - - do { - if (wdata->cfile != NULL) - cifsFileInfo_put(wdata->cfile); - wdata->cfile = find_writable_file(CIFS_I(mapping->host), - false); - if (!wdata->cfile) { - cifs_dbg(VFS, "No writable handles for inode\n"); - rc = -EBADF; - break; - } - wdata->pid = wdata->cfile->pid; - server = tlink_tcon(wdata->cfile->tlink)->ses->server; - rc = server->ops->async_writev(wdata, - cifs_writedata_release); - } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN); + wdata->credits = credits; - for (i = 0; i < nr_pages; ++i) - unlock_page(wdata->pages[i]); + rc = wdata_send_pages(wdata, nr_pages, mapping, wbc); /* send failure -- clean up the mess */ if (rc != 0) { + add_credits_and_wake_if(server, wdata->credits, 0); for (i = 0; i < nr_pages; ++i) { if (rc == -EAGAIN) redirty_page_for_writepage(wbc, @@ -2066,6 +2112,11 @@ retry: } kref_put(&wdata->refcount, cifs_writedata_release); + if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) { + index = saved_index; + continue; + } + wbc->nr_to_write -= nr_pages; if (wbc->nr_to_write <= 0) done = true; @@ -2362,123 +2413,109 @@ cifs_uncached_writev_complete(struct work_struct *work) kref_put(&wdata->refcount, cifs_uncached_writedata_release); } -/* attempt to send write to server, retry on any -EAGAIN errors */ static int -cifs_uncached_retry_writev(struct cifs_writedata *wdata) +wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from, + size_t *len, unsigned long *num_pages) { - int rc; - struct TCP_Server_Info *server; + size_t save_len, copied, bytes, cur_len = *len; + unsigned long i, nr_pages = *num_pages; - server = tlink_tcon(wdata->cfile->tlink)->ses->server; + save_len = cur_len; + for (i = 0; i < nr_pages; i++) { + bytes = min_t(const size_t, cur_len, PAGE_SIZE); + copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from); + cur_len -= copied; + /* + * If we didn't copy as much as we expected, then that + * may mean we trod into an unmapped area. Stop copying + * at that point. On the next pass through the big + * loop, we'll likely end up getting a zero-length + * write and bailing out of it. + */ + if (copied < bytes) + break; + } + cur_len = save_len - cur_len; + *len = cur_len; - do { - if (wdata->cfile->invalidHandle) { - rc = cifs_reopen_file(wdata->cfile, false); - if (rc != 0) - continue; - } - rc = server->ops->async_writev(wdata, - cifs_uncached_writedata_release); - } while (rc == -EAGAIN); + /* + * If we have no data to send, then that probably means that + * the copy above failed altogether. That's most likely because + * the address in the iovec was bogus. Return -EFAULT and let + * the caller free anything we allocated and bail out. + */ + if (!cur_len) + return -EFAULT; - return rc; + /* + * i + 1 now represents the number of pages we actually used in + * the copy phase above. 
+ */ + *num_pages = i + 1; + return 0; } -static ssize_t -cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) +static int +cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, + struct cifsFileInfo *open_file, + struct cifs_sb_info *cifs_sb, struct list_head *wdata_list) { - unsigned long nr_pages, i; - size_t bytes, copied, len, cur_len; - ssize_t total_written = 0; - loff_t offset; - struct cifsFileInfo *open_file; - struct cifs_tcon *tcon; - struct cifs_sb_info *cifs_sb; - struct cifs_writedata *wdata, *tmp; - struct list_head wdata_list; - int rc; + int rc = 0; + size_t cur_len; + unsigned long nr_pages, num_pages, i; + struct cifs_writedata *wdata; + struct iov_iter saved_from; + loff_t saved_offset = offset; pid_t pid; - - len = iov_iter_count(from); - rc = generic_write_checks(file, poffset, &len, 0); - if (rc) - return rc; - - if (!len) - return 0; - - iov_iter_truncate(from, len); - - INIT_LIST_HEAD(&wdata_list); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - open_file = file->private_data; - tcon = tlink_tcon(open_file->tlink); - - if (!tcon->ses->server->ops->async_writev) - return -ENOSYS; - - offset = *poffset; + struct TCP_Server_Info *server; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; else pid = current->tgid; + server = tlink_tcon(open_file->tlink)->ses->server; + memcpy(&saved_from, from, sizeof(struct iov_iter)); + do { - size_t save_len; + unsigned int wsize, credits; + + rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize, + &wsize, &credits); + if (rc) + break; - nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len); + nr_pages = get_numpages(wsize, len, &cur_len); wdata = cifs_writedata_alloc(nr_pages, cifs_uncached_writev_complete); if (!wdata) { rc = -ENOMEM; + add_credits_and_wake_if(server, credits, 0); break; } rc = cifs_write_allocate_pages(wdata->pages, nr_pages); if (rc) { kfree(wdata); + add_credits_and_wake_if(server, credits, 0); break; } - save_len = cur_len; - for (i = 0; i < nr_pages; i++) { - bytes = min_t(size_t, cur_len, PAGE_SIZE); - copied = copy_page_from_iter(wdata->pages[i], 0, bytes, - from); - cur_len -= copied; - /* - * If we didn't copy as much as we expected, then that - * may mean we trod into an unmapped area. Stop copying - * at that point. On the next pass through the big - * loop, we'll likely end up getting a zero-length - * write and bailing out of it. - */ - if (copied < bytes) - break; - } - cur_len = save_len - cur_len; - - /* - * If we have no data to send, then that probably means that - * the copy above failed altogether. That's most likely because - * the address in the iovec was bogus. Set the rc to -EFAULT, - * free anything we allocated and bail out. - */ - if (!cur_len) { + num_pages = nr_pages; + rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages); + if (rc) { for (i = 0; i < nr_pages; i++) put_page(wdata->pages[i]); kfree(wdata); - rc = -EFAULT; + add_credits_and_wake_if(server, credits, 0); break; } /* - * i + 1 now represents the number of pages we actually used in - * the copy phase above. Bring nr_pages down to that, and free - * any pages that we didn't use. + * Bring nr_pages down to the number of pages we actually used, + * and free any pages that we didn't use. 
*/ - for ( ; nr_pages > i + 1; nr_pages--) + for ( ; nr_pages > num_pages; nr_pages--) put_page(wdata->pages[nr_pages - 1]); wdata->sync_mode = WB_SYNC_ALL; @@ -2489,18 +2526,69 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) wdata->bytes = cur_len; wdata->pagesz = PAGE_SIZE; wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); - rc = cifs_uncached_retry_writev(wdata); + wdata->credits = credits; + + if (!wdata->cfile->invalidHandle || + !cifs_reopen_file(wdata->cfile, false)) + rc = server->ops->async_writev(wdata, + cifs_uncached_writedata_release); if (rc) { + add_credits_and_wake_if(server, wdata->credits, 0); kref_put(&wdata->refcount, cifs_uncached_writedata_release); + if (rc == -EAGAIN) { + memcpy(from, &saved_from, + sizeof(struct iov_iter)); + iov_iter_advance(from, offset - saved_offset); + continue; + } break; } - list_add_tail(&wdata->list, &wdata_list); + list_add_tail(&wdata->list, wdata_list); offset += cur_len; len -= cur_len; } while (len > 0); + return rc; +} + +static ssize_t +cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) +{ + size_t len; + ssize_t total_written = 0; + struct cifsFileInfo *open_file; + struct cifs_tcon *tcon; + struct cifs_sb_info *cifs_sb; + struct cifs_writedata *wdata, *tmp; + struct list_head wdata_list; + struct iov_iter saved_from; + int rc; + + len = iov_iter_count(from); + rc = generic_write_checks(file, poffset, &len, 0); + if (rc) + return rc; + + if (!len) + return 0; + + iov_iter_truncate(from, len); + + INIT_LIST_HEAD(&wdata_list); + cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + open_file = file->private_data; + tcon = tlink_tcon(open_file->tlink); + + if (!tcon->ses->server->ops->async_writev) + return -ENOSYS; + + memcpy(&saved_from, from, sizeof(struct iov_iter)); + + rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb, + &wdata_list); + /* * If at least one write was successfully sent, then discard any rc * value from the later writes. 
If the other write succeeds, then @@ -2529,7 +2617,25 @@ restart_loop: /* resend call if it's a retryable error */ if (rc == -EAGAIN) { - rc = cifs_uncached_retry_writev(wdata); + struct list_head tmp_list; + struct iov_iter tmp_from; + + INIT_LIST_HEAD(&tmp_list); + list_del_init(&wdata->list); + + memcpy(&tmp_from, &saved_from, + sizeof(struct iov_iter)); + iov_iter_advance(&tmp_from, + wdata->offset - *poffset); + + rc = cifs_write_from_iter(wdata->offset, + wdata->bytes, &tmp_from, + open_file, cifs_sb, &tmp_list); + + list_splice(&tmp_list, &wdata_list); + + kref_put(&wdata->refcount, + cifs_uncached_writedata_release); goto restart_loop; } } @@ -2722,26 +2828,6 @@ cifs_uncached_readdata_release(struct kref *refcount) cifs_readdata_release(refcount); } -static int -cifs_retry_async_readv(struct cifs_readdata *rdata) -{ - int rc; - struct TCP_Server_Info *server; - - server = tlink_tcon(rdata->cfile->tlink)->ses->server; - - do { - if (rdata->cfile->invalidHandle) { - rc = cifs_reopen_file(rdata->cfile, true); - if (rc != 0) - continue; - } - rc = server->ops->async_readv(rdata); - } while (rc == -EAGAIN); - - return rc; -} - /** * cifs_readdata_to_iov - copy data from pages in response to an iovec * @rdata: the readdata response with list of pages holding data @@ -2754,7 +2840,7 @@ cifs_retry_async_readv(struct cifs_readdata *rdata) static int cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter) { - size_t remaining = rdata->bytes; + size_t remaining = rdata->got_bytes; unsigned int i; for (i = 0; i < rdata->nr_pages; i++) { @@ -2782,11 +2868,12 @@ static int cifs_uncached_read_into_pages(struct TCP_Server_Info *server, struct cifs_readdata *rdata, unsigned int len) { - int total_read = 0, result = 0; + int result = 0; unsigned int i; unsigned int nr_pages = rdata->nr_pages; struct kvec iov; + rdata->got_bytes = 0; rdata->tailsz = PAGE_SIZE; for (i = 0; i < nr_pages; i++) { struct page *page = rdata->pages[i]; @@ -2820,55 +2907,45 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server, if (result < 0) break; - total_read += result; + rdata->got_bytes += result; } - return total_read > 0 ? total_read : result; + return rdata->got_bytes > 0 && result != -ECONNABORTED ? 
+ rdata->got_bytes : result; } -ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) +static int +cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, + struct cifs_sb_info *cifs_sb, struct list_head *rdata_list) { - struct file *file = iocb->ki_filp; - ssize_t rc; - size_t len, cur_len; - ssize_t total_read = 0; - loff_t offset = iocb->ki_pos; - unsigned int npages; - struct cifs_sb_info *cifs_sb; - struct cifs_tcon *tcon; - struct cifsFileInfo *open_file; - struct cifs_readdata *rdata, *tmp; - struct list_head rdata_list; + struct cifs_readdata *rdata; + unsigned int npages, rsize, credits; + size_t cur_len; + int rc; pid_t pid; + struct TCP_Server_Info *server; - len = iov_iter_count(to); - if (!len) - return 0; - - INIT_LIST_HEAD(&rdata_list); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - open_file = file->private_data; - tcon = tlink_tcon(open_file->tlink); - - if (!tcon->ses->server->ops->async_readv) - return -ENOSYS; + server = tlink_tcon(open_file->tlink)->ses->server; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; else pid = current->tgid; - if ((file->f_flags & O_ACCMODE) == O_WRONLY) - cifs_dbg(FYI, "attempting read on write only file instance\n"); - do { - cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize); + rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize, + &rsize, &credits); + if (rc) + break; + + cur_len = min_t(const size_t, len, rsize); npages = DIV_ROUND_UP(cur_len, PAGE_SIZE); /* allocate a readdata struct */ rdata = cifs_readdata_alloc(npages, cifs_uncached_readv_complete); if (!rdata) { + add_credits_and_wake_if(server, credits, 0); rc = -ENOMEM; break; } @@ -2884,44 +2961,113 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) rdata->pid = pid; rdata->pagesz = PAGE_SIZE; rdata->read_into_pages = cifs_uncached_read_into_pages; + rdata->credits = credits; - rc = cifs_retry_async_readv(rdata); + if (!rdata->cfile->invalidHandle || + !cifs_reopen_file(rdata->cfile, true)) + rc = server->ops->async_readv(rdata); error: if (rc) { + add_credits_and_wake_if(server, rdata->credits, 0); kref_put(&rdata->refcount, cifs_uncached_readdata_release); + if (rc == -EAGAIN) + continue; break; } - list_add_tail(&rdata->list, &rdata_list); + list_add_tail(&rdata->list, rdata_list); offset += cur_len; len -= cur_len; } while (len > 0); + return rc; +} + +ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + ssize_t rc; + size_t len; + ssize_t total_read = 0; + loff_t offset = iocb->ki_pos; + struct cifs_sb_info *cifs_sb; + struct cifs_tcon *tcon; + struct cifsFileInfo *open_file; + struct cifs_readdata *rdata, *tmp; + struct list_head rdata_list; + + len = iov_iter_count(to); + if (!len) + return 0; + + INIT_LIST_HEAD(&rdata_list); + cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + open_file = file->private_data; + tcon = tlink_tcon(open_file->tlink); + + if (!tcon->ses->server->ops->async_readv) + return -ENOSYS; + + if ((file->f_flags & O_ACCMODE) == O_WRONLY) + cifs_dbg(FYI, "attempting read on write only file instance\n"); + + rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list); + /* if at least one read request send succeeded, then reset rc */ if (!list_empty(&rdata_list)) rc = 0; len = iov_iter_count(to); /* the loop below should proceed in the order of increasing offsets */ +again: list_for_each_entry_safe(rdata, tmp, &rdata_list, list) { - again: if (!rc) { /* FIXME: freezable sleep too? 
*/ rc = wait_for_completion_killable(&rdata->done); if (rc) rc = -EINTR; - else if (rdata->result) { - rc = rdata->result; + else if (rdata->result == -EAGAIN) { /* resend call if it's a retryable error */ - if (rc == -EAGAIN) { - rc = cifs_retry_async_readv(rdata); - goto again; + struct list_head tmp_list; + unsigned int got_bytes = rdata->got_bytes; + + list_del_init(&rdata->list); + INIT_LIST_HEAD(&tmp_list); + + /* + * Got a part of data and then reconnect has + * happened -- fill the buffer and continue + * reading. + */ + if (got_bytes && got_bytes < rdata->bytes) { + rc = cifs_readdata_to_iov(rdata, to); + if (rc) { + kref_put(&rdata->refcount, + cifs_uncached_readdata_release); + continue; + } } - } else { + + rc = cifs_send_async_read( + rdata->offset + got_bytes, + rdata->bytes - got_bytes, + rdata->cfile, cifs_sb, + &tmp_list); + + list_splice(&tmp_list, &rdata_list); + + kref_put(&rdata->refcount, + cifs_uncached_readdata_release); + goto again; + } else if (rdata->result) + rc = rdata->result; + else rc = cifs_readdata_to_iov(rdata, to); - } + /* if there was a short read -- discard anything left */ + if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) + rc = -ENODATA; } list_del_init(&rdata->list); kref_put(&rdata->refcount, cifs_uncached_readdata_release); @@ -3030,18 +3176,19 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) for (total_read = 0, cur_offset = read_data; read_size > total_read; total_read += bytes_read, cur_offset += bytes_read) { - current_read_size = min_t(uint, read_size - total_read, rsize); - /* - * For windows me and 9x we do not want to request more than it - * negotiated since it will refuse the read then. - */ - if ((tcon->ses) && !(tcon->ses->capabilities & + do { + current_read_size = min_t(uint, read_size - total_read, + rsize); + /* + * For windows me and 9x we do not want to request more + * than it negotiated since it will refuse the read + * then. 
+ */ + if ((tcon->ses) && !(tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files)) { - current_read_size = min_t(uint, current_read_size, - CIFSMaxBufSize); - } - rc = -EAGAIN; - while (rc == -EAGAIN) { + current_read_size = min_t(uint, + current_read_size, CIFSMaxBufSize); + } if (open_file->invalidHandle) { rc = cifs_reopen_file(open_file, true); if (rc != 0) @@ -3054,7 +3201,8 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) rc = server->ops->sync_read(xid, open_file, &io_parms, &bytes_read, &cur_offset, &buf_type); - } + } while (rc == -EAGAIN); + if (rc || (bytes_read == 0)) { if (total_read) { break; @@ -3133,25 +3281,30 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) static void cifs_readv_complete(struct work_struct *work) { - unsigned int i; + unsigned int i, got_bytes; struct cifs_readdata *rdata = container_of(work, struct cifs_readdata, work); + got_bytes = rdata->got_bytes; for (i = 0; i < rdata->nr_pages; i++) { struct page *page = rdata->pages[i]; lru_cache_add_file(page); - if (rdata->result == 0) { + if (rdata->result == 0 || + (rdata->result == -EAGAIN && got_bytes)) { flush_dcache_page(page); SetPageUptodate(page); } unlock_page(page); - if (rdata->result == 0) + if (rdata->result == 0 || + (rdata->result == -EAGAIN && got_bytes)) cifs_readpage_to_fscache(rdata->mapping->host, page); + got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes); + page_cache_release(page); rdata->pages[i] = NULL; } @@ -3162,7 +3315,7 @@ static int cifs_readpages_read_into_pages(struct TCP_Server_Info *server, struct cifs_readdata *rdata, unsigned int len) { - int total_read = 0, result = 0; + int result = 0; unsigned int i; u64 eof; pgoff_t eof_index; @@ -3174,6 +3327,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index); + rdata->got_bytes = 0; rdata->tailsz = PAGE_CACHE_SIZE; for (i = 0; i < nr_pages; i++) { struct page *page = rdata->pages[i]; @@ -3228,10 +3382,70 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, if (result < 0) break; - total_read += result; + rdata->got_bytes += result; } - return total_read > 0 ? total_read : result; + return rdata->got_bytes > 0 && result != -ECONNABORTED ? + rdata->got_bytes : result; +} + +static int +readpages_get_pages(struct address_space *mapping, struct list_head *page_list, + unsigned int rsize, struct list_head *tmplist, + unsigned int *nr_pages, loff_t *offset, unsigned int *bytes) +{ + struct page *page, *tpage; + unsigned int expected_index; + int rc; + + INIT_LIST_HEAD(tmplist); + + page = list_entry(page_list->prev, struct page, lru); + + /* + * Lock the page and put it in the cache. Since no one else + * should have access to this page, we're safe to simply set + * PG_locked without checking it first. + */ + __set_page_locked(page); + rc = add_to_page_cache_locked(page, mapping, + page->index, GFP_KERNEL); + + /* give up if we can't stick it in the cache */ + if (rc) { + __clear_page_locked(page); + return rc; + } + + /* move first page to the tmplist */ + *offset = (loff_t)page->index << PAGE_CACHE_SHIFT; + *bytes = PAGE_CACHE_SIZE; + *nr_pages = 1; + list_move_tail(&page->lru, tmplist); + + /* now try and add more pages onto the request */ + expected_index = page->index + 1; + list_for_each_entry_safe_reverse(page, tpage, page_list, lru) { + /* discontinuity ? 
*/ + if (page->index != expected_index) + break; + + /* would this page push the read over the rsize? */ + if (*bytes + PAGE_CACHE_SIZE > rsize) + break; + + __set_page_locked(page); + if (add_to_page_cache_locked(page, mapping, page->index, + GFP_KERNEL)) { + __clear_page_locked(page); + break; + } + list_move_tail(&page->lru, tmplist); + (*bytes) += PAGE_CACHE_SIZE; + expected_index++; + (*nr_pages)++; + } + return rc; } static int cifs_readpages(struct file *file, struct address_space *mapping, @@ -3241,19 +3455,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, struct list_head tmplist; struct cifsFileInfo *open_file = file->private_data; struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - unsigned int rsize = cifs_sb->rsize; + struct TCP_Server_Info *server; pid_t pid; /* - * Give up immediately if rsize is too small to read an entire page. - * The VFS will fall back to readpage. We should never reach this - * point however since we set ra_pages to 0 when the rsize is smaller - * than a cache page. - */ - if (unlikely(rsize < PAGE_CACHE_SIZE)) - return 0; - - /* * Reads as many pages as possible from fscache. Returns -ENOBUFS * immediately if the cookie is negative * @@ -3271,7 +3476,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, pid = current->tgid; rc = 0; - INIT_LIST_HEAD(&tmplist); + server = tlink_tcon(open_file->tlink)->ses->server; cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", __func__, file, mapping, num_pages); @@ -3288,58 +3493,35 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, * the rdata->pages, then we want them in increasing order. */ while (!list_empty(page_list)) { - unsigned int i; - unsigned int bytes = PAGE_CACHE_SIZE; - unsigned int expected_index; - unsigned int nr_pages = 1; + unsigned int i, nr_pages, bytes, rsize; loff_t offset; struct page *page, *tpage; struct cifs_readdata *rdata; + unsigned credits; - page = list_entry(page_list->prev, struct page, lru); + rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize, + &rsize, &credits); + if (rc) + break; /* - * Lock the page and put it in the cache. Since no one else - * should have access to this page, we're safe to simply set - * PG_locked without checking it first. + * Give up immediately if rsize is too small to read an entire + * page. The VFS will fall back to readpage. We should never + * reach this point however since we set ra_pages to 0 when the + * rsize is smaller than a cache page. */ - __set_page_locked(page); - rc = add_to_page_cache_locked(page, mapping, - page->index, GFP_KERNEL); + if (unlikely(rsize < PAGE_CACHE_SIZE)) { + add_credits_and_wake_if(server, credits, 0); + return 0; + } - /* give up if we can't stick it in the cache */ + rc = readpages_get_pages(mapping, page_list, rsize, &tmplist, + &nr_pages, &offset, &bytes); if (rc) { - __clear_page_locked(page); + add_credits_and_wake_if(server, credits, 0); break; } - /* move first page to the tmplist */ - offset = (loff_t)page->index << PAGE_CACHE_SHIFT; - list_move_tail(&page->lru, &tmplist); - - /* now try and add more pages onto the request */ - expected_index = page->index + 1; - list_for_each_entry_safe_reverse(page, tpage, page_list, lru) { - /* discontinuity ? */ - if (page->index != expected_index) - break; - - /* would this page push the read over the rsize? 
*/ - if (bytes + PAGE_CACHE_SIZE > rsize) - break; - - __set_page_locked(page); - if (add_to_page_cache_locked(page, mapping, - page->index, GFP_KERNEL)) { - __clear_page_locked(page); - break; - } - list_move_tail(&page->lru, &tmplist); - bytes += PAGE_CACHE_SIZE; - expected_index++; - nr_pages++; - } - rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete); if (!rdata) { /* best to give up if we're out of mem */ @@ -3350,6 +3532,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, page_cache_release(page); } rc = -ENOMEM; + add_credits_and_wake_if(server, credits, 0); break; } @@ -3360,21 +3543,32 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, rdata->pid = pid; rdata->pagesz = PAGE_CACHE_SIZE; rdata->read_into_pages = cifs_readpages_read_into_pages; + rdata->credits = credits; list_for_each_entry_safe(page, tpage, &tmplist, lru) { list_del(&page->lru); rdata->pages[rdata->nr_pages++] = page; } - rc = cifs_retry_async_readv(rdata); - if (rc != 0) { + if (!rdata->cfile->invalidHandle || + !cifs_reopen_file(rdata->cfile, true)) + rc = server->ops->async_readv(rdata); + if (rc) { + add_credits_and_wake_if(server, rdata->credits, 0); for (i = 0; i < rdata->nr_pages; i++) { page = rdata->pages[i]; lru_cache_add_file(page); unlock_page(page); page_cache_release(page); + if (rc == -EAGAIN) + list_add_tail(&page->lru, &tmplist); } kref_put(&rdata->refcount, cifs_readdata_release); + if (rc == -EAGAIN) { + /* Re-add pages to the page_list and retry */ + list_splice(&tmplist, page_list); + continue; + } break; } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 6bf55d0ed49..81340c6253e 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -226,6 +226,15 @@ cifs_small_buf_release(void *buf_to_free) return; } +void +free_rsp_buf(int resp_buftype, void *rsp) +{ + if (resp_buftype == CIFS_SMALL_BUFFER) + cifs_small_buf_release(rsp); + else if (resp_buftype == CIFS_LARGE_BUFFER) + cifs_buf_release(rsp); +} + /* NB: MID can not be set if treeCon not passed in, in that case it is responsbility of caller to set the mid */ void @@ -414,7 +423,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) return true; } if (pSMBr->hdr.Status.CifsError) { - cifs_dbg(FYI, "notify err 0x%d\n", + cifs_dbg(FYI, "notify err 0x%x\n", pSMBr->hdr.Status.CifsError); return true; } @@ -441,7 +450,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) if (pSMB->hdr.WordCount != 8) return false; - cifs_dbg(FYI, "oplock type 0x%d level 0x%d\n", + cifs_dbg(FYI, "oplock type 0x%x level 0x%x\n", pSMB->LockType, pSMB->OplockLevel); if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) return false; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index e87387dbf39..39ee32688ea 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -520,382 +520,559 @@ select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) } } -int -CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, - const struct nls_table *nls_cp) +struct sess_data { + unsigned int xid; + struct cifs_ses *ses; + struct nls_table *nls_cp; + void (*func)(struct sess_data *); + int result; + + /* we will send the SMB in three pieces: + * a fixed length beginning part, an optional + * SPNEGO blob (which can be zero length), and a + * last part which will include the strings + * and rest of bcc area. 
This allows us to avoid + * a large buffer 17K allocation + */ + int buf0_type; + struct kvec iov[3]; +}; + +static int +sess_alloc_buffer(struct sess_data *sess_data, int wct) { - int rc = 0; - int wct; + int rc; + struct cifs_ses *ses = sess_data->ses; struct smb_hdr *smb_buf; - char *bcc_ptr; - char *str_area; - SESSION_SETUP_ANDX *pSMB; - __u32 capabilities; - __u16 count; - int resp_buf_type; - struct kvec iov[3]; - enum securityEnum type; - __u16 action, bytes_remaining; - struct key *spnego_key = NULL; - __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ - u16 blob_len; - char *ntlmsspblob = NULL; - if (ses == NULL) { - WARN(1, "%s: ses == NULL!", __func__); - return -EINVAL; - } + rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses, + (void **)&smb_buf); - type = select_sectype(ses->server, ses->sectype); - cifs_dbg(FYI, "sess setup type %d\n", type); - if (type == Unspecified) { - cifs_dbg(VFS, - "Unable to select appropriate authentication method!"); - return -EINVAL; + if (rc) + return rc; + + sess_data->iov[0].iov_base = (char *)smb_buf; + sess_data->iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4; + /* + * This variable will be used to clear the buffer + * allocated above in case of any error in the calling function. + */ + sess_data->buf0_type = CIFS_SMALL_BUFFER; + + /* 2000 big enough to fit max user, domain, NOS name etc. */ + sess_data->iov[2].iov_base = kmalloc(2000, GFP_KERNEL); + if (!sess_data->iov[2].iov_base) { + rc = -ENOMEM; + goto out_free_smb_buf; } - if (type == RawNTLMSSP) { - /* if memory allocation is successful, caller of this function - * frees it. - */ - ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL); - if (!ses->ntlmssp) - return -ENOMEM; - ses->ntlmssp->sesskey_per_smbsess = false; + return 0; + +out_free_smb_buf: + kfree(smb_buf); + sess_data->iov[0].iov_base = NULL; + sess_data->iov[0].iov_len = 0; + sess_data->buf0_type = CIFS_NO_BUFFER; + return rc; +} + +static void +sess_free_buffer(struct sess_data *sess_data) +{ + free_rsp_buf(sess_data->buf0_type, sess_data->iov[0].iov_base); + sess_data->buf0_type = CIFS_NO_BUFFER; + kfree(sess_data->iov[2].iov_base); +} + +static int +sess_establish_session(struct sess_data *sess_data) +{ + struct cifs_ses *ses = sess_data->ses; + + mutex_lock(&ses->server->srv_mutex); + if (!ses->server->session_estab) { + if (ses->server->sign) { + ses->server->session_key.response = + kmemdup(ses->auth_key.response, + ses->auth_key.len, GFP_KERNEL); + if (!ses->server->session_key.response) { + mutex_unlock(&ses->server->srv_mutex); + return -ENOMEM; + } + ses->server->session_key.len = + ses->auth_key.len; + } + ses->server->sequence_number = 0x2; + ses->server->session_estab = true; } + mutex_unlock(&ses->server->srv_mutex); -ssetup_ntlmssp_authenticate: - if (phase == NtLmChallenge) - phase = NtLmAuthenticate; /* if ntlmssp, now final phase */ + cifs_dbg(FYI, "CIFS session established successfully\n"); + spin_lock(&GlobalMid_Lock); + ses->status = CifsGood; + ses->need_reconnect = false; + spin_unlock(&GlobalMid_Lock); - if (type == LANMAN) { -#ifndef CONFIG_CIFS_WEAK_PW_HASH - /* LANMAN and plaintext are less secure and off by default. - So we make this explicitly be turned on in kconfig (in the - build) and turned on at runtime (changed from the default) - in proc/fs/cifs or via mount parm. Unfortunately this is - needed for old Win (e.g. 
Win95), some obscure NAS and OS/2 */ - return -EOPNOTSUPP; -#endif - wct = 10; /* lanman 2 style sessionsetup */ - } else if ((type == NTLM) || (type == NTLMv2)) { - /* For NTLMv2 failures eventually may need to retry NTLM */ - wct = 13; /* old style NTLM sessionsetup */ - } else /* same size: negotiate or auth, NTLMSSP or extended security */ - wct = 12; + return 0; +} - rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses, - (void **)&smb_buf); - if (rc) - return rc; +static int +sess_sendreceive(struct sess_data *sess_data) +{ + int rc; + struct smb_hdr *smb_buf = (struct smb_hdr *) sess_data->iov[0].iov_base; + __u16 count; - pSMB = (SESSION_SETUP_ANDX *)smb_buf; + count = sess_data->iov[1].iov_len + sess_data->iov[2].iov_len; + smb_buf->smb_buf_length = + cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count); + put_bcc(count, smb_buf); + + rc = SendReceive2(sess_data->xid, sess_data->ses, + sess_data->iov, 3 /* num_iovecs */, + &sess_data->buf0_type, + CIFS_LOG_ERROR); + + return rc; +} +/* + * LANMAN and plaintext are less secure and off by default. + * So we make this explicitly be turned on in kconfig (in the + * build) and turned on at runtime (changed from the default) + * in proc/fs/cifs or via mount parm. Unfortunately this is + * needed for old Win (e.g. Win95), some obscure NAS and OS/2 + */ +#ifdef CONFIG_CIFS_WEAK_PW_HASH +static void +sess_auth_lanman(struct sess_data *sess_data) +{ + int rc = 0; + struct smb_hdr *smb_buf; + SESSION_SETUP_ANDX *pSMB; + char *bcc_ptr; + struct cifs_ses *ses = sess_data->ses; + char lnm_session_key[CIFS_AUTH_RESP_SIZE]; + __u32 capabilities; + __u16 bytes_remaining; + + /* lanman 2 style sessionsetup */ + /* wct = 10 */ + rc = sess_alloc_buffer(sess_data, 10); + if (rc) + goto out; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + bcc_ptr = sess_data->iov[2].iov_base; capabilities = cifs_ssetup_hdr(ses, pSMB); - /* we will send the SMB in three pieces: - a fixed length beginning part, an optional - SPNEGO blob (which can be zero length), and a - last part which will include the strings - and rest of bcc area. This allows us to avoid - a large buffer 17K allocation */ - iov[0].iov_base = (char *)pSMB; - iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4; - - /* setting this here allows the code at the end of the function - to free the request buffer if there's an error */ - resp_buf_type = CIFS_SMALL_BUFFER; + pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; - /* 2000 big enough to fit max user, domain, NOS name etc. */ - str_area = kmalloc(2000, GFP_KERNEL); - if (str_area == NULL) { - rc = -ENOMEM; - goto ssetup_exit; - } - bcc_ptr = str_area; + /* no capabilities flags in old lanman negotiation */ + pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); - iov[1].iov_base = NULL; - iov[1].iov_len = 0; + /* Calculate hash with password and copy into bcc_ptr. + * Encryption Key (stored as in cryptkey) gets used if the + * security mode bit in Negottiate Protocol response states + * to use challenge/response method (i.e. Password bit is 1). + */ + rc = calc_lanman_hash(ses->password, ses->server->cryptkey, + ses->server->sec_mode & SECMODE_PW_ENCRYPT ? + true : false, lnm_session_key); - if (type == LANMAN) { -#ifdef CONFIG_CIFS_WEAK_PW_HASH - char lnm_session_key[CIFS_AUTH_RESP_SIZE]; + memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); + bcc_ptr += CIFS_AUTH_RESP_SIZE; + + /* + * can not sign if LANMAN negotiated so no need + * to calculate signing key? 
but what if server + * changed to do higher than lanman dialect and + * we reconnected would we ever calc signing_key? + */ - pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; + cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n"); + /* Unicode not allowed for LANMAN dialects */ + ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); - /* no capabilities flags in old lanman negotiation */ + sess_data->iov[2].iov_len = (long) bcc_ptr - + (long) sess_data->iov[2].iov_base; - pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); + rc = sess_sendreceive(sess_data); + if (rc) + goto out; - /* Calculate hash with password and copy into bcc_ptr. - * Encryption Key (stored as in cryptkey) gets used if the - * security mode bit in Negottiate Protocol response states - * to use challenge/response method (i.e. Password bit is 1). - */ + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; - rc = calc_lanman_hash(ses->password, ses->server->cryptkey, - ses->server->sec_mode & SECMODE_PW_ENCRYPT ? - true : false, lnm_session_key); + /* lanman response has a word count of 3 */ + if (smb_buf->WordCount != 3) { + rc = -EIO; + cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); + goto out; + } - memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); - bcc_ptr += CIFS_AUTH_RESP_SIZE; + if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN) + cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */ + + ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ + cifs_dbg(FYI, "UID = %llu\n", ses->Suid); - /* can not sign if LANMAN negotiated so no need - to calculate signing key? but what if server - changed to do higher than lanman dialect and - we reconnected would we ever calc signing_key? 
*/ + bytes_remaining = get_bcc(smb_buf); + bcc_ptr = pByteArea(smb_buf); - cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n"); - /* Unicode not allowed for LANMAN dialects */ - ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); + /* BB check if Unicode and decode strings */ + if (bytes_remaining == 0) { + /* no string area to decode, do nothing */ + } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) { + /* unicode string area must be word-aligned */ + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { + ++bcc_ptr; + --bytes_remaining; + } + decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } else { + decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } + + rc = sess_establish_session(sess_data); +out: + sess_data->result = rc; + sess_data->func = NULL; + sess_free_buffer(sess_data); +} + +#else + +static void +sess_auth_lanman(struct sess_data *sess_data) +{ + sess_data->result = -EOPNOTSUPP; + sess_data->func = NULL; +} #endif - } else if (type == NTLM) { - pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); - pSMB->req_no_secext.CaseInsensitivePasswordLength = + +static void +sess_auth_ntlm(struct sess_data *sess_data) +{ + int rc = 0; + struct smb_hdr *smb_buf; + SESSION_SETUP_ANDX *pSMB; + char *bcc_ptr; + struct cifs_ses *ses = sess_data->ses; + __u32 capabilities; + __u16 bytes_remaining; + + /* old style NTLM sessionsetup */ + /* wct = 13 */ + rc = sess_alloc_buffer(sess_data, 13); + if (rc) + goto out; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + bcc_ptr = sess_data->iov[2].iov_base; + capabilities = cifs_ssetup_hdr(ses, pSMB); + + pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); + pSMB->req_no_secext.CaseInsensitivePasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); - pSMB->req_no_secext.CaseSensitivePasswordLength = + pSMB->req_no_secext.CaseSensitivePasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); - /* calculate ntlm response and session key */ - rc = setup_ntlm_response(ses, nls_cp); - if (rc) { - cifs_dbg(VFS, "Error %d during NTLM authentication\n", + /* calculate ntlm response and session key */ + rc = setup_ntlm_response(ses, sess_data->nls_cp); + if (rc) { + cifs_dbg(VFS, "Error %d during NTLM authentication\n", rc); - goto ssetup_exit; - } + goto out; + } - /* copy ntlm response */ - memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, - CIFS_AUTH_RESP_SIZE); - bcc_ptr += CIFS_AUTH_RESP_SIZE; - memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, - CIFS_AUTH_RESP_SIZE); - bcc_ptr += CIFS_AUTH_RESP_SIZE; - - if (ses->capabilities & CAP_UNICODE) { - /* unicode strings must be word aligned */ - if (iov[0].iov_len % 2) { - *bcc_ptr = 0; - bcc_ptr++; - } - unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); - } else - ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); - } else if (type == NTLMv2) { - pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); - - /* LM2 password would be here if we supported it */ - pSMB->req_no_secext.CaseInsensitivePasswordLength = 0; - - /* calculate nlmv2 response and session key */ - rc = setup_ntlmv2_rsp(ses, nls_cp); - if (rc) { - cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n", - rc); - goto ssetup_exit; + /* copy ntlm response */ + memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, + CIFS_AUTH_RESP_SIZE); + bcc_ptr += CIFS_AUTH_RESP_SIZE; + memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, + CIFS_AUTH_RESP_SIZE); + bcc_ptr += CIFS_AUTH_RESP_SIZE; + + if (ses->capabilities & CAP_UNICODE) { + /* 
unicode strings must be word aligned */ + if (sess_data->iov[0].iov_len % 2) { + *bcc_ptr = 0; + bcc_ptr++; } - memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, - ses->auth_key.len - CIFS_SESS_KEY_SIZE); - bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE; - - /* set case sensitive password length after tilen may get - * assigned, tilen is 0 otherwise. - */ - pSMB->req_no_secext.CaseSensitivePasswordLength = - cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE); + unicode_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); + } else { + ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); + } - if (ses->capabilities & CAP_UNICODE) { - if (iov[0].iov_len % 2) { - *bcc_ptr = 0; - bcc_ptr++; - } - unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); - } else - ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); - } else if (type == Kerberos) { -#ifdef CONFIG_CIFS_UPCALL - struct cifs_spnego_msg *msg; - spnego_key = cifs_get_spnego_key(ses); - if (IS_ERR(spnego_key)) { - rc = PTR_ERR(spnego_key); - spnego_key = NULL; - goto ssetup_exit; - } + sess_data->iov[2].iov_len = (long) bcc_ptr - + (long) sess_data->iov[2].iov_base; - msg = spnego_key->payload.data; - /* check version field to make sure that cifs.upcall is - sending us a response in an expected form */ - if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { - cifs_dbg(VFS, "incorrect version of cifs.upcall " - "expected %d but got %d)", - CIFS_SPNEGO_UPCALL_VERSION, msg->version); - rc = -EKEYREJECTED; - goto ssetup_exit; - } + rc = sess_sendreceive(sess_data); + if (rc) + goto out; - ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, - GFP_KERNEL); - if (!ses->auth_key.response) { - cifs_dbg(VFS, - "Kerberos can't allocate (%u bytes) memory", - msg->sesskey_len); - rc = -ENOMEM; - goto ssetup_exit; - } - ses->auth_key.len = msg->sesskey_len; - - pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; - capabilities |= CAP_EXTENDED_SECURITY; - pSMB->req.Capabilities = cpu_to_le32(capabilities); - iov[1].iov_base = msg->data + msg->sesskey_len; - iov[1].iov_len = msg->secblob_len; - pSMB->req.SecurityBlobLength = cpu_to_le16(iov[1].iov_len); - - if (ses->capabilities & CAP_UNICODE) { - /* unicode strings must be word aligned */ - if ((iov[0].iov_len + iov[1].iov_len) % 2) { - *bcc_ptr = 0; - bcc_ptr++; - } - unicode_oslm_strings(&bcc_ptr, nls_cp); - unicode_domain_string(&bcc_ptr, ses, nls_cp); - } else - /* BB: is this right? */ - ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); -#else /* ! 
CONFIG_CIFS_UPCALL */ - cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n"); - rc = -ENOSYS; - goto ssetup_exit; -#endif /* CONFIG_CIFS_UPCALL */ - } else if (type == RawNTLMSSP) { - if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { - cifs_dbg(VFS, "NTLMSSP requires Unicode support\n"); - rc = -ENOSYS; - goto ssetup_exit; - } + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; - cifs_dbg(FYI, "ntlmssp session setup phase %d\n", phase); - pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; - capabilities |= CAP_EXTENDED_SECURITY; - pSMB->req.Capabilities |= cpu_to_le32(capabilities); - switch(phase) { - case NtLmNegotiate: - build_ntlmssp_negotiate_blob( - pSMB->req.SecurityBlob, ses); - iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); - iov[1].iov_base = pSMB->req.SecurityBlob; - pSMB->req.SecurityBlobLength = - cpu_to_le16(sizeof(NEGOTIATE_MESSAGE)); - break; - case NtLmAuthenticate: - /* - * 5 is an empirical value, large enough to hold - * authenticate message plus max 10 of av paris, - * domain, user, workstation names, flags, etc. - */ - ntlmsspblob = kzalloc( - 5*sizeof(struct _AUTHENTICATE_MESSAGE), - GFP_KERNEL); - if (!ntlmsspblob) { - rc = -ENOMEM; - goto ssetup_exit; - } + if (smb_buf->WordCount != 3) { + rc = -EIO; + cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); + goto out; + } - rc = build_ntlmssp_auth_blob(ntlmsspblob, - &blob_len, ses, nls_cp); - if (rc) - goto ssetup_exit; - iov[1].iov_len = blob_len; - iov[1].iov_base = ntlmsspblob; - pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len); - /* - * Make sure that we tell the server that we are using - * the uid that it just gave us back on the response - * (challenge) - */ - smb_buf->Uid = ses->Suid; - break; - default: - cifs_dbg(VFS, "invalid phase %d\n", phase); - rc = -ENOSYS; - goto ssetup_exit; + if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN) + cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? 
*/ + + ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ + cifs_dbg(FYI, "UID = %llu\n", ses->Suid); + + bytes_remaining = get_bcc(smb_buf); + bcc_ptr = pByteArea(smb_buf); + + /* BB check if Unicode and decode strings */ + if (bytes_remaining == 0) { + /* no string area to decode, do nothing */ + } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) { + /* unicode string area must be word-aligned */ + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { + ++bcc_ptr; + --bytes_remaining; } - /* unicode strings must be word aligned */ - if ((iov[0].iov_len + iov[1].iov_len) % 2) { + decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } else { + decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } + + rc = sess_establish_session(sess_data); +out: + sess_data->result = rc; + sess_data->func = NULL; + sess_free_buffer(sess_data); + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; +} + +static void +sess_auth_ntlmv2(struct sess_data *sess_data) +{ + int rc = 0; + struct smb_hdr *smb_buf; + SESSION_SETUP_ANDX *pSMB; + char *bcc_ptr; + struct cifs_ses *ses = sess_data->ses; + __u32 capabilities; + __u16 bytes_remaining; + + /* old style NTLM sessionsetup */ + /* wct = 13 */ + rc = sess_alloc_buffer(sess_data, 13); + if (rc) + goto out; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + bcc_ptr = sess_data->iov[2].iov_base; + capabilities = cifs_ssetup_hdr(ses, pSMB); + + pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); + + /* LM2 password would be here if we supported it */ + pSMB->req_no_secext.CaseInsensitivePasswordLength = 0; + + /* calculate nlmv2 response and session key */ + rc = setup_ntlmv2_rsp(ses, sess_data->nls_cp); + if (rc) { + cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n", rc); + goto out; + } + + memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, + ses->auth_key.len - CIFS_SESS_KEY_SIZE); + bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE; + + /* set case sensitive password length after tilen may get + * assigned, tilen is 0 otherwise. + */ + pSMB->req_no_secext.CaseSensitivePasswordLength = + cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE); + + if (ses->capabilities & CAP_UNICODE) { + if (sess_data->iov[0].iov_len % 2) { *bcc_ptr = 0; bcc_ptr++; } - unicode_oslm_strings(&bcc_ptr, nls_cp); + unicode_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); } else { - cifs_dbg(VFS, "secType %d not supported!\n", type); - rc = -ENOSYS; - goto ssetup_exit; + ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); } - iov[2].iov_base = str_area; - iov[2].iov_len = (long) bcc_ptr - (long) str_area; - count = iov[1].iov_len + iov[2].iov_len; - smb_buf->smb_buf_length = - cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count); + sess_data->iov[2].iov_len = (long) bcc_ptr - + (long) sess_data->iov[2].iov_base; - put_bcc(count, smb_buf); + rc = sess_sendreceive(sess_data); + if (rc) + goto out; - rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type, - CIFS_LOG_ERROR); - /* SMB request buf freed in SendReceive2 */ + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; + + if (smb_buf->WordCount != 3) { + rc = -EIO; + cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); + goto out; + } + + if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN) + cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? 
*/ + + ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ + cifs_dbg(FYI, "UID = %llu\n", ses->Suid); - pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; - smb_buf = (struct smb_hdr *)iov[0].iov_base; + bytes_remaining = get_bcc(smb_buf); + bcc_ptr = pByteArea(smb_buf); - if ((type == RawNTLMSSP) && (resp_buf_type != CIFS_NO_BUFFER) && - (smb_buf->Status.CifsError == - cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) { - if (phase != NtLmNegotiate) { - cifs_dbg(VFS, "Unexpected more processing error\n"); - goto ssetup_exit; + /* BB check if Unicode and decode strings */ + if (bytes_remaining == 0) { + /* no string area to decode, do nothing */ + } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) { + /* unicode string area must be word-aligned */ + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { + ++bcc_ptr; + --bytes_remaining; } - /* NTLMSSP Negotiate sent now processing challenge (response) */ - phase = NtLmChallenge; /* process ntlmssp challenge */ - rc = 0; /* MORE_PROC rc is not an error here, but expected */ + decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } else { + decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); } + + rc = sess_establish_session(sess_data); +out: + sess_data->result = rc; + sess_data->func = NULL; + sess_free_buffer(sess_data); + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; +} + +#ifdef CONFIG_CIFS_UPCALL +static void +sess_auth_kerberos(struct sess_data *sess_data) +{ + int rc = 0; + struct smb_hdr *smb_buf; + SESSION_SETUP_ANDX *pSMB; + char *bcc_ptr; + struct cifs_ses *ses = sess_data->ses; + __u32 capabilities; + __u16 bytes_remaining; + struct key *spnego_key = NULL; + struct cifs_spnego_msg *msg; + u16 blob_len; + + /* extended security */ + /* wct = 12 */ + rc = sess_alloc_buffer(sess_data, 12); if (rc) - goto ssetup_exit; + goto out; - if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) { + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + bcc_ptr = sess_data->iov[2].iov_base; + capabilities = cifs_ssetup_hdr(ses, pSMB); + + spnego_key = cifs_get_spnego_key(ses); + if (IS_ERR(spnego_key)) { + rc = PTR_ERR(spnego_key); + spnego_key = NULL; + goto out; + } + + msg = spnego_key->payload.data; + /* + * check version field to make sure that cifs.upcall is + * sending us a response in an expected form + */ + if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { + cifs_dbg(VFS, + "incorrect version of cifs.upcall (expected %d but got %d)", + CIFS_SPNEGO_UPCALL_VERSION, msg->version); + rc = -EKEYREJECTED; + goto out_put_spnego_key; + } + + ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, + GFP_KERNEL); + if (!ses->auth_key.response) { + cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory", + msg->sesskey_len); + rc = -ENOMEM; + goto out_put_spnego_key; + } + ses->auth_key.len = msg->sesskey_len; + + pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; + capabilities |= CAP_EXTENDED_SECURITY; + pSMB->req.Capabilities = cpu_to_le32(capabilities); + sess_data->iov[1].iov_base = msg->data + msg->sesskey_len; + sess_data->iov[1].iov_len = msg->secblob_len; + pSMB->req.SecurityBlobLength = cpu_to_le16(sess_data->iov[1].iov_len); + + if (ses->capabilities & CAP_UNICODE) { + /* unicode strings must be word aligned */ + if ((sess_data->iov[0].iov_len + + sess_data->iov[1].iov_len) % 2) { + *bcc_ptr = 0; + bcc_ptr++; + } + unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp); + unicode_domain_string(&bcc_ptr, ses, sess_data->nls_cp); + } else { + /* BB: is this 
right? */ + ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); + } + + sess_data->iov[2].iov_len = (long) bcc_ptr - + (long) sess_data->iov[2].iov_base; + + rc = sess_sendreceive(sess_data); + if (rc) + goto out_put_spnego_key; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; + + if (smb_buf->WordCount != 4) { rc = -EIO; cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); - goto ssetup_exit; + goto out_put_spnego_key; } - action = le16_to_cpu(pSMB->resp.Action); - if (action & GUEST_LOGIN) + + if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN) cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */ + ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ cifs_dbg(FYI, "UID = %llu\n", ses->Suid); - /* response can have either 3 or 4 word count - Samba sends 3 */ - /* and lanman response is 3 */ + bytes_remaining = get_bcc(smb_buf); bcc_ptr = pByteArea(smb_buf); - if (smb_buf->WordCount == 4) { - blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); - if (blob_len > bytes_remaining) { - cifs_dbg(VFS, "bad security blob length %d\n", - blob_len); - rc = -EINVAL; - goto ssetup_exit; - } - if (phase == NtLmChallenge) { - rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses); - /* now goto beginning for ntlmssp authenticate phase */ - if (rc) - goto ssetup_exit; - } - bcc_ptr += blob_len; - bytes_remaining -= blob_len; + blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); + if (blob_len > bytes_remaining) { + cifs_dbg(VFS, "bad security blob length %d\n", + blob_len); + rc = -EINVAL; + goto out_put_spnego_key; } + bcc_ptr += blob_len; + bytes_remaining -= blob_len; /* BB check if Unicode and decode strings */ if (bytes_remaining == 0) { @@ -906,60 +1083,371 @@ ssetup_ntlmssp_authenticate: ++bcc_ptr; --bytes_remaining; } - decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); + decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); } else { - decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); + decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); } -ssetup_exit: - if (spnego_key) { - key_invalidate(spnego_key); - key_put(spnego_key); + rc = sess_establish_session(sess_data); +out_put_spnego_key: + key_invalidate(spnego_key); + key_put(spnego_key); +out: + sess_data->result = rc; + sess_data->func = NULL; + sess_free_buffer(sess_data); + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; +} + +#else + +static void +sess_auth_kerberos(struct sess_data *sess_data) +{ + cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n"); + sess_data->result = -ENOSYS; + sess_data->func = NULL; +} +#endif /* ! CONFIG_CIFS_UPCALL */ + +/* + * The required kvec buffers have to be allocated before calling this + * function. 
+ */ +static int +_sess_auth_rawntlmssp_assemble_req(struct sess_data *sess_data) +{ + struct smb_hdr *smb_buf; + SESSION_SETUP_ANDX *pSMB; + struct cifs_ses *ses = sess_data->ses; + __u32 capabilities; + char *bcc_ptr; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)pSMB; + + capabilities = cifs_ssetup_hdr(ses, pSMB); + if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { + cifs_dbg(VFS, "NTLMSSP requires Unicode support\n"); + return -ENOSYS; } - kfree(str_area); - kfree(ntlmsspblob); - ntlmsspblob = NULL; - if (resp_buf_type == CIFS_SMALL_BUFFER) { - cifs_dbg(FYI, "ssetup freeing small buf %p\n", iov[0].iov_base); - cifs_small_buf_release(iov[0].iov_base); - } else if (resp_buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(iov[0].iov_base); - /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */ - if ((phase == NtLmChallenge) && (rc == 0)) - goto ssetup_ntlmssp_authenticate; + pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; + capabilities |= CAP_EXTENDED_SECURITY; + pSMB->req.Capabilities |= cpu_to_le32(capabilities); + + bcc_ptr = sess_data->iov[2].iov_base; + /* unicode strings must be word aligned */ + if ((sess_data->iov[0].iov_len + sess_data->iov[1].iov_len) % 2) { + *bcc_ptr = 0; + bcc_ptr++; + } + unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp); + + sess_data->iov[2].iov_len = (long) bcc_ptr - + (long) sess_data->iov[2].iov_base; + + return 0; +} + +static void +sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data); + +static void +sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data) +{ + int rc; + struct smb_hdr *smb_buf; + SESSION_SETUP_ANDX *pSMB; + struct cifs_ses *ses = sess_data->ses; + __u16 bytes_remaining; + char *bcc_ptr; + u16 blob_len; + + cifs_dbg(FYI, "rawntlmssp session setup negotiate phase\n"); + + /* + * if memory allocation is successful, caller of this function + * frees it. 
+ */ + ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL); + if (!ses->ntlmssp) { + rc = -ENOMEM; + goto out; + } + ses->ntlmssp->sesskey_per_smbsess = false; + + /* wct = 12 */ + rc = sess_alloc_buffer(sess_data, 12); + if (rc) + goto out; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + + /* Build security blob before we assemble the request */ + build_ntlmssp_negotiate_blob(pSMB->req.SecurityBlob, ses); + sess_data->iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); + sess_data->iov[1].iov_base = pSMB->req.SecurityBlob; + pSMB->req.SecurityBlobLength = cpu_to_le16(sizeof(NEGOTIATE_MESSAGE)); + + rc = _sess_auth_rawntlmssp_assemble_req(sess_data); + if (rc) + goto out; + + rc = sess_sendreceive(sess_data); + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; + + /* If true, rc here is expected and not an error */ + if (sess_data->buf0_type != CIFS_NO_BUFFER && + smb_buf->Status.CifsError == + cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED)) + rc = 0; + + if (rc) + goto out; + + cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n"); + + if (smb_buf->WordCount != 4) { + rc = -EIO; + cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); + goto out; + } + + ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ + cifs_dbg(FYI, "UID = %llu\n", ses->Suid); + + bytes_remaining = get_bcc(smb_buf); + bcc_ptr = pByteArea(smb_buf); + + blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); + if (blob_len > bytes_remaining) { + cifs_dbg(VFS, "bad security blob length %d\n", + blob_len); + rc = -EINVAL; + goto out; + } + + rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses); +out: + sess_free_buffer(sess_data); if (!rc) { - mutex_lock(&ses->server->srv_mutex); - if (!ses->server->session_estab) { - if (ses->server->sign) { - ses->server->session_key.response = - kmemdup(ses->auth_key.response, - ses->auth_key.len, GFP_KERNEL); - if (!ses->server->session_key.response) { - rc = -ENOMEM; - mutex_unlock(&ses->server->srv_mutex); - goto keycp_exit; - } - ses->server->session_key.len = - ses->auth_key.len; - } - ses->server->sequence_number = 0x2; - ses->server->session_estab = true; - } - mutex_unlock(&ses->server->srv_mutex); + sess_data->func = sess_auth_rawntlmssp_authenticate; + return; + } + + /* Else error. Cleanup */ + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; + kfree(ses->ntlmssp); + ses->ntlmssp = NULL; + + sess_data->func = NULL; + sess_data->result = rc; +} - cifs_dbg(FYI, "CIFS session established successfully\n"); - spin_lock(&GlobalMid_Lock); - ses->status = CifsGood; - ses->need_reconnect = false; - spin_unlock(&GlobalMid_Lock); +static void +sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) +{ + int rc; + struct smb_hdr *smb_buf; + SESSION_SETUP_ANDX *pSMB; + struct cifs_ses *ses = sess_data->ses; + __u16 bytes_remaining; + char *bcc_ptr; + char *ntlmsspblob = NULL; + u16 blob_len; + + cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n"); + + /* wct = 12 */ + rc = sess_alloc_buffer(sess_data, 12); + if (rc) + goto out; + + /* Build security blob before we assemble the request */ + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)pSMB; + /* + * 5 is an empirical value, large enough to hold + * authenticate message plus max 10 of av pairs, + * domain, user, workstation names, flags, etc.
+ */ + ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE), + GFP_KERNEL); + if (!ntlmsspblob) { + rc = -ENOMEM; + goto out; } -keycp_exit: + rc = build_ntlmssp_auth_blob(ntlmsspblob, + &blob_len, ses, sess_data->nls_cp); + if (rc) + goto out_free_ntlmsspblob; + sess_data->iov[1].iov_len = blob_len; + sess_data->iov[1].iov_base = ntlmsspblob; + pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len); + /* + * Make sure that we tell the server that we are using + * the uid that it just gave us back on the response + * (challenge) + */ + smb_buf->Uid = ses->Suid; + + rc = _sess_auth_rawntlmssp_assemble_req(sess_data); + if (rc) + goto out_free_ntlmsspblob; + + rc = sess_sendreceive(sess_data); + if (rc) + goto out_free_ntlmsspblob; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; + if (smb_buf->WordCount != 4) { + rc = -EIO; + cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); + goto out_free_ntlmsspblob; + } + + if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN) + cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */ + + bytes_remaining = get_bcc(smb_buf); + bcc_ptr = pByteArea(smb_buf); + blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); + if (blob_len > bytes_remaining) { + cifs_dbg(VFS, "bad security blob length %d\n", + blob_len); + rc = -EINVAL; + goto out_free_ntlmsspblob; + } + bcc_ptr += blob_len; + bytes_remaining -= blob_len; + + + /* BB check if Unicode and decode strings */ + if (bytes_remaining == 0) { + /* no string area to decode, do nothing */ + } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) { + /* unicode string area must be word-aligned */ + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { + ++bcc_ptr; + --bytes_remaining; + } + decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } else { + decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } + +out_free_ntlmsspblob: + kfree(ntlmsspblob); +out: + sess_free_buffer(sess_data); + + if (!rc) + rc = sess_establish_session(sess_data); + + /* Cleanup */ kfree(ses->auth_key.response); ses->auth_key.response = NULL; kfree(ses->ntlmssp); + ses->ntlmssp = NULL; + + sess_data->func = NULL; + sess_data->result = rc; +} + +static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data) +{ + int type; + + type = select_sectype(ses->server, ses->sectype); + cifs_dbg(FYI, "sess setup type %d\n", type); + if (type == Unspecified) { + cifs_dbg(VFS, + "Unable to select appropriate authentication method!"); + return -EINVAL; + } + + switch (type) { + case LANMAN: + /* LANMAN and plaintext are less secure and off by default. + * So we make this explicitly be turned on in kconfig (in the + * build) and turned on at runtime (changed from the default) + * in proc/fs/cifs or via mount parm. Unfortunately this is + * needed for old Win (e.g. 
Win95), some obscure NAS and OS/2 */ +#ifdef CONFIG_CIFS_WEAK_PW_HASH + sess_data->func = sess_auth_lanman; + break; +#else + return -EOPNOTSUPP; +#endif + case NTLM: + sess_data->func = sess_auth_ntlm; + break; + case NTLMv2: + sess_data->func = sess_auth_ntlmv2; + break; + case Kerberos: +#ifdef CONFIG_CIFS_UPCALL + sess_data->func = sess_auth_kerberos; + break; +#else + cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n"); + return -ENOSYS; + break; +#endif /* CONFIG_CIFS_UPCALL */ + case RawNTLMSSP: + sess_data->func = sess_auth_rawntlmssp_negotiate; + break; + default: + cifs_dbg(VFS, "secType %d not supported!\n", type); + return -ENOSYS; + } + + return 0; +} + +int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, + const struct nls_table *nls_cp) +{ + int rc = 0; + struct sess_data *sess_data; + + if (ses == NULL) { + WARN(1, "%s: ses == NULL!", __func__); + return -EINVAL; + } + + sess_data = kzalloc(sizeof(struct sess_data), GFP_KERNEL); + if (!sess_data) + return -ENOMEM; + + rc = select_sec(ses, sess_data); + if (rc) + goto out; + + sess_data->xid = xid; + sess_data->ses = ses; + sess_data->buf0_type = CIFS_NO_BUFFER; + sess_data->nls_cp = (struct nls_table *) nls_cp; + + while (sess_data->func) + sess_data->func(sess_data); + + /* Store result before we free sess_data */ + rc = sess_data->result; +out: + kfree(sess_data); return rc; } diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index d1fdfa84870..5e8c22d6c7b 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -1009,6 +1009,12 @@ cifs_is_read_op(__u32 oplock) return oplock == OPLOCK_READ; } +static unsigned int +cifs_wp_retry_size(struct inode *inode) +{ + return CIFS_SB(inode->i_sb)->wsize; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -1019,6 +1025,7 @@ struct smb_version_operations smb1_operations = { .set_credits = cifs_set_credits, .get_credits_field = cifs_get_credits_field, .get_credits = cifs_get_credits, + .wait_mtu_credits = cifs_wait_mtu_credits, .get_next_mid = cifs_get_next_mid, .read_data_offset = cifs_read_data_offset, .read_data_length = cifs_read_data_length, @@ -1078,6 +1085,7 @@ struct smb_version_operations smb1_operations = { .query_mf_symlink = cifs_query_mf_symlink, .create_mf_symlink = cifs_create_mf_symlink, .is_read_op = cifs_is_read_op, + .wp_retry_size = cifs_wp_retry_size, #ifdef CONFIG_CIFS_XATTR .query_all_EAs = CIFSSMBQAllEAs, .set_EA = CIFSSMBSetEA, diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 84c012a6aba..0150182a449 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -91,7 +91,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, case SMB2_OP_SET_EOF: tmprc = SMB2_set_eof(xid, tcon, fid.persistent_fid, fid.volatile_fid, current->tgid, - (__le64 *)data); + (__le64 *)data, false); break; case SMB2_OP_SET_INFO: tmprc = SMB2_set_info(xid, tcon, fid.persistent_fid, diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 94bd4fbb13d..e31a9dfdcd3 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -605,7 +605,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_MAPPED_FILE_SIZE_ZERO, -EIO, "STATUS_MAPPED_FILE_SIZE_ZERO"}, {STATUS_TOO_MANY_OPENED_FILES, -EMFILE, "STATUS_TOO_MANY_OPENED_FILES"}, {STATUS_CANCELLED, -EIO, "STATUS_CANCELLED"}, - {STATUS_CANNOT_DELETE, -EIO, "STATUS_CANNOT_DELETE"}, + {STATUS_CANNOT_DELETE, -EACCES, "STATUS_CANNOT_DELETE"}, {STATUS_INVALID_COMPUTER_NAME, -EIO, 
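/*
 * Aside: a minimal sketch (not part of the patch; all names below are
 * illustrative) of the continuation pattern the new CIFS_SessSetup() above
 * relies on. Each sess_auth_* handler either clears sess_data->func when it
 * is finished, leaving the outcome in sess_data->result, or points func at
 * the next phase (as sess_auth_rawntlmssp_negotiate does with
 * sess_auth_rawntlmssp_authenticate), so a single loop drives an
 * authentication exchange of any number of round trips.
 */
struct step_data {
	int result;
	void (*func)(struct step_data *);
};

static void step_final(struct step_data *sd)
{
	sd->result = 0;		/* done: clearing func terminates the loop */
	sd->func = NULL;
}

static void step_first(struct step_data *sd)
{
	sd->func = step_final;	/* chain to the next phase */
}

static int run_steps(void)
{
	struct step_data sd = { .func = step_first };

	while (sd.func)
		sd.func(&sd);
	return sd.result;
}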
"STATUS_INVALID_COMPUTER_NAME"}, {STATUS_FILE_DELETED, -EIO, "STATUS_FILE_DELETED"}, {STATUS_SPECIAL_ACCOUNT, -EIO, "STATUS_SPECIAL_ACCOUNT"}, diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index b8021fde987..f2e6ac29a8d 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -437,7 +437,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, continue; cifs_dbg(FYI, "found in the open list\n"); - cifs_dbg(FYI, "lease key match, lease break 0x%d\n", + cifs_dbg(FYI, "lease key match, lease break 0x%x\n", le32_to_cpu(rsp->NewLeaseState)); server->ops->set_oplock_level(cinode, lease_state, 0, NULL); @@ -467,7 +467,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, } cifs_dbg(FYI, "found in the pending open list\n"); - cifs_dbg(FYI, "lease key match, lease break 0x%d\n", + cifs_dbg(FYI, "lease key match, lease break 0x%x\n", le32_to_cpu(rsp->NewLeaseState)); open->oplock = lease_state; @@ -546,7 +546,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) return false; } - cifs_dbg(FYI, "oplock level 0x%d\n", rsp->OplockLevel); + cifs_dbg(FYI, "oplock level 0x%x\n", rsp->OplockLevel); /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 787844bde38..77f8aeb9c2f 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -19,6 +19,7 @@ #include <linux/pagemap.h> #include <linux/vfs.h> +#include <linux/falloc.h> #include "cifsglob.h" #include "smb2pdu.h" #include "smb2proto.h" @@ -112,6 +113,53 @@ smb2_get_credits(struct mid_q_entry *mid) return le16_to_cpu(((struct smb2_hdr *)mid->resp_buf)->CreditRequest); } +static int +smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, + unsigned int *num, unsigned int *credits) +{ + int rc = 0; + unsigned int scredits; + + spin_lock(&server->req_lock); + while (1) { + if (server->credits <= 0) { + spin_unlock(&server->req_lock); + cifs_num_waiters_inc(server); + rc = wait_event_killable(server->request_q, + has_credits(server, &server->credits)); + cifs_num_waiters_dec(server); + if (rc) + return rc; + spin_lock(&server->req_lock); + } else { + if (server->tcpStatus == CifsExiting) { + spin_unlock(&server->req_lock); + return -ENOENT; + } + + scredits = server->credits; + /* can deadlock with reopen */ + if (scredits == 1) { + *num = SMB2_MAX_BUFFER_SIZE; + *credits = 0; + break; + } + + /* leave one credit for a possible reopen */ + scredits--; + *num = min_t(unsigned int, size, + scredits * SMB2_MAX_BUFFER_SIZE); + + *credits = DIV_ROUND_UP(*num, SMB2_MAX_BUFFER_SIZE); + server->credits -= *credits; + server->in_flight++; + break; + } + } + spin_unlock(&server->req_lock); + return rc; +} + static __u64 smb2_get_next_mid(struct TCP_Server_Info *server) { @@ -182,8 +230,9 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) /* start with specified wsize, or default */ wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE; wsize = min_t(unsigned int, wsize, server->max_write); - /* set it to the maximum buffer size value we can send with 1 credit */ - wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE); + + if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) + wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE); return wsize; } @@ -197,8 +246,9 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) /* start with specified rsize, or default */ rsize = volume_info->rsize ? 
volume_info->rsize : CIFS_DEFAULT_IOSIZE; rsize = min_t(unsigned int, rsize, server->max_read); - /* set it to the maximum buffer size value we can send with 1 credit */ - rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE); + + if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) + rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE); return rsize; } @@ -687,7 +737,7 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, { __le64 eof = cpu_to_le64(size); return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, - cfile->fid.volatile_fid, cfile->pid, &eof); + cfile->fid.volatile_fid, cfile->pid, &eof, false); } static int @@ -1104,6 +1154,13 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch) return le32_to_cpu(lc->lcontext.LeaseState); } +static unsigned int +smb2_wp_retry_size(struct inode *inode) +{ + return min_t(unsigned int, CIFS_SB(inode->i_sb)->wsize, + SMB2_MAX_BUFFER_SIZE); +} + struct smb_version_operations smb20_operations = { .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, @@ -1113,6 +1170,7 @@ struct smb_version_operations smb20_operations = { .set_credits = smb2_set_credits, .get_credits_field = smb2_get_credits_field, .get_credits = smb2_get_credits, + .wait_mtu_credits = cifs_wait_mtu_credits, .get_next_mid = smb2_get_next_mid, .read_data_offset = smb2_read_data_offset, .read_data_length = smb2_read_data_length, @@ -1177,6 +1235,7 @@ struct smb_version_operations smb20_operations = { .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, .clone_range = smb2_clone_range, + .wp_retry_size = smb2_wp_retry_size, }; struct smb_version_operations smb21_operations = { @@ -1188,6 +1247,7 @@ struct smb_version_operations smb21_operations = { .set_credits = smb2_set_credits, .get_credits_field = smb2_get_credits_field, .get_credits = smb2_get_credits, + .wait_mtu_credits = smb2_wait_mtu_credits, .get_next_mid = smb2_get_next_mid, .read_data_offset = smb2_read_data_offset, .read_data_length = smb2_read_data_length, @@ -1252,6 +1312,7 @@ struct smb_version_operations smb21_operations = { .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, .clone_range = smb2_clone_range, + .wp_retry_size = smb2_wp_retry_size, }; struct smb_version_operations smb30_operations = { @@ -1263,6 +1324,7 @@ struct smb_version_operations smb30_operations = { .set_credits = smb2_set_credits, .get_credits_field = smb2_get_credits_field, .get_credits = smb2_get_credits, + .wait_mtu_credits = smb2_wait_mtu_credits, .get_next_mid = smb2_get_next_mid, .read_data_offset = smb2_read_data_offset, .read_data_length = smb2_read_data_length, @@ -1330,6 +1392,7 @@ struct smb_version_operations smb30_operations = { .parse_lease_buf = smb3_parse_lease_buf, .clone_range = smb2_clone_range, .validate_negotiate = smb3_validate_negotiate, + .wp_retry_size = smb2_wp_retry_size, }; struct smb_version_values smb20_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index b0b260dbb19..42ebc1a8be6 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -108,7 +108,6 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ , if (!tcon) goto out; - /* BB FIXME when we do write > 64K add +1 for every 64K in req or rsp */ /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */ /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */ if ((tcon->ses) && @@ -245,10 +244,6 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) if (rc) goto out; atomic_inc(&tconInfoReconnectCount); - /* - 
* BB FIXME add code to check if wsize needs update due to negotiated - * smb buffer size shrinking. - */ out: /* * Check if handle based operation so we know whether we can continue @@ -309,16 +304,6 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon, return rc; } -static void -free_rsp_buf(int resp_buftype, void *rsp) -{ - if (resp_buftype == CIFS_SMALL_BUFFER) - cifs_small_buf_release(rsp); - else if (resp_buftype == CIFS_LARGE_BUFFER) - cifs_buf_release(rsp); -} - - /* * * SMB2 Worker functions follow: @@ -1738,12 +1723,18 @@ smb2_readv_callback(struct mid_q_entry *mid) rc); } /* FIXME: should this be counted toward the initiating task? */ - task_io_account_read(rdata->bytes); - cifs_stats_bytes_read(tcon, rdata->bytes); + task_io_account_read(rdata->got_bytes); + cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_REQUEST_SUBMITTED: case MID_RETRY_NEEDED: rdata->result = -EAGAIN; + if (server->sign && rdata->got_bytes) + /* reset bytes number since we cannot verify the signature */ + rdata->got_bytes = 0; + /* FIXME: should this be counted toward the initiating task? */ + task_io_account_read(rdata->got_bytes); + cifs_stats_bytes_read(tcon, rdata->got_bytes); break; default: if (rdata->result != -ENODATA) @@ -1762,11 +1753,12 @@ smb2_readv_callback(struct mid_q_entry *mid) int smb2_async_readv(struct cifs_readdata *rdata) { - int rc; + int rc, flags = 0; struct smb2_hdr *buf; struct cifs_io_parms io_parms; struct smb_rqst rqst = { .rq_iov = &rdata->iov, .rq_nvec = 1 }; + struct TCP_Server_Info *server; cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n", __func__, rdata->offset, rdata->bytes); @@ -1777,18 +1769,41 @@ smb2_async_readv(struct cifs_readdata *rdata) io_parms.persistent_fid = rdata->cfile->fid.persistent_fid; io_parms.volatile_fid = rdata->cfile->fid.volatile_fid; io_parms.pid = rdata->pid; + + server = io_parms.tcon->ses->server; + rc = smb2_new_read_req(&rdata->iov, &io_parms, 0, 0); - if (rc) + if (rc) { + if (rc == -EAGAIN && rdata->credits) { + /* credits were reset by reconnect */ + rdata->credits = 0; + /* reduce in_flight value since we won't send the req */ + spin_lock(&server->req_lock); + server->in_flight--; + spin_unlock(&server->req_lock); + } return rc; + } buf = (struct smb2_hdr *)rdata->iov.iov_base; /* 4 for rfc1002 length field */ rdata->iov.iov_len = get_rfc1002_length(rdata->iov.iov_base) + 4; + if (rdata->credits) { + buf->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, + SMB2_MAX_BUFFER_SIZE)); + spin_lock(&server->req_lock); + server->credits += rdata->credits - + le16_to_cpu(buf->CreditCharge); + spin_unlock(&server->req_lock); + wake_up(&server->request_q); + flags = CIFS_HAS_CREDITS; + } + kref_get(&rdata->refcount); rc = cifs_call_async(io_parms.tcon->ses->server, &rqst, cifs_readv_receive, smb2_readv_callback, - rdata, 0); + rdata, flags); if (rc) { kref_put(&rdata->refcount, cifs_readdata_release); cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE); @@ -1906,15 +1921,25 @@ int smb2_async_writev(struct cifs_writedata *wdata, void (*release)(struct kref *kref)) { - int rc = -EACCES; + int rc = -EACCES, flags = 0; struct smb2_write_req *req = NULL; struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); + struct TCP_Server_Info *server = tcon->ses->server; struct kvec iov; struct smb_rqst rqst; rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req); - if (rc) + if (rc) { + if (rc == -EAGAIN && wdata->credits) { + /* credits were reset by reconnect */ + wdata->credits = 0; + /* reduce in_flight value since we won't send the req */ + 
spin_lock(&server->req_lock); + server->in_flight--; + spin_unlock(&server->req_lock); + } goto async_writev_out; + } req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid); @@ -1947,9 +1972,20 @@ smb2_async_writev(struct cifs_writedata *wdata, inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */); + if (wdata->credits) { + req->hdr.CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, + SMB2_MAX_BUFFER_SIZE)); + spin_lock(&server->req_lock); + server->credits += wdata->credits - + le16_to_cpu(req->hdr.CreditCharge); + spin_unlock(&server->req_lock); + wake_up(&server->request_q); + flags = CIFS_HAS_CREDITS; + } + kref_get(&wdata->refcount); - rc = cifs_call_async(tcon->ses->server, &rqst, NULL, - smb2_writev_callback, wdata, 0); + rc = cifs_call_async(server, &rqst, NULL, smb2_writev_callback, wdata, + flags); if (rc) { kref_put(&wdata->refcount, release); @@ -2325,7 +2361,7 @@ SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon, int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, - u64 volatile_fid, u32 pid, __le64 *eof) + u64 volatile_fid, u32 pid, __le64 *eof, bool is_falloc) { struct smb2_file_eof_info info; void *data; @@ -2336,8 +2372,12 @@ SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, data = &info; size = sizeof(struct smb2_file_eof_info); - return send_set_info(xid, tcon, persistent_fid, volatile_fid, pid, - FILE_END_OF_FILE_INFORMATION, 1, &data, &size); + if (is_falloc) + return send_set_info(xid, tcon, persistent_fid, volatile_fid, + pid, FILE_ALLOCATION_INFORMATION, 1, &data, &size); + else + return send_set_info(xid, tcon, persistent_fid, volatile_fid, + pid, FILE_END_OF_FILE_INFORMATION, 1, &data, &size); } int diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 0ce48db20a6..67e8ce8055d 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -139,7 +139,7 @@ extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon, __le16 *target_file); extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 pid, - __le64 *eof); + __le64 *eof, bool is_fallocate); extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, FILE_BASIC_INFO *buf); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 59c748ce872..5111e7272db 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -466,7 +466,12 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) static inline void smb2_seq_num_into_buf(struct TCP_Server_Info *server, struct smb2_hdr *hdr) { + unsigned int i, num = le16_to_cpu(hdr->CreditCharge); + hdr->MessageId = get_next_mid64(server); + /* skip message numbers according to CreditCharge field */ + for (i = 1; i < num; i++) + get_next_mid(server); } static struct mid_q_entry * diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 18cd5650a5f..9d087f4e7d4 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -448,6 +448,15 @@ wait_for_free_request(struct TCP_Server_Info *server, const int timeout, return wait_for_free_credits(server, timeout, val); } +int +cifs_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, + unsigned int *num, unsigned int *credits) +{ + *num = size; + *credits = 0; + return 0; +} + static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, struct mid_q_entry **ppmidQ) { @@ -531,20 +540,23 @@ cifs_call_async(struct TCP_Server_Info *server, struct 
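/*
 * Aside: a sketch of the MessageId arithmetic behind the
 * smb2_seq_num_into_buf() change above (illustrative, not patch code). A
 * request charging N credits consumes N consecutive MessageIds: the header
 * carries the first, and the remaining N - 1 are skipped so the sequence
 * stays in step with what the server expects.
 */
static __u64 assign_mid_range(__u64 *next_mid, unsigned int credit_charge)
{
	__u64 first = (*next_mid)++;	/* MessageId placed in the header */
	unsigned int i;

	for (i = 1; i < credit_charge; i++)
		(*next_mid)++;		/* ids consumed by the extra credits */
	return first;
}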
smb_rqst *rqst, { int rc, timeout, optype; struct mid_q_entry *mid; + unsigned int credits = 0; timeout = flags & CIFS_TIMEOUT_MASK; optype = flags & CIFS_OP_MASK; - rc = wait_for_free_request(server, timeout, optype); - if (rc) - return rc; + if ((flags & CIFS_HAS_CREDITS) == 0) { + rc = wait_for_free_request(server, timeout, optype); + if (rc) + return rc; + credits = 1; + } mutex_lock(&server->srv_mutex); mid = server->ops->setup_async_request(server, rqst); if (IS_ERR(mid)) { mutex_unlock(&server->srv_mutex); - add_credits(server, 1, optype); - wake_up(&server->request_q); + add_credits_and_wake_if(server, credits, optype); return PTR_ERR(mid); } @@ -572,8 +584,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, return 0; cifs_delete_mid(mid); - add_credits(server, 1, optype); - wake_up(&server->request_q); + add_credits_and_wake_if(server, credits, optype); return rc; } diff --git a/fs/namespace.c b/fs/namespace.c index 2a1447c946e..0acabea5831 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -890,8 +890,21 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED); /* Don't allow unprivileged users to change mount flags */ - if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) - mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; + if (flag & CL_UNPRIVILEGED) { + mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; + + if (mnt->mnt.mnt_flags & MNT_READONLY) + mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; + + if (mnt->mnt.mnt_flags & MNT_NODEV) + mnt->mnt.mnt_flags |= MNT_LOCK_NODEV; + + if (mnt->mnt.mnt_flags & MNT_NOSUID) + mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID; + + if (mnt->mnt.mnt_flags & MNT_NOEXEC) + mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC; + } /* Don't allow unprivileged users to reveal what is under a mount */ if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) @@ -1896,9 +1909,6 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) if (readonly_request == __mnt_is_readonly(mnt)) return 0; - if (mnt->mnt_flags & MNT_LOCK_READONLY) - return -EPERM; - if (readonly_request) error = mnt_make_readonly(real_mount(mnt)); else @@ -1924,6 +1934,33 @@ static int do_remount(struct path *path, int flags, int mnt_flags, if (path->dentry != path->mnt->mnt_root) return -EINVAL; + /* Don't allow changing of locked mnt flags. + * + * No locks need to be held here while testing the various + * MNT_LOCK flags because those flags can never be cleared + * once they are set. 
+ */ + if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) && + !(mnt_flags & MNT_READONLY)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && + !(mnt_flags & MNT_NODEV)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && + !(mnt_flags & MNT_NOSUID)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) && + !(mnt_flags & MNT_NOEXEC)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && + ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) { + return -EPERM; + } + err = security_sb_remount(sb, data); if (err) return err; @@ -1937,7 +1974,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags, err = do_remount_sb(sb, flags, data, 0); if (!err) { lock_mount_hash(); - mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; + mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; mnt->mnt.mnt_flags = mnt_flags; touch_mnt_namespace(mnt->mnt_ns); unlock_mount_hash(); @@ -2122,7 +2159,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, */ if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { flags |= MS_NODEV; - mnt_flags |= MNT_NODEV; + mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } } @@ -2436,6 +2473,14 @@ long do_mount(const char *dev_name, const char *dir_name, if (flags & MS_RDONLY) mnt_flags |= MNT_READONLY; + /* The default atime for remount is preservation */ + if ((flags & MS_REMOUNT) && + ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME | + MS_STRICTATIME)) == 0)) { + mnt_flags &= ~MNT_ATIME_MASK; + mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK; + } + flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | MS_STRICTATIME); @@ -2972,13 +3017,13 @@ static void *mntns_get(struct task_struct *task) struct mnt_namespace *ns = NULL; struct nsproxy *nsproxy; - rcu_read_lock(); - nsproxy = task_nsproxy(task); + task_lock(task); + nsproxy = task->nsproxy; if (nsproxy) { ns = nsproxy->mnt_ns; get_mnt_ns(ns); } - rcu_read_unlock(); + task_unlock(task); return ns; } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 1d09289c8f0..180d1ec9c32 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1205,7 +1205,7 @@ static const struct file_operations nfs_server_list_fops = { .open = nfs_server_list_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, .owner = THIS_MODULE, }; @@ -1226,7 +1226,7 @@ static const struct file_operations nfs_volume_list_fops = { .open = nfs_volume_list_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, .owner = THIS_MODULE, }; @@ -1236,19 +1236,8 @@ static const struct file_operations nfs_volume_list_fops = { */ static int nfs_server_list_open(struct inode *inode, struct file *file) { - struct seq_file *m; - int ret; - struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info; - struct net *net = pid_ns->child_reaper->nsproxy->net_ns; - - ret = seq_open(file, &nfs_server_list_ops); - if (ret < 0) - return ret; - - m = file->private_data; - m->private = net; - - return 0; + return seq_open_net(inode, file, &nfs_server_list_ops, + sizeof(struct seq_net_private)); } /* @@ -1256,7 +1245,7 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) */ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) { - struct nfs_net *nn = net_generic(m->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* lock the list against 
modification */ spin_lock(&nn->nfs_client_lock); @@ -1268,7 +1257,7 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) */ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) { - struct nfs_net *nn = net_generic(p->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); return seq_list_next(v, &nn->nfs_client_list, pos); } @@ -1278,7 +1267,7 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) */ static void nfs_server_list_stop(struct seq_file *p, void *v) { - struct nfs_net *nn = net_generic(p->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); spin_unlock(&nn->nfs_client_lock); } @@ -1289,7 +1278,7 @@ static void nfs_server_list_stop(struct seq_file *p, void *v) static int nfs_server_list_show(struct seq_file *m, void *v) { struct nfs_client *clp; - struct nfs_net *nn = net_generic(m->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* display header on line 1 */ if (v == &nn->nfs_client_list) { @@ -1321,19 +1310,8 @@ static int nfs_server_list_show(struct seq_file *m, void *v) */ static int nfs_volume_list_open(struct inode *inode, struct file *file) { - struct seq_file *m; - int ret; - struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info; - struct net *net = pid_ns->child_reaper->nsproxy->net_ns; - - ret = seq_open(file, &nfs_volume_list_ops); - if (ret < 0) - return ret; - - m = file->private_data; - m->private = net; - - return 0; + return seq_open_net(inode, file, &nfs_volume_list_ops, + sizeof(struct seq_net_private)); } /* @@ -1341,7 +1319,7 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) */ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) { - struct nfs_net *nn = net_generic(m->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* lock the list against modification */ spin_lock(&nn->nfs_client_lock); @@ -1353,7 +1331,7 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) */ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) { - struct nfs_net *nn = net_generic(p->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); return seq_list_next(v, &nn->nfs_volume_list, pos); } @@ -1363,7 +1341,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) */ static void nfs_volume_list_stop(struct seq_file *p, void *v) { - struct nfs_net *nn = net_generic(p->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); spin_unlock(&nn->nfs_client_lock); } @@ -1376,7 +1354,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) struct nfs_server *server; struct nfs_client *clp; char dev[8], fsid[17]; - struct nfs_net *nn = net_generic(m->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* display header on line 1 */ if (v == &nn->nfs_volume_list) { @@ -1407,6 +1385,45 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) return 0; } +int nfs_fs_proc_net_init(struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct proc_dir_entry *p; + + nn->proc_nfsfs = proc_net_mkdir(net, "nfsfs", net->proc_net); + if (!nn->proc_nfsfs) + goto error_0; + + /* a file of servers with which we're dealing */ + p = proc_create("servers", S_IFREG|S_IRUGO, + nn->proc_nfsfs, &nfs_server_list_fops); + if (!p) + goto error_1; + + /* a file of volumes
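/*
 * Aside: the seq_open_net()/seq_file_net() pattern used by the /proc
 * conversion above, reduced to its core (the two handlers and seq_ops name
 * here are illustrative; the API calls are the real ones). seq_open_net()
 * allocates a struct seq_net_private, so each iterator can recover its
 * network namespace with seq_file_net() instead of stashing a struct net
 * in m->private by hand:
 */
static int example_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &example_seq_ops,
			    sizeof(struct seq_net_private));
}

static void *example_start(struct seq_file *m, loff_t *_pos)
{
	struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);

	spin_lock(&nn->nfs_client_lock);	/* pin the list while walking */
	return seq_list_start_head(&nn->nfs_client_list, *_pos);
}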
that we have mounted */ + p = proc_create("volumes", S_IFREG|S_IRUGO, + nn->proc_nfsfs, &nfs_volume_list_fops); + if (!p) + goto error_2; + return 0; + +error_2: + remove_proc_entry("servers", nn->proc_nfsfs); +error_1: + remove_proc_entry("fs/nfsfs", NULL); +error_0: + return -ENOMEM; +} + +void nfs_fs_proc_net_exit(struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + + remove_proc_entry("volumes", nn->proc_nfsfs); + remove_proc_entry("servers", nn->proc_nfsfs); + remove_proc_entry("fs/nfsfs", NULL); +} + /* * initialise the /proc/fs/nfsfs/ directory */ @@ -1419,14 +1436,12 @@ int __init nfs_fs_proc_init(void) goto error_0; /* a file of servers with which we're dealing */ - p = proc_create("servers", S_IFREG|S_IRUGO, - proc_fs_nfs, &nfs_server_list_fops); + p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers"); if (!p) goto error_1; /* a file of volumes that we have mounted */ - p = proc_create("volumes", S_IFREG|S_IRUGO, - proc_fs_nfs, &nfs_volume_list_fops); + p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes"); if (!p) goto error_2; return 0; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index abd37a38053..68921b01b79 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1840,11 +1840,12 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { nfs_clients_init(net); - return 0; + return nfs_fs_proc_net_init(net); } static void nfs_net_exit(struct net *net) { + nfs_fs_proc_net_exit(net); nfs_cleanup_cb_ident_idr(net); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 617f36611d4..e2a45ae5014 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -195,7 +195,16 @@ extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); +extern int nfs_fs_proc_net_init(struct net *net); +extern void nfs_fs_proc_net_exit(struct net *net); #else +static inline int nfs_fs_proc_net_init(struct net *net) +{ + return 0; +} +static inline void nfs_fs_proc_net_exit(struct net *net) +{ +} static inline int nfs_fs_proc_init(void) { return 0; diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 8ee1fab8326..ef221fb8a18 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -29,6 +29,9 @@ struct nfs_net { #endif spinlock_t nfs_client_lock; struct timespec boot_time; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc_nfsfs; +#endif }; extern int nfs_net_id; diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h index a986ceb6fd0..4cd7c69a6cb 100644 --- a/fs/nfsd/acl.h +++ b/fs/nfsd/acl.h @@ -47,7 +47,7 @@ struct svc_rqst; #define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \ / sizeof(struct nfs4_ace)) -struct nfs4_acl *nfs4_acl_new(int); +int nfs4_acl_bytes(int entries); int nfs4_acl_get_whotype(char *, u32); __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 72f44823adb..9d46a0bdd9f 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -28,7 +28,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) validate_process_creds(); /* discard any old override before preparing the new set */ - revert_creds(get_cred(current->real_cred)); + revert_creds(get_cred(current_real_cred())); new = prepare_creds(); if (!new) return -ENOMEM; diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 13b85f94d9e..72ffd7cce3c 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -698,8 +698,8 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) 
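/*
 * Aside: the svc_export_init() hunk that follows swaps a manual
 * dget()/mntget() pair for path_get(). As a sketch of why that is
 * equivalent, the real helper (fs/namei.c) simply takes one reference on
 * each member of the path:
 */
void example_path_get(const struct path *path)
{
	mntget(path->mnt);	/* reference the vfsmount */
	dget(path->dentry);	/* reference the dentry */
}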
kref_get(&item->ex_client->ref); new->ex_client = item->ex_client; - new->ex_path.dentry = dget(item->ex_path.dentry); - new->ex_path.mnt = mntget(item->ex_path.mnt); + new->ex_path = item->ex_path; + path_get(&item->ex_path); new->ex_fslocs.locations = NULL; new->ex_fslocs.locations_count = 0; new->ex_fslocs.migrated = 0; @@ -1253,7 +1253,7 @@ static int e_show(struct seq_file *m, void *p) return 0; } - cache_get(&exp->h); + exp_get(exp); if (cache_check(cd, &exp->h, NULL)) return 0; exp_put(exp); diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index cfeea85c5be..04dc8c167b0 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -101,9 +101,10 @@ static inline void exp_put(struct svc_export *exp) cache_put(&exp->h, exp->cd); } -static inline void exp_get(struct svc_export *exp) +static inline struct svc_export *exp_get(struct svc_export *exp) { cache_get(&exp->h); + return exp; } struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index 2ed05c3cd43..c16bf5af683 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -17,81 +17,13 @@ struct nfsd_fault_inject_op { char *file; - u64 (*forget)(struct nfs4_client *, u64); - u64 (*print)(struct nfs4_client *, u64); + u64 (*get)(void); + u64 (*set_val)(u64); + u64 (*set_clnt)(struct sockaddr_storage *, size_t); }; -static struct nfsd_fault_inject_op inject_ops[] = { - { - .file = "forget_clients", - .forget = nfsd_forget_client, - .print = nfsd_print_client, - }, - { - .file = "forget_locks", - .forget = nfsd_forget_client_locks, - .print = nfsd_print_client_locks, - }, - { - .file = "forget_openowners", - .forget = nfsd_forget_client_openowners, - .print = nfsd_print_client_openowners, - }, - { - .file = "forget_delegations", - .forget = nfsd_forget_client_delegations, - .print = nfsd_print_client_delegations, - }, - { - .file = "recall_delegations", - .forget = nfsd_recall_client_delegations, - .print = nfsd_print_client_delegations, - }, -}; - -static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op); static struct dentry *debug_dir; -static void nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val) -{ - u64 count = 0; - - if (val == 0) - printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file); - else - printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val); - - nfs4_lock_state(); - count = nfsd_for_n_state(val, op->forget); - nfs4_unlock_state(); - printk(KERN_INFO "NFSD: %s: found %llu", op->file, count); -} - -static void nfsd_inject_set_client(struct nfsd_fault_inject_op *op, - struct sockaddr_storage *addr, - size_t addr_size) -{ - char buf[INET6_ADDRSTRLEN]; - struct nfs4_client *clp; - u64 count; - - nfs4_lock_state(); - clp = nfsd_find_client(addr, addr_size); - if (clp) { - count = op->forget(clp, 0); - rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); - printk(KERN_INFO "NFSD [%s]: Client %s had %llu state object(s)\n", op->file, buf, count); - } - nfs4_unlock_state(); -} - -static void nfsd_inject_get(struct nfsd_fault_inject_op *op, u64 *val) -{ - nfs4_lock_state(); - *val = nfsd_for_n_state(0, op->print); - nfs4_unlock_state(); -} - static ssize_t fault_inject_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { @@ -99,9 +31,10 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf, char read_buf[25]; size_t size; loff_t pos = *ppos; + struct nfsd_fault_inject_op *op = file_inode(file)->i_private; if (!pos) - 
nfsd_inject_get(file_inode(file)->i_private, &val); + val = op->get(); size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); return simple_read_from_buffer(buf, len, ppos, read_buf, size); @@ -114,18 +47,36 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf, size_t size = min(sizeof(write_buf) - 1, len); struct net *net = current->nsproxy->net_ns; struct sockaddr_storage sa; + struct nfsd_fault_inject_op *op = file_inode(file)->i_private; u64 val; + char *nl; if (copy_from_user(write_buf, buf, size)) return -EFAULT; write_buf[size] = '\0'; + /* Deal with any embedded newlines in the string */ + nl = strchr(write_buf, '\n'); + if (nl) { + size = nl - write_buf; + *nl = '\0'; + } + size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); - if (size > 0) - nfsd_inject_set_client(file_inode(file)->i_private, &sa, size); - else { + if (size > 0) { + val = op->set_clnt(&sa, size); + if (val) + pr_info("NFSD [%s]: Client %s had %llu state object(s)\n", + op->file, write_buf, val); + } else { val = simple_strtoll(write_buf, NULL, 0); - nfsd_inject_set(file_inode(file)->i_private, val); + if (val == 0) + pr_info("NFSD Fault Injection: %s (all)", op->file); + else + pr_info("NFSD Fault Injection: %s (n = %llu)", + op->file, val); + val = op->set_val(val); + pr_info("NFSD: %s: found %llu", op->file, val); } return len; /* on success, claim we got the whole input */ } @@ -141,6 +92,41 @@ void nfsd_fault_inject_cleanup(void) debugfs_remove_recursive(debug_dir); } +static struct nfsd_fault_inject_op inject_ops[] = { + { + .file = "forget_clients", + .get = nfsd_inject_print_clients, + .set_val = nfsd_inject_forget_clients, + .set_clnt = nfsd_inject_forget_client, + }, + { + .file = "forget_locks", + .get = nfsd_inject_print_locks, + .set_val = nfsd_inject_forget_locks, + .set_clnt = nfsd_inject_forget_client_locks, + }, + { + .file = "forget_openowners", + .get = nfsd_inject_print_openowners, + .set_val = nfsd_inject_forget_openowners, + .set_clnt = nfsd_inject_forget_client_openowners, + }, + { + .file = "forget_delegations", + .get = nfsd_inject_print_delegations, + .set_val = nfsd_inject_forget_delegations, + .set_clnt = nfsd_inject_forget_client_delegations, + }, + { + .file = "recall_delegations", + .get = nfsd_inject_print_delegations, + .set_val = nfsd_inject_recall_delegations, + .set_clnt = nfsd_inject_recall_client_delegations, + }, +}; + +#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op)) + int nfsd_fault_inject_init(void) { unsigned int i; diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index d32b3aa6600..ea6749a3276 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -29,14 +29,19 @@ #define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) #define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) -#define LOCKOWNER_INO_HASH_BITS 8 -#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) - #define SESSION_HASH_SIZE 512 struct cld_net; struct nfsd4_client_tracking_ops; +/* + * Represents a nfsd "container". With respect to nfsv4 state tracking, the + * fields of interest are the *_id_hashtbls and the *_name_tree. These track + * the nfs4_client objects by either short or long form clientid. + * + * Each nfsd_net runs a nfs4_laundromat workqueue job when necessary to clean + * up expired clients and delegations within the container. 
+ */ struct nfsd_net { struct cld_net *cld_net; @@ -66,8 +71,6 @@ struct nfsd_net { struct rb_root conf_name_tree; struct list_head *unconf_id_hashtbl; struct rb_root unconf_name_tree; - struct list_head *ownerstr_hashtbl; - struct list_head *lockowner_ino_hashtbl; struct list_head *sessionid_hashtbl; /* * client_lru holds client queue ordered by nfs4_client.cl_time @@ -97,10 +100,16 @@ struct nfsd_net { bool nfsd_net_up; bool lockd_up; + /* Time of server startup */ + struct timeval nfssvc_boot; + /* - * Time of server startup + * Max number of connections this nfsd container will allow. Defaults + * to '0', which means that it bases this on the number of threads. */ - struct timeval nfssvc_boot; + unsigned int max_connections; + + u32 clientid_counter; struct svc_serv *nfsd_serv; }; diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 12b023a7ab7..ac54ea60b3f 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -54,14 +54,14 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { acl = get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) { - nfserr = nfserrno(PTR_ERR(acl)); - goto fail; - } if (acl == NULL) { /* Solaris returns the inode's minimum ACL. */ acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); } + if (IS_ERR(acl)) { + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; + } resp->acl_access = acl; } if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
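The getacl fix above moves the IS_ERR() check after the posix_acl_from_mode() fallback: the fallback can itself return an ERR_PTR on allocation failure, so testing only get_acl()'s result would let that error escape as if it were a valid ACL. A minimal userspace sketch of the pattern, with stand-in ERR_PTR()/IS_ERR() helpers and hypothetical lookup functions rather than the kernel's own:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the kernel's ERR_PTR()/IS_ERR() encoding of an errno
 * value in the top page of pointer space. */
static void *ERR_PTR(intptr_t err) { return (void *)err; }
static int IS_ERR(const void *p) { return (uintptr_t)p >= (uintptr_t)-4095; }

static void *primary_lookup(void) { return NULL; }	/* simulates "no ACL here" */

static void *fallback_build(int fail)
{
	static int acl = 42;
	return fail ? ERR_PTR(-ENOMEM) : (void *)&acl;	/* the fallback can fail too */
}

/* Try the primary source, fall back on NULL, and only then test
 * IS_ERR(), so errors from either path are caught. */
static void *get_with_fallback(int fail)
{
	void *p = primary_lookup();
	if (p == NULL)
		p = fallback_build(fail);
	return p;	/* caller must still IS_ERR() this */
}

int main(void)
{
	printf("ok path:   %s\n", IS_ERR(get_with_fallback(0)) ? "error" : "ok");
	printf("fail path: %s\n", IS_ERR(get_with_fallback(1)) ? "error" : "ok");
	return 0;
}

The same reordering is applied to the NFSv3 path:

diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 2a514e21dc7..34cbbab6abd 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -47,14 +47,14 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { acl = get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) { - nfserr = nfserrno(PTR_ERR(acl)); - goto fail; - } if (acl == NULL) { /* Solaris returns the inode's minimum ACL.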
*/ acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); } + if (IS_ERR(acl)) { + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; + } resp->acl_access = acl; } if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 40128991313..fa2525b2e9d 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -157,11 +157,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) * + 1 (xdr opaque byte count) = 26 */ - - resp->count = argp->count; - if (max_blocksize < resp->count) - resp->count = max_blocksize; - + resp->count = min(argp->count, max_blocksize); svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); fh_copy(&resp->fh, &argp->fh); @@ -286,8 +282,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp, fh_copy(&resp->dirfh, &argp->ffh); fh_init(&resp->fh, NFS3_FHSIZE); nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen, - argp->tname, argp->tlen, - &resp->fh, &argp->attrs); + argp->tname, &resp->fh); RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index e6c01e80325..39c5eb3ad33 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -120,10 +120,7 @@ decode_sattr3(__be32 *p, struct iattr *iap) iap->ia_valid |= ATTR_SIZE; p = xdr_decode_hyper(p, &newsize); - if (newsize <= NFS_OFFSET_MAX) - iap->ia_size = newsize; - else - iap->ia_size = NFS_OFFSET_MAX; + iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX); } if ((tmp = ntohl(*p++)) == 1) { /* set to server time */ iap->ia_valid |= ATTR_ATIME; @@ -338,10 +335,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, return 0; p = xdr_decode_hyper(p, &args->offset); - len = args->count = ntohl(*p++); - - if (len > max_blocksize) - len = max_blocksize; + args->count = ntohl(*p++); + len = min(args->count, max_blocksize); /* set up the kvec */ v=0; @@ -349,7 +344,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct page *p = *(rqstp->rq_next_page++); rqstp->rq_vec[v].iov_base = page_address(p); - rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; + rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); len -= rqstp->rq_vec[v].iov_len; v++; } @@ -484,9 +479,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, } /* now copy next page if there is one */ if (len && !avail && rqstp->rq_arg.page_len) { - avail = rqstp->rq_arg.page_len; - if (avail > PAGE_SIZE) - avail = PAGE_SIZE; + avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE); old = page_address(rqstp->rq_arg.pages[0]); } while (len && avail && *old) { @@ -571,10 +564,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, args->verf = p; p += 2; args->dircount = ~0; args->count = ntohl(*p++); - - if (args->count > PAGE_SIZE) - args->count = PAGE_SIZE; - + args->count = min_t(u32, args->count, PAGE_SIZE); args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); @@ -595,10 +585,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, args->dircount = ntohl(*p++); args->count = ntohl(*p++); - len = (args->count > max_blocksize) ? 
max_blocksize : - args->count; - args->count = len; - + len = args->count = min(args->count, max_blocksize); while (len > 0) { struct page *p = *(rqstp->rq_next_page++); if (!args->buffer) @@ -913,8 +900,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, */ /* truncate filename if too long */ - if (namlen > NFS3_MAXNAMLEN) - namlen = NFS3_MAXNAMLEN; + namlen = min(namlen, NFS3_MAXNAMLEN); slen = XDR_QUADLEN(namlen); elen = slen + NFS3_ENTRY_BAGGAGE diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index d714156a19f..59fd7665178 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -146,35 +146,43 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, int size = 0; pacl = get_acl(inode, ACL_TYPE_ACCESS); - if (!pacl) { + if (!pacl) pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); - if (IS_ERR(pacl)) - return PTR_ERR(pacl); - } + + if (IS_ERR(pacl)) + return PTR_ERR(pacl); + /* allocate for worst case: one (deny, allow) pair each: */ size += 2 * pacl->a_count; if (S_ISDIR(inode->i_mode)) { flags = NFS4_ACL_DIR; dpacl = get_acl(inode, ACL_TYPE_DEFAULT); + if (IS_ERR(dpacl)) { + error = PTR_ERR(dpacl); + goto rel_pacl; + } + if (dpacl) size += 2 * dpacl->a_count; } - *acl = nfs4_acl_new(size); + *acl = kmalloc(nfs4_acl_bytes(size), GFP_KERNEL); if (*acl == NULL) { error = -ENOMEM; goto out; } + (*acl)->naces = 0; _posix_to_nfsv4_one(pacl, *acl, flags & ~NFS4_ACL_TYPE_DEFAULT); if (dpacl) _posix_to_nfsv4_one(dpacl, *acl, flags | NFS4_ACL_TYPE_DEFAULT); - out: - posix_acl_release(pacl); +out: posix_acl_release(dpacl); +rel_pacl: + posix_acl_release(pacl); return error; } @@ -872,16 +880,13 @@ ace2type(struct nfs4_ace *ace) return -1; } -struct nfs4_acl * -nfs4_acl_new(int n) +/* + * return the size of the struct nfs4_acl required to represent an acl + * with @entries entries. 
+ */ +int nfs4_acl_bytes(int entries) { - struct nfs4_acl *acl; - - acl = kmalloc(sizeof(*acl) + n*sizeof(struct nfs4_ace), GFP_KERNEL); - if (acl == NULL) - return NULL; - acl->naces = 0; - return acl; + return sizeof(struct nfs4_acl) + entries * sizeof(struct nfs4_ace); } static struct { @@ -935,5 +940,5 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who) return 0; } WARN_ON_ONCE(1); - return -1; + return nfserr_serverfault; } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 2c73cae9899..e0be57b0f79 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -337,7 +337,7 @@ static void encode_cb_recall4args(struct xdr_stream *xdr, p = xdr_reserve_space(xdr, 4); *p++ = xdr_zero; /* truncate */ - encode_nfs_fh4(xdr, &dp->dl_fh); + encode_nfs_fh4(xdr, &dp->dl_stid.sc_file->fi_fhandle); hdr->nops++; } @@ -678,7 +678,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; args.client_name = clp->cl_cred.cr_principal; - args.prognumber = conn->cb_prog, + args.prognumber = conn->cb_prog; args.protocol = XPRT_TRANSPORT_TCP; args.authflavor = clp->cl_cred.cr_flavor; clp->cl_cb_ident = conn->cb_ident; @@ -689,7 +689,8 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; - args.protocol = XPRT_TRANSPORT_BC_TCP; + args.protocol = conn->cb_xprt->xpt_class->xcl_ident | + XPRT_TRANSPORT_BC; args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ @@ -904,7 +905,7 @@ static void nfsd4_cb_recall_release(void *calldata) spin_lock(&clp->cl_lock); list_del(&cb->cb_per_client); spin_unlock(&clp->cl_lock); - nfs4_put_delegation(dp); + nfs4_put_stid(&dp->dl_stid); } } @@ -933,7 +934,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp) set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags); /* * Note this won't actually result in a null callback; - * instead, nfsd4_do_callback_rpc() will detect the killed + * instead, nfsd4_run_cb_null() will detect the killed * client, destroy the rpc client, and stop: */ do_probe_callback(clp); @@ -1011,9 +1012,9 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) run_nfsd4_cb(cb); } -static void nfsd4_do_callback_rpc(struct work_struct *w) +static void +nfsd4_run_callback_rpc(struct nfsd4_callback *cb) { - struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; @@ -1031,9 +1032,22 @@ static void nfsd4_do_callback_rpc(struct work_struct *w) cb->cb_ops, cb); } -void nfsd4_init_callback(struct nfsd4_callback *cb) +void +nfsd4_run_cb_null(struct work_struct *w) { - INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); + struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, + cb_work); + nfsd4_run_callback_rpc(cb); +} + +void +nfsd4_run_cb_recall(struct work_struct *w) +{ + struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, + cb_work); + + nfsd4_prepare_cb_recall(cb->cb_op); + nfsd4_run_callback_rpc(cb); } void nfsd4_cb_recall(struct nfs4_delegation *dp) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8f029db5d27..5e0dc528a0e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -177,7 +177,7 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src) fh_put(dst); dget(src->fh_dentry); if (src->fh_export) - cache_get(&src->fh_export->h); + exp_get(src->fh_export); *dst = *src; } @@ 
-385,8 +385,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (nfsd4_has_session(cstate)) copy_clientid(&open->op_clientid, cstate->session); - nfs4_lock_state(); - /* check seqid for replay. set nfs4_owner */ resp = rqstp->rq_resp; status = nfsd4_process_open1(&resp->cstate, open, nn); @@ -431,8 +429,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; case NFS4_OPEN_CLAIM_PREVIOUS: status = nfs4_check_open_reclaim(&open->op_clientid, - cstate->minorversion, - nn); + cstate, nn); if (status) goto out; open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; @@ -461,19 +458,17 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * set, (2) sets open->op_stateid, (3) sets open->op_delegation. */ status = nfsd4_process_open2(rqstp, resfh, open); - WARN_ON(status && open->op_created); + WARN(status && open->op_created, + "nfsd4_process_open2 failed to open newly-created file! status=%u\n", + be32_to_cpu(status)); out: if (resfh && resfh != &cstate->current_fh) { fh_dup2(&cstate->current_fh, resfh); fh_put(resfh); kfree(resfh); } - nfsd4_cleanup_open_state(open, status); - if (open->op_openowner && !nfsd4_has_session(cstate)) - cstate->replay_owner = &open->op_openowner->oo_owner; + nfsd4_cleanup_open_state(cstate, open, status); nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); return status; } @@ -581,8 +576,12 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) __be32 verf[2]; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - verf[0] = (__be32)nn->nfssvc_boot.tv_sec; - verf[1] = (__be32)nn->nfssvc_boot.tv_usec; + /* + * This is opaque to client, so no need to byte-swap. Use + * __force to keep sparse happy + */ + verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec; + verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec; memcpy(verifier->data, verf, sizeof(verifier->data)); } @@ -619,8 +618,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, case NF4LNK: status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - create->cr_linkname, create->cr_linklen, - &resfh, &create->cr_iattr); + create->cr_data, &resfh); break; case NF4BLK: @@ -909,8 +907,8 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat default: return nfserr_inval; } - exp_get(cstate->current_fh.fh_export); - sin->sin_exp = cstate->current_fh.fh_export; + + sin->sin_exp = exp_get(cstate->current_fh.fh_export); fh_put(&cstate->current_fh); return nfs_ok; } @@ -1289,7 +1287,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. */ - rqstp->rq_usedeferral = 0; + rqstp->rq_usedeferral = false; /* * According to RFC3010, this takes precedence over all other errors. 
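gen_boot_verifier() in the hunk above copies the boot time into the verifier as raw opaque bytes; the __force casts only quiet sparse's endianness checking, since the client never interprets the two words, it just echoes them back for comparison. A small userspace sketch of that packing, assuming only byte-for-byte equality matters (the names here are hypothetical, not the kernel's):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

#define VERIFIER_SIZE 8

/* Pack two 32-bit words into an opaque 8-byte verifier. No byte
 * swapping: the peer only compares the bytes for equality. */
static void gen_verifier(unsigned char data[VERIFIER_SIZE],
                         uint32_t sec, uint32_t usec)
{
	uint32_t verf[2] = { sec, usec };
	memcpy(data, verf, sizeof(verf));
}

int main(void)
{
	unsigned char v[VERIFIER_SIZE];
	int i;

	gen_verifier(v, (uint32_t)time(NULL), 0);
	for (i = 0; i < VERIFIER_SIZE; i++)
		printf("%02x", v[i]);
	putchar('\n');
	return 0;
}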
@@ -1391,10 +1389,7 @@ encode_op: args->ops, args->opcnt, resp->opcnt, op->opnum, be32_to_cpu(status)); - if (cstate->replay_owner) { - nfs4_unlock_state(); - cstate->replay_owner = NULL; - } + nfsd4_cstate_clear_replay(cstate); /* XXX Ugh, we need to get rid of this kind of special case: */ if (op->opnum == OP_READ && op->u.read.rd_filp) fput(op->u.read.rd_filp); @@ -1408,7 +1403,7 @@ encode_op: BUG_ON(cstate->replay_owner); out: /* Reset deferral mechanism for RPC deferrals */ - rqstp->rq_usedeferral = 1; + rqstp->rq_usedeferral = true; dprintk("nfsv4 compound returned %d\n", ntohl(status)); return status; } @@ -1520,21 +1515,17 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) u32 maxcount = 0, rlen = 0; maxcount = svc_max_payload(rqstp); - rlen = op->u.read.rd_length; - - if (rlen > maxcount) - rlen = maxcount; + rlen = min(op->u.read.rd_length, maxcount); return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); } static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - u32 maxcount = svc_max_payload(rqstp); - u32 rlen = op->u.readdir.rd_maxcount; + u32 maxcount = 0, rlen = 0; - if (rlen > maxcount) - rlen = maxcount; + maxcount = svc_max_payload(rqstp); + rlen = min(op->u.readdir.rd_maxcount, maxcount); return (op_encode_hdr_size + op_encode_verifier_maxsz + XDR_QUADLEN(rlen)) * sizeof(__be32); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2204e1fe572..2e80a59e7e9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -70,13 +70,11 @@ static u64 current_sessionid = 1; #define CURRENT_STATEID(stateid) (!memcmp((stateid), ¤tstateid, sizeof(stateid_t))) /* forward declarations */ -static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); +static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); +static void nfs4_free_ol_stateid(struct nfs4_stid *stid); /* Locking: */ -/* Currently used for almost all code touching nfsv4 state: */ -static DEFINE_MUTEX(client_mutex); - /* * Currently used for the del_recall_lru and file hash table. In an * effort to decrease the scope of the client_mutex, this spinlock may @@ -84,18 +82,18 @@ static DEFINE_MUTEX(client_mutex); */ static DEFINE_SPINLOCK(state_lock); +/* + * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for + * the refcount on the open stateid to drop. 
+ */ +static DECLARE_WAIT_QUEUE_HEAD(close_wq); + static struct kmem_cache *openowner_slab; static struct kmem_cache *lockowner_slab; static struct kmem_cache *file_slab; static struct kmem_cache *stateid_slab; static struct kmem_cache *deleg_slab; -void -nfs4_lock_state(void) -{ - mutex_lock(&client_mutex); -} - static void free_session(struct nfsd4_session *); static bool is_session_dead(struct nfsd4_session *ses) @@ -103,12 +101,6 @@ static bool is_session_dead(struct nfsd4_session *ses) return ses->se_flags & NFS4_SESSION_DEAD; } -void nfsd4_put_session(struct nfsd4_session *ses) -{ - if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) - free_session(ses); -} - static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) { if (atomic_read(&ses->se_ref) > ref_held_by_me) @@ -117,46 +109,17 @@ static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_b return nfs_ok; } -static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) -{ - if (is_session_dead(ses)) - return nfserr_badsession; - atomic_inc(&ses->se_ref); - return nfs_ok; -} - -void -nfs4_unlock_state(void) -{ - mutex_unlock(&client_mutex); -} - static bool is_client_expired(struct nfs4_client *clp) { return clp->cl_time == 0; } -static __be32 mark_client_expired_locked(struct nfs4_client *clp) -{ - if (atomic_read(&clp->cl_refcount)) - return nfserr_jukebox; - clp->cl_time = 0; - return nfs_ok; -} - -static __be32 mark_client_expired(struct nfs4_client *clp) +static __be32 get_client_locked(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - __be32 ret; - spin_lock(&nn->client_lock); - ret = mark_client_expired_locked(clp); - spin_unlock(&nn->client_lock); - return ret; -} + lockdep_assert_held(&nn->client_lock); -static __be32 get_client_locked(struct nfs4_client *clp) -{ if (is_client_expired(clp)) return nfserr_expired; atomic_inc(&clp->cl_refcount); @@ -197,13 +160,17 @@ renew_client(struct nfs4_client *clp) static void put_client_renew_locked(struct nfs4_client *clp) { + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); + if (!atomic_dec_and_test(&clp->cl_refcount)) return; if (!is_client_expired(clp)) renew_client_locked(clp); } -void put_client_renew(struct nfs4_client *clp) +static void put_client_renew(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); @@ -214,6 +181,79 @@ void put_client_renew(struct nfs4_client *clp) spin_unlock(&nn->client_lock); } +static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) +{ + __be32 status; + + if (is_session_dead(ses)) + return nfserr_badsession; + status = get_client_locked(ses->se_client); + if (status) + return status; + atomic_inc(&ses->se_ref); + return nfs_ok; +} + +static void nfsd4_put_session_locked(struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); + + if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) + free_session(ses); + put_client_renew_locked(clp); +} + +static void nfsd4_put_session(struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + nfsd4_put_session_locked(ses); + spin_unlock(&nn->client_lock); +} + +static int +same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner) +{ + return (sop->so_owner.len == 
owner->len) && + 0 == memcmp(sop->so_owner.data, owner->data, owner->len); +} + +static struct nfs4_openowner * +find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, + struct nfs4_client *clp) +{ + struct nfs4_stateowner *so; + + lockdep_assert_held(&clp->cl_lock); + + list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[hashval], + so_strhash) { + if (!so->so_is_open_owner) + continue; + if (same_owner_str(so, &open->op_owner)) { + atomic_inc(&so->so_count); + return openowner(so); + } + } + return NULL; +} + +static struct nfs4_openowner * +find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, + struct nfs4_client *clp) +{ + struct nfs4_openowner *oo; + + spin_lock(&clp->cl_lock); + oo = find_openstateowner_str_locked(hashval, open, clp); + spin_unlock(&clp->cl_lock); + return oo; +} static inline u32 opaque_hashval(const void *ptr, int nbytes) @@ -236,10 +276,11 @@ static void nfsd4_free_file(struct nfs4_file *f) static inline void put_nfs4_file(struct nfs4_file *fi) { + might_lock(&state_lock); + if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { hlist_del(&fi->fi_hash); spin_unlock(&state_lock); - iput(fi->fi_inode); nfsd4_free_file(fi); } } @@ -250,7 +291,80 @@ get_nfs4_file(struct nfs4_file *fi) atomic_inc(&fi->fi_ref); } -static int num_delegations; +static struct file * +__nfs4_get_fd(struct nfs4_file *f, int oflag) +{ + if (f->fi_fds[oflag]) + return get_file(f->fi_fds[oflag]); + return NULL; +} + +static struct file * +find_writeable_file_locked(struct nfs4_file *f) +{ + struct file *ret; + + lockdep_assert_held(&f->fi_lock); + + ret = __nfs4_get_fd(f, O_WRONLY); + if (!ret) + ret = __nfs4_get_fd(f, O_RDWR); + return ret; +} + +static struct file * +find_writeable_file(struct nfs4_file *f) +{ + struct file *ret; + + spin_lock(&f->fi_lock); + ret = find_writeable_file_locked(f); + spin_unlock(&f->fi_lock); + + return ret; +} + +static struct file *find_readable_file_locked(struct nfs4_file *f) +{ + struct file *ret; + + lockdep_assert_held(&f->fi_lock); + + ret = __nfs4_get_fd(f, O_RDONLY); + if (!ret) + ret = __nfs4_get_fd(f, O_RDWR); + return ret; +} + +static struct file * +find_readable_file(struct nfs4_file *f) +{ + struct file *ret; + + spin_lock(&f->fi_lock); + ret = find_readable_file_locked(f); + spin_unlock(&f->fi_lock); + + return ret; +} + +static struct file * +find_any_file(struct nfs4_file *f) +{ + struct file *ret; + + spin_lock(&f->fi_lock); + ret = __nfs4_get_fd(f, O_RDWR); + if (!ret) { + ret = __nfs4_get_fd(f, O_WRONLY); + if (!ret) + ret = __nfs4_get_fd(f, O_RDONLY); + } + spin_unlock(&f->fi_lock); + return ret; +} + +static atomic_long_t num_delegations; unsigned long max_delegations; /* @@ -262,12 +376,11 @@ unsigned long max_delegations; #define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) #define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) -static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) +static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) { unsigned int ret; ret = opaque_hashval(ownername->data, ownername->len); - ret += clientid; return ret & OWNER_HASH_MASK; } @@ -275,75 +388,124 @@ static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) #define FILE_HASH_BITS 8 #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) -static unsigned int file_hashval(struct inode *ino) +static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh) +{ + return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0); +} + +static unsigned int file_hashval(struct knfsd_fh *fh) +{ + 
return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1); +} + +static bool nfsd_fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) { - /* XXX: why are we hashing on inode pointer, anyway? */ - return hash_ptr(ino, FILE_HASH_BITS); + return fh1->fh_size == fh2->fh_size && + !memcmp(fh1->fh_base.fh_pad, + fh2->fh_base.fh_pad, + fh1->fh_size); } static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; -static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) +static void +__nfs4_file_get_access(struct nfs4_file *fp, u32 access) { - WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); - atomic_inc(&fp->fi_access[oflag]); + lockdep_assert_held(&fp->fi_lock); + + if (access & NFS4_SHARE_ACCESS_WRITE) + atomic_inc(&fp->fi_access[O_WRONLY]); + if (access & NFS4_SHARE_ACCESS_READ) + atomic_inc(&fp->fi_access[O_RDONLY]); } -static void nfs4_file_get_access(struct nfs4_file *fp, int oflag) +static __be32 +nfs4_file_get_access(struct nfs4_file *fp, u32 access) { - if (oflag == O_RDWR) { - __nfs4_file_get_access(fp, O_RDONLY); - __nfs4_file_get_access(fp, O_WRONLY); - } else - __nfs4_file_get_access(fp, oflag); + lockdep_assert_held(&fp->fi_lock); + + /* Does this access mode make sense? */ + if (access & ~NFS4_SHARE_ACCESS_BOTH) + return nfserr_inval; + + /* Does it conflict with a deny mode already set? */ + if ((access & fp->fi_share_deny) != 0) + return nfserr_share_denied; + + __nfs4_file_get_access(fp, access); + return nfs_ok; } -static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) +static __be32 nfs4_file_check_deny(struct nfs4_file *fp, u32 deny) { - if (fp->fi_fds[oflag]) { - fput(fp->fi_fds[oflag]); - fp->fi_fds[oflag] = NULL; + /* Common case is that there is no deny mode. */ + if (deny) { + /* Does this deny mode make sense? */ + if (deny & ~NFS4_SHARE_DENY_BOTH) + return nfserr_inval; + + if ((deny & NFS4_SHARE_DENY_READ) && + atomic_read(&fp->fi_access[O_RDONLY])) + return nfserr_share_denied; + + if ((deny & NFS4_SHARE_DENY_WRITE) && + atomic_read(&fp->fi_access[O_WRONLY])) + return nfserr_share_denied; } + return nfs_ok; } static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { - if (atomic_dec_and_test(&fp->fi_access[oflag])) { - nfs4_file_put_fd(fp, oflag); + might_lock(&fp->fi_lock); + + if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) { + struct file *f1 = NULL; + struct file *f2 = NULL; + + swap(f1, fp->fi_fds[oflag]); if (atomic_read(&fp->fi_access[1 - oflag]) == 0) - nfs4_file_put_fd(fp, O_RDWR); + swap(f2, fp->fi_fds[O_RDWR]); + spin_unlock(&fp->fi_lock); + if (f1) + fput(f1); + if (f2) + fput(f2); } } -static void nfs4_file_put_access(struct nfs4_file *fp, int oflag) +static void nfs4_file_put_access(struct nfs4_file *fp, u32 access) { - if (oflag == O_RDWR) { - __nfs4_file_put_access(fp, O_RDONLY); + WARN_ON_ONCE(access & ~NFS4_SHARE_ACCESS_BOTH); + + if (access & NFS4_SHARE_ACCESS_WRITE) __nfs4_file_put_access(fp, O_WRONLY); - } else - __nfs4_file_put_access(fp, oflag); + if (access & NFS4_SHARE_ACCESS_READ) + __nfs4_file_put_access(fp, O_RDONLY); } -static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct -kmem_cache *slab) +static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, + struct kmem_cache *slab) { - struct idr *stateids = &cl->cl_stateids; struct nfs4_stid *stid; int new_id; - stid = kmem_cache_alloc(slab, GFP_KERNEL); + stid = kmem_cache_zalloc(slab, GFP_KERNEL); if (!stid) return NULL; - new_id = idr_alloc_cyclic(stateids, stid, 0, 0, GFP_KERNEL); + idr_preload(GFP_KERNEL); + 
spin_lock(&cl->cl_lock); + new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT); + spin_unlock(&cl->cl_lock); + idr_preload_end(); if (new_id < 0) goto out_free; stid->sc_client = cl; - stid->sc_type = 0; stid->sc_stateid.si_opaque.so_id = new_id; stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ - stid->sc_stateid.si_generation = 0; + atomic_set(&stid->sc_count, 1); /* * It shouldn't be a problem to reuse an opaque stateid value. @@ -360,9 +522,24 @@ out_free: return NULL; } -static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) +static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) { - return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); + struct nfs4_stid *stid; + struct nfs4_ol_stateid *stp; + + stid = nfs4_alloc_stid(clp, stateid_slab); + if (!stid) + return NULL; + + stp = openlockstateid(stid); + stp->st_stid.sc_free = nfs4_free_ol_stateid; + return stp; +} + +static void nfs4_free_deleg(struct nfs4_stid *stid) +{ + kmem_cache_free(deleg_slab, stid); + atomic_long_dec(&num_delegations); } /* @@ -379,10 +556,11 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) * Each filter is 256 bits. We hash the filehandle to 32bit and use the * low 3 bytes as hash-table indices. * - * 'state_lock', which is always held when block_delegations() is called, + * 'blocked_delegations_lock', which is always taken in block_delegations(), * is used to manage concurrent access. Testing does not need the lock * except when swapping the two filters. */ +static DEFINE_SPINLOCK(blocked_delegations_lock); static struct bloom_pair { int entries, old_entries; time_t swap_time; @@ -398,7 +576,7 @@ static int delegation_blocked(struct knfsd_fh *fh) if (bd->entries == 0) return 0; if (seconds_since_boot() - bd->swap_time > 30) { - spin_lock(&state_lock); + spin_lock(&blocked_delegations_lock); if (seconds_since_boot() - bd->swap_time > 30) { bd->entries -= bd->old_entries; bd->old_entries = bd->entries; @@ -407,7 +585,7 @@ static int delegation_blocked(struct knfsd_fh *fh) bd->new = 1-bd->new; bd->swap_time = seconds_since_boot(); } - spin_unlock(&state_lock); + spin_unlock(&blocked_delegations_lock); } hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); if (test_bit(hash&255, bd->set[0]) && @@ -430,69 +608,73 @@ static void block_delegations(struct knfsd_fh *fh) hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); + spin_lock(&blocked_delegations_lock); __set_bit(hash&255, bd->set[bd->new]); __set_bit((hash>>8)&255, bd->set[bd->new]); __set_bit((hash>>16)&255, bd->set[bd->new]); if (bd->entries == 0) bd->swap_time = seconds_since_boot(); bd->entries += 1; + spin_unlock(&blocked_delegations_lock); } static struct nfs4_delegation * -alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) +alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) { struct nfs4_delegation *dp; + long n; dprintk("NFSD alloc_init_deleg\n"); - if (num_delegations > max_delegations) - return NULL; + n = atomic_long_inc_return(&num_delegations); + if (n < 0 || n > max_delegations) + goto out_dec; if (delegation_blocked(¤t_fh->fh_handle)) - return NULL; + goto out_dec; dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); if (dp == NULL) - return dp; + goto out_dec; + + dp->dl_stid.sc_free = nfs4_free_deleg; /* * delegation seqid's are never incremented. 
The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid * 0 anyway just for consistency and use 1: */ dp->dl_stid.sc_stateid.si_generation = 1; - num_delegations++; INIT_LIST_HEAD(&dp->dl_perfile); INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); - dp->dl_file = NULL; dp->dl_type = NFS4_OPEN_DELEGATE_READ; - fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); - dp->dl_time = 0; - atomic_set(&dp->dl_count, 1); - nfsd4_init_callback(&dp->dl_recall); + INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall); return dp; +out_dec: + atomic_long_dec(&num_delegations); + return NULL; } -static void remove_stid(struct nfs4_stid *s) +void +nfs4_put_stid(struct nfs4_stid *s) { - struct idr *stateids = &s->sc_client->cl_stateids; + struct nfs4_file *fp = s->sc_file; + struct nfs4_client *clp = s->sc_client; - idr_remove(stateids, s->sc_stateid.si_opaque.so_id); -} + might_lock(&clp->cl_lock); -static void nfs4_free_stid(struct kmem_cache *slab, struct nfs4_stid *s) -{ - kmem_cache_free(slab, s); -} - -void -nfs4_put_delegation(struct nfs4_delegation *dp) -{ - if (atomic_dec_and_test(&dp->dl_count)) { - nfs4_free_stid(deleg_slab, &dp->dl_stid); - num_delegations--; + if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) { + wake_up_all(&close_wq); + return; } + idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); + spin_unlock(&clp->cl_lock); + s->sc_free(s); + if (fp) + put_nfs4_file(fp); } static void nfs4_put_deleg_lease(struct nfs4_file *fp) { + lockdep_assert_held(&state_lock); + if (!fp->fi_lease) return; if (atomic_dec_and_test(&fp->fi_delegees)) { @@ -512,54 +694,54 @@ static void hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { lockdep_assert_held(&state_lock); + lockdep_assert_held(&fp->fi_lock); + atomic_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); } -/* Called under the state lock. 
*/ static void -unhash_delegation(struct nfs4_delegation *dp) +unhash_delegation_locked(struct nfs4_delegation *dp) { - spin_lock(&state_lock); - list_del_init(&dp->dl_perclnt); - list_del_init(&dp->dl_perfile); - list_del_init(&dp->dl_recall_lru); - spin_unlock(&state_lock); - if (dp->dl_file) { - nfs4_put_deleg_lease(dp->dl_file); - put_nfs4_file(dp->dl_file); - dp->dl_file = NULL; - } -} - + struct nfs4_file *fp = dp->dl_stid.sc_file; + lockdep_assert_held(&state_lock); -static void destroy_revoked_delegation(struct nfs4_delegation *dp) -{ + dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; + /* Ensure that deleg break won't try to requeue it */ + ++dp->dl_time; + spin_lock(&fp->fi_lock); + list_del_init(&dp->dl_perclnt); list_del_init(&dp->dl_recall_lru); - remove_stid(&dp->dl_stid); - nfs4_put_delegation(dp); + list_del_init(&dp->dl_perfile); + spin_unlock(&fp->fi_lock); + if (fp) + nfs4_put_deleg_lease(fp); } static void destroy_delegation(struct nfs4_delegation *dp) { - unhash_delegation(dp); - remove_stid(&dp->dl_stid); - nfs4_put_delegation(dp); + spin_lock(&state_lock); + unhash_delegation_locked(dp); + spin_unlock(&state_lock); + nfs4_put_stid(&dp->dl_stid); } static void revoke_delegation(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_stid.sc_client; + WARN_ON(!list_empty(&dp->dl_recall_lru)); + if (clp->cl_minorversion == 0) - destroy_delegation(dp); + nfs4_put_stid(&dp->dl_stid); else { - unhash_delegation(dp); dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; + spin_lock(&clp->cl_lock); list_add(&dp->dl_recall_lru, &clp->cl_revoked); + spin_unlock(&clp->cl_lock); } } @@ -607,57 +789,62 @@ bmap_to_share_mode(unsigned long bmap) { return access; } -static bool -test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { - unsigned int access, deny; - - access = bmap_to_share_mode(stp->st_access_bmap); - deny = bmap_to_share_mode(stp->st_deny_bmap); - if ((access & open->op_share_deny) || (deny & open->op_share_access)) - return false; - return true; -} - /* set share access for a given stateid */ static inline void set_access(u32 access, struct nfs4_ol_stateid *stp) { - __set_bit(access, &stp->st_access_bmap); + unsigned char mask = 1 << access; + + WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); + stp->st_access_bmap |= mask; } /* clear share access for a given stateid */ static inline void clear_access(u32 access, struct nfs4_ol_stateid *stp) { - __clear_bit(access, &stp->st_access_bmap); + unsigned char mask = 1 << access; + + WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); + stp->st_access_bmap &= ~mask; } /* test whether a given stateid has access */ static inline bool test_access(u32 access, struct nfs4_ol_stateid *stp) { - return test_bit(access, &stp->st_access_bmap); + unsigned char mask = 1 << access; + + return (bool)(stp->st_access_bmap & mask); } /* set share deny for a given stateid */ static inline void -set_deny(u32 access, struct nfs4_ol_stateid *stp) +set_deny(u32 deny, struct nfs4_ol_stateid *stp) { - __set_bit(access, &stp->st_deny_bmap); + unsigned char mask = 1 << deny; + + WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); + stp->st_deny_bmap |= mask; } /* clear share deny for a given stateid */ static inline void -clear_deny(u32 access, struct nfs4_ol_stateid *stp) +clear_deny(u32 deny, struct nfs4_ol_stateid *stp) { - __clear_bit(access, &stp->st_deny_bmap); + unsigned char mask = 1 << deny; + + WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); + stp->st_deny_bmap &= ~mask; } /* test whether a given stateid is denying specific access */ static inline 
bool -test_deny(u32 access, struct nfs4_ol_stateid *stp) +test_deny(u32 deny, struct nfs4_ol_stateid *stp) { - return test_bit(access, &stp->st_deny_bmap); + unsigned char mask = 1 << deny; + + return (bool)(stp->st_deny_bmap & mask); } static int nfs4_access_to_omode(u32 access) @@ -674,138 +861,283 @@ static int nfs4_access_to_omode(u32 access) return O_RDONLY; } +/* + * A stateid that had a deny mode associated with it is being released + * or downgraded. Recalculate the deny mode on the file. + */ +static void +recalculate_deny_mode(struct nfs4_file *fp) +{ + struct nfs4_ol_stateid *stp; + + spin_lock(&fp->fi_lock); + fp->fi_share_deny = 0; + list_for_each_entry(stp, &fp->fi_stateids, st_perfile) + fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap); + spin_unlock(&fp->fi_lock); +} + +static void +reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp) +{ + int i; + bool change = false; + + for (i = 1; i < 4; i++) { + if ((i & deny) != i) { + change = true; + clear_deny(i, stp); + } + } + + /* Recalculate per-file deny mode if there was a change */ + if (change) + recalculate_deny_mode(stp->st_stid.sc_file); +} + /* release all access and file references for a given stateid */ static void release_all_access(struct nfs4_ol_stateid *stp) { int i; + struct nfs4_file *fp = stp->st_stid.sc_file; + + if (fp && stp->st_deny_bmap != 0) + recalculate_deny_mode(fp); for (i = 1; i < 4; i++) { if (test_access(i, stp)) - nfs4_file_put_access(stp->st_file, - nfs4_access_to_omode(i)); + nfs4_file_put_access(stp->st_stid.sc_file, i); clear_access(i, stp); } } -static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) +static void nfs4_put_stateowner(struct nfs4_stateowner *sop) { + struct nfs4_client *clp = sop->so_client; + + might_lock(&clp->cl_lock); + + if (!atomic_dec_and_lock(&sop->so_count, &clp->cl_lock)) + return; + sop->so_ops->so_unhash(sop); + spin_unlock(&clp->cl_lock); + kfree(sop->so_owner.data); + sop->so_ops->so_free(sop); +} + +static void unhash_ol_stateid(struct nfs4_ol_stateid *stp) +{ + struct nfs4_file *fp = stp->st_stid.sc_file; + + lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock); + + spin_lock(&fp->fi_lock); list_del(&stp->st_perfile); + spin_unlock(&fp->fi_lock); list_del(&stp->st_perstateowner); } -static void close_generic_stateid(struct nfs4_ol_stateid *stp) +static void nfs4_free_ol_stateid(struct nfs4_stid *stid) { + struct nfs4_ol_stateid *stp = openlockstateid(stid); + release_all_access(stp); - put_nfs4_file(stp->st_file); - stp->st_file = NULL; + if (stp->st_stateowner) + nfs4_put_stateowner(stp->st_stateowner); + kmem_cache_free(stateid_slab, stid); } -static void free_generic_stateid(struct nfs4_ol_stateid *stp) +static void nfs4_free_lock_stateid(struct nfs4_stid *stid) { - remove_stid(&stp->st_stid); - nfs4_free_stid(stateid_slab, &stp->st_stid); + struct nfs4_ol_stateid *stp = openlockstateid(stid); + struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); + struct file *file; + + file = find_any_file(stp->st_stid.sc_file); + if (file) + filp_close(file, (fl_owner_t)lo); + nfs4_free_ol_stateid(stid); } -static void release_lock_stateid(struct nfs4_ol_stateid *stp) +/* + * Put the persistent reference to an already unhashed generic stateid, while + * holding the cl_lock. If it's the last reference, then put it onto the + * reaplist for later destruction. 
+ */ +static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp, + struct list_head *reaplist) { - struct file *file; + struct nfs4_stid *s = &stp->st_stid; + struct nfs4_client *clp = s->sc_client; + + lockdep_assert_held(&clp->cl_lock); - unhash_generic_stateid(stp); + WARN_ON_ONCE(!list_empty(&stp->st_locks)); + + if (!atomic_dec_and_test(&s->sc_count)) { + wake_up_all(&close_wq); + return; + } + + idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); + list_add(&stp->st_locks, reaplist); +} + +static void unhash_lock_stateid(struct nfs4_ol_stateid *stp) +{ + struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); + + lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); + + list_del_init(&stp->st_locks); + unhash_ol_stateid(stp); unhash_stid(&stp->st_stid); - file = find_any_file(stp->st_file); - if (file) - locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner)); - close_generic_stateid(stp); - free_generic_stateid(stp); } -static void unhash_lockowner(struct nfs4_lockowner *lo) +static void release_lock_stateid(struct nfs4_ol_stateid *stp) { - struct nfs4_ol_stateid *stp; + struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); - list_del(&lo->lo_owner.so_strhash); - list_del(&lo->lo_perstateid); - list_del(&lo->lo_owner_ino_hash); - while (!list_empty(&lo->lo_owner.so_stateids)) { - stp = list_first_entry(&lo->lo_owner.so_stateids, - struct nfs4_ol_stateid, st_perstateowner); - release_lock_stateid(stp); - } + spin_lock(&oo->oo_owner.so_client->cl_lock); + unhash_lock_stateid(stp); + spin_unlock(&oo->oo_owner.so_client->cl_lock); + nfs4_put_stid(&stp->st_stid); } -static void nfs4_free_lockowner(struct nfs4_lockowner *lo) +static void unhash_lockowner_locked(struct nfs4_lockowner *lo) { - kfree(lo->lo_owner.so_owner.data); - kmem_cache_free(lockowner_slab, lo); + struct nfs4_client *clp = lo->lo_owner.so_client; + + lockdep_assert_held(&clp->cl_lock); + + list_del_init(&lo->lo_owner.so_strhash); +} + +/* + * Free a list of generic stateids that were collected earlier after being + * fully unhashed. 
+ */ +static void +free_ol_stateid_reaplist(struct list_head *reaplist) +{ + struct nfs4_ol_stateid *stp; + struct nfs4_file *fp; + + might_sleep(); + + while (!list_empty(reaplist)) { + stp = list_first_entry(reaplist, struct nfs4_ol_stateid, + st_locks); + list_del(&stp->st_locks); + fp = stp->st_stid.sc_file; + stp->st_stid.sc_free(&stp->st_stid); + if (fp) + put_nfs4_file(fp); + } } static void release_lockowner(struct nfs4_lockowner *lo) { - unhash_lockowner(lo); - nfs4_free_lockowner(lo); + struct nfs4_client *clp = lo->lo_owner.so_client; + struct nfs4_ol_stateid *stp; + struct list_head reaplist; + + INIT_LIST_HEAD(&reaplist); + + spin_lock(&clp->cl_lock); + unhash_lockowner_locked(lo); + while (!list_empty(&lo->lo_owner.so_stateids)) { + stp = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + unhash_lock_stateid(stp); + put_ol_stateid_locked(stp, &reaplist); + } + spin_unlock(&clp->cl_lock); + free_ol_stateid_reaplist(&reaplist); + nfs4_put_stateowner(&lo->lo_owner); } -static void -release_stateid_lockowners(struct nfs4_ol_stateid *open_stp) +static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp, + struct list_head *reaplist) { - struct nfs4_lockowner *lo; + struct nfs4_ol_stateid *stp; - while (!list_empty(&open_stp->st_lockowners)) { - lo = list_entry(open_stp->st_lockowners.next, - struct nfs4_lockowner, lo_perstateid); - release_lockowner(lo); + while (!list_empty(&open_stp->st_locks)) { + stp = list_entry(open_stp->st_locks.next, + struct nfs4_ol_stateid, st_locks); + unhash_lock_stateid(stp); + put_ol_stateid_locked(stp, reaplist); } } -static void unhash_open_stateid(struct nfs4_ol_stateid *stp) +static void unhash_open_stateid(struct nfs4_ol_stateid *stp, + struct list_head *reaplist) { - unhash_generic_stateid(stp); - release_stateid_lockowners(stp); - close_generic_stateid(stp); + lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); + + unhash_ol_stateid(stp); + release_open_stateid_locks(stp, reaplist); } static void release_open_stateid(struct nfs4_ol_stateid *stp) { - unhash_open_stateid(stp); - free_generic_stateid(stp); + LIST_HEAD(reaplist); + + spin_lock(&stp->st_stid.sc_client->cl_lock); + unhash_open_stateid(stp, &reaplist); + put_ol_stateid_locked(stp, &reaplist); + spin_unlock(&stp->st_stid.sc_client->cl_lock); + free_ol_stateid_reaplist(&reaplist); } -static void unhash_openowner(struct nfs4_openowner *oo) +static void unhash_openowner_locked(struct nfs4_openowner *oo) { - struct nfs4_ol_stateid *stp; + struct nfs4_client *clp = oo->oo_owner.so_client; - list_del(&oo->oo_owner.so_strhash); - list_del(&oo->oo_perclient); - while (!list_empty(&oo->oo_owner.so_stateids)) { - stp = list_first_entry(&oo->oo_owner.so_stateids, - struct nfs4_ol_stateid, st_perstateowner); - release_open_stateid(stp); - } + lockdep_assert_held(&clp->cl_lock); + + list_del_init(&oo->oo_owner.so_strhash); + list_del_init(&oo->oo_perclient); } static void release_last_closed_stateid(struct nfs4_openowner *oo) { - struct nfs4_ol_stateid *s = oo->oo_last_closed_stid; + struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net, + nfsd_net_id); + struct nfs4_ol_stateid *s; + spin_lock(&nn->client_lock); + s = oo->oo_last_closed_stid; if (s) { - free_generic_stateid(s); + list_del_init(&oo->oo_close_lru); oo->oo_last_closed_stid = NULL; } -} - -static void nfs4_free_openowner(struct nfs4_openowner *oo) -{ - kfree(oo->oo_owner.so_owner.data); - kmem_cache_free(openowner_slab, oo); + spin_unlock(&nn->client_lock); + if (s) + 
nfs4_put_stid(&s->st_stid); } static void release_openowner(struct nfs4_openowner *oo) { - unhash_openowner(oo); - list_del(&oo->oo_close_lru); + struct nfs4_ol_stateid *stp; + struct nfs4_client *clp = oo->oo_owner.so_client; + struct list_head reaplist; + + INIT_LIST_HEAD(&reaplist); + + spin_lock(&clp->cl_lock); + unhash_openowner_locked(oo); + while (!list_empty(&oo->oo_owner.so_stateids)) { + stp = list_first_entry(&oo->oo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + unhash_open_stateid(stp, &reaplist); + put_ol_stateid_locked(stp, &reaplist); + } + spin_unlock(&clp->cl_lock); + free_ol_stateid_reaplist(&reaplist); release_last_closed_stateid(oo); - nfs4_free_openowner(oo); + nfs4_put_stateowner(&oo->oo_owner); } static inline int @@ -842,7 +1174,7 @@ void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr) return; if (!seqid_mutating_err(ntohl(nfserr))) { - cstate->replay_owner = NULL; + nfsd4_cstate_clear_replay(cstate); return; } if (!so) @@ -1030,10 +1362,8 @@ static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn, str if (ret) /* oops; xprt is already down: */ nfsd4_conn_lost(&conn->cn_xpt_user); - if (conn->cn_flags & NFS4_CDFC4_BACK) { - /* callback channel may be back up */ - nfsd4_probe_callback(ses->se_client); - } + /* We may have gained or lost a callback channel: */ + nfsd4_probe_callback_sync(ses->se_client); } static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses) @@ -1073,9 +1403,6 @@ static void __free_session(struct nfsd4_session *ses) static void free_session(struct nfsd4_session *ses) { - struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); - - lockdep_assert_held(&nn->client_lock); nfsd4_del_conns(ses); nfsd4_put_drc_mem(&ses->se_fchannel); __free_session(ses); @@ -1097,12 +1424,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru new->se_cb_sec = cses->cb_sec; atomic_set(&new->se_ref, 0); idx = hash_sessionid(&new->se_sessionid); - spin_lock(&nn->client_lock); list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); spin_lock(&clp->cl_lock); list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&clp->cl_lock); - spin_unlock(&nn->client_lock); if (cses->flags & SESSION4_BACK_CHAN) { struct sockaddr *sa = svc_addr(rqstp); @@ -1120,12 +1445,14 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru /* caller must hold client_lock */ static struct nfsd4_session * -find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) +__find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) { struct nfsd4_session *elem; int idx; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + lockdep_assert_held(&nn->client_lock); + dump_sessionid(__func__, sessionid); idx = hash_sessionid(sessionid); /* Search in the appropriate list */ @@ -1140,10 +1467,33 @@ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) return NULL; } +static struct nfsd4_session * +find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net, + __be32 *ret) +{ + struct nfsd4_session *session; + __be32 status = nfserr_badsession; + + session = __find_in_sessionid_hashtbl(sessionid, net); + if (!session) + goto out; + status = nfsd4_get_session_locked(session); + if (status) + session = NULL; +out: + *ret = status; + return session; +} + /* caller must hold client_lock */ static void unhash_session(struct nfsd4_session *ses) { + struct nfs4_client 
*clp = ses->se_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); + list_del(&ses->se_hash); spin_lock(&ses->se_client->cl_lock); list_del(&ses->se_perclnt); @@ -1169,15 +1519,20 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) static struct nfs4_client *alloc_client(struct xdr_netobj name) { struct nfs4_client *clp; + int i; clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); if (clp == NULL) return NULL; clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); - if (clp->cl_name.data == NULL) { - kfree(clp); - return NULL; - } + if (clp->cl_name.data == NULL) + goto err_no_name; + clp->cl_ownerstr_hashtbl = kmalloc(sizeof(struct list_head) * + OWNER_HASH_SIZE, GFP_KERNEL); + if (!clp->cl_ownerstr_hashtbl) + goto err_no_hashtbl; + for (i = 0; i < OWNER_HASH_SIZE; i++) + INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]); clp->cl_name.len = name.len; INIT_LIST_HEAD(&clp->cl_sessions); idr_init(&clp->cl_stateids); @@ -1192,14 +1547,16 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) spin_lock_init(&clp->cl_lock); rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); return clp; +err_no_hashtbl: + kfree(clp->cl_name.data); +err_no_name: + kfree(clp); + return NULL; } static void free_client(struct nfs4_client *clp) { - struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id); - - lockdep_assert_held(&nn->client_lock); while (!list_empty(&clp->cl_sessions)) { struct nfsd4_session *ses; ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, @@ -1210,18 +1567,32 @@ free_client(struct nfs4_client *clp) } rpc_destroy_wait_queue(&clp->cl_cb_waitq); free_svc_cred(&clp->cl_cred); + kfree(clp->cl_ownerstr_hashtbl); kfree(clp->cl_name.data); idr_destroy(&clp->cl_stateids); kfree(clp); } /* must be called under the client_lock */ -static inline void +static void unhash_client_locked(struct nfs4_client *clp) { + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct nfsd4_session *ses; - list_del(&clp->cl_lru); + lockdep_assert_held(&nn->client_lock); + + /* Mark the client as expired! 
*/ + clp->cl_time = 0; + /* Make it invisible */ + if (!list_empty(&clp->cl_idhash)) { + list_del_init(&clp->cl_idhash); + if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) + rb_erase(&clp->cl_namenode, &nn->conf_name_tree); + else + rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); + } + list_del_init(&clp->cl_lru); spin_lock(&clp->cl_lock); list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) list_del_init(&ses->se_hash); @@ -1229,53 +1600,71 @@ unhash_client_locked(struct nfs4_client *clp) } static void -destroy_client(struct nfs4_client *clp) +unhash_client(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + unhash_client_locked(clp); + spin_unlock(&nn->client_lock); +} + +static __be32 mark_client_expired_locked(struct nfs4_client *clp) +{ + if (atomic_read(&clp->cl_refcount)) + return nfserr_jukebox; + unhash_client_locked(clp); + return nfs_ok; +} + +static void +__destroy_client(struct nfs4_client *clp) { struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head reaplist; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); INIT_LIST_HEAD(&reaplist); spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); - list_del_init(&dp->dl_perclnt); - list_move(&dp->dl_recall_lru, &reaplist); + unhash_delegation_locked(dp); + list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - destroy_delegation(dp); + list_del_init(&dp->dl_recall_lru); + nfs4_put_stid(&dp->dl_stid); } - list_splice_init(&clp->cl_revoked, &reaplist); - while (!list_empty(&reaplist)) { + while (!list_empty(&clp->cl_revoked)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - destroy_revoked_delegation(dp); + list_del_init(&dp->dl_recall_lru); + nfs4_put_stid(&dp->dl_stid); } while (!list_empty(&clp->cl_openowners)) { oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); + atomic_inc(&oo->oo_owner.so_count); release_openowner(oo); } nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); - list_del(&clp->cl_idhash); - if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) - rb_erase(&clp->cl_namenode, &nn->conf_name_tree); - else - rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); - spin_lock(&nn->client_lock); - unhash_client_locked(clp); - WARN_ON_ONCE(atomic_read(&clp->cl_refcount)); free_client(clp); - spin_unlock(&nn->client_lock); +} + +static void +destroy_client(struct nfs4_client *clp) +{ + unhash_client(clp); + __destroy_client(clp); } static void expire_client(struct nfs4_client *clp) { + unhash_client(clp); nfsd4_client_record_remove(clp); - destroy_client(clp); + __destroy_client(clp); } static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) @@ -1408,25 +1797,28 @@ static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); } -static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) +static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn) { - static u32 current_clientid = 1; + __be32 verf[2]; - clp->cl_clientid.cl_boot = nn->boot_time; - clp->cl_clientid.cl_id = current_clientid++; + /* + * This is opaque to client, so no need to byte-swap. 
Use + * __force to keep sparse happy + */ + verf[0] = (__force __be32)get_seconds(); + verf[1] = (__force __be32)nn->clientid_counter; + memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); } -static void gen_confirm(struct nfs4_client *clp) +static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) { - __be32 verf[2]; - static u32 i; - - verf[0] = (__be32)get_seconds(); - verf[1] = (__be32)i++; - memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); + clp->cl_clientid.cl_boot = nn->boot_time; + clp->cl_clientid.cl_id = nn->clientid_counter++; + gen_confirm(clp, nn); } -static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) +static struct nfs4_stid * +find_stateid_locked(struct nfs4_client *cl, stateid_t *t) { struct nfs4_stid *ret; @@ -1436,16 +1828,21 @@ static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) return ret; } -static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) +static struct nfs4_stid * +find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) { struct nfs4_stid *s; - s = find_stateid(cl, t); - if (!s) - return NULL; - if (typemask & s->sc_type) - return s; - return NULL; + spin_lock(&cl->cl_lock); + s = find_stateid_locked(cl, t); + if (s != NULL) { + if (typemask & s->sc_type) + atomic_inc(&s->sc_count); + else + s = NULL; + } + spin_unlock(&cl->cl_lock); + return s; } static struct nfs4_client *create_client(struct xdr_netobj name, @@ -1455,7 +1852,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, struct sockaddr *sa = svc_addr(rqstp); int ret; struct net *net = SVC_NET(rqstp); - struct nfsd_net *nn = net_generic(net, nfsd_net_id); clp = alloc_client(name); if (clp == NULL) @@ -1463,17 +1859,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name, ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); if (ret) { - spin_lock(&nn->client_lock); free_client(clp); - spin_unlock(&nn->client_lock); return NULL; } - nfsd4_init_callback(&clp->cl_cb_null); + INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); - gen_confirm(clp); clp->cl_cb_session = NULL; clp->net = net; return clp; @@ -1525,11 +1918,13 @@ add_to_unconfirmed(struct nfs4_client *clp) unsigned int idhashval; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + lockdep_assert_held(&nn->client_lock); + clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); add_clp_to_name_tree(clp, &nn->unconf_name_tree); idhashval = clientid_hashval(clp->cl_clientid.cl_id); list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]); - renew_client(clp); + renew_client_locked(clp); } static void @@ -1538,12 +1933,14 @@ move_to_confirmed(struct nfs4_client *clp) unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id); struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + lockdep_assert_held(&nn->client_lock); + dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]); rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); add_clp_to_name_tree(clp, &nn->conf_name_tree); set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); - renew_client(clp); + renew_client_locked(clp); } static struct nfs4_client * @@ -1556,7 +1953,7 @@ find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions) if (same_clid(&clp->cl_clientid, clid)) { if 
((bool)clp->cl_minorversion != sessions) return NULL; - renew_client(clp); + renew_client_locked(clp); return clp; } } @@ -1568,6 +1965,7 @@ find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { struct list_head *tbl = nn->conf_id_hashtbl; + lockdep_assert_held(&nn->client_lock); return find_client_in_id_table(tbl, clid, sessions); } @@ -1576,6 +1974,7 @@ find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { struct list_head *tbl = nn->unconf_id_hashtbl; + lockdep_assert_held(&nn->client_lock); return find_client_in_id_table(tbl, clid, sessions); } @@ -1587,12 +1986,14 @@ static bool clp_used_exchangeid(struct nfs4_client *clp) static struct nfs4_client * find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { + lockdep_assert_held(&nn->client_lock); return find_clp_in_name_tree(name, &nn->conf_name_tree); } static struct nfs4_client * find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { + lockdep_assert_held(&nn->client_lock); return find_clp_in_name_tree(name, &nn->unconf_name_tree); } @@ -1642,7 +2043,7 @@ out_err: /* * Cache a reply. nfsd4_check_resp_size() has bounded the cache size. */ -void +static void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { struct xdr_buf *buf = resp->xdr.buf; @@ -1758,7 +2159,8 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_exchange_id *exid) { - struct nfs4_client *unconf, *conf, *new; + struct nfs4_client *conf, *new; + struct nfs4_client *unconf = NULL; __be32 status; char addr_str[INET6_ADDRSTRLEN]; nfs4_verifier verf = exid->verifier; @@ -1787,8 +2189,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, return nfserr_encr_alg_unsupp; } + new = create_client(exid->clname, rqstp, &verf); + if (new == NULL) + return nfserr_jukebox; + /* Cases below refer to rfc 5661 section 18.35.4: */ - nfs4_lock_state(); + spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&exid->clname, nn); if (conf) { bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred); @@ -1813,7 +2219,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, } /* case 6 */ exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; - new = conf; goto out_copy; } if (!creds_match) { /* case 3 */ @@ -1821,15 +2226,14 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, status = nfserr_clid_inuse; goto out; } - expire_client(conf); goto out_new; } if (verfs_match) { /* case 2 */ conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; - new = conf; goto out_copy; } /* case 5, client reboot */ + conf = NULL; goto out_new; } @@ -1840,33 +2244,38 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, unconf = find_unconfirmed_client_by_name(&exid->clname, nn); if (unconf) /* case 4, possible retry or client restart */ - expire_client(unconf); + unhash_client_locked(unconf); /* case 1 (normal case) */ out_new: - new = create_client(exid->clname, rqstp, &verf); - if (new == NULL) { - status = nfserr_jukebox; - goto out; + if (conf) { + status = mark_client_expired_locked(conf); + if (status) + goto out; } new->cl_minorversion = cstate->minorversion; new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); gen_clid(new, nn); add_to_unconfirmed(new); + swap(new, conf); out_copy: - exid->clientid.cl_boot = new->cl_clientid.cl_boot; - exid->clientid.cl_id = new->cl_clientid.cl_id; + exid->clientid.cl_boot = conf->cl_clientid.cl_boot; + exid->clientid.cl_id = conf->cl_clientid.cl_id; - exid->seqid = new->cl_cs_slot.sl_seqid + 1; - nfsd4_set_ex_flags(new, exid); + exid->seqid = 
conf->cl_cs_slot.sl_seqid + 1; + nfsd4_set_ex_flags(conf, exid); dprintk("nfsd4_exchange_id seqid %d flags %x\n", - new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); + conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags); status = nfs_ok; out: - nfs4_unlock_state(); + spin_unlock(&nn->client_lock); + if (new) + expire_client(new); + if (unconf) + expire_client(unconf); return status; } @@ -2010,6 +2419,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, { struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; + struct nfs4_client *old = NULL; struct nfsd4_session *new; struct nfsd4_conn *conn; struct nfsd4_clid_slot *cs_slot = NULL; @@ -2035,7 +2445,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (!conn) goto out_free_session; - nfs4_lock_state(); + spin_lock(&nn->client_lock); unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn); conf = find_confirmed_client(&cr_ses->clientid, true, nn); WARN_ON_ONCE(conf && unconf); @@ -2054,7 +2464,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_conn; } } else if (unconf) { - struct nfs4_client *old; if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { status = nfserr_clid_inuse; @@ -2072,10 +2481,11 @@ nfsd4_create_session(struct svc_rqst *rqstp, } old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { - status = mark_client_expired(old); - if (status) + status = mark_client_expired_locked(old); + if (status) { + old = NULL; goto out_free_conn; - expire_client(old); + } } move_to_confirmed(unconf); conf = unconf; @@ -2091,20 +2501,27 @@ nfsd4_create_session(struct svc_rqst *rqstp, cr_ses->flags &= ~SESSION4_RDMA; init_session(rqstp, new, conf, cr_ses); - nfsd4_init_conn(rqstp, conn, new); + nfsd4_get_session_locked(new); memcpy(cr_ses->sessionid.data, new->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); cs_slot->sl_seqid++; cr_ses->seqid = cs_slot->sl_seqid; - /* cache solo and embedded create sessions under the state lock */ + /* cache solo and embedded create sessions under the client_lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); - nfs4_unlock_state(); + spin_unlock(&nn->client_lock); + /* init connection and backchannel */ + nfsd4_init_conn(rqstp, conn, new); + nfsd4_put_session(new); + if (old) + expire_client(old); return status; out_free_conn: - nfs4_unlock_state(); + spin_unlock(&nn->client_lock); free_conn(conn); + if (old) + expire_client(old); out_free_session: __free_session(new); out_release_drc_mem: @@ -2152,17 +2569,16 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, __be32 status; struct nfsd4_conn *conn; struct nfsd4_session *session; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (!nfsd4_last_compound_op(rqstp)) return nfserr_not_only_op; - nfs4_lock_state(); spin_lock(&nn->client_lock); - session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp)); + session = find_in_sessionid_hashtbl(&bcts->sessionid, net, &status); spin_unlock(&nn->client_lock); - status = nfserr_badsession; if (!session) - goto out; + goto out_no_session; status = nfserr_wrong_cred; if (!mach_creds_match(session->se_client, rqstp)) goto out; @@ -2176,7 +2592,8 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, nfsd4_init_conn(rqstp, conn, session); status = nfs_ok; out: - nfs4_unlock_state(); + nfsd4_put_session(session); +out_no_session: return status; } @@ -2195,9 +2612,9 @@ 
nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_session *ses; __be32 status; int ref_held_by_me = 0; - struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); + struct net *net = SVC_NET(r); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfs4_lock_state(); status = nfserr_not_only_op; if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { if (!nfsd4_last_compound_op(r)) @@ -2206,14 +2623,12 @@ nfsd4_destroy_session(struct svc_rqst *r, } dump_sessionid(__func__, &sessionid->sessionid); spin_lock(&nn->client_lock); - ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r)); - status = nfserr_badsession; + ses = find_in_sessionid_hashtbl(&sessionid->sessionid, net, &status); if (!ses) goto out_client_lock; status = nfserr_wrong_cred; if (!mach_creds_match(ses->se_client, r)) - goto out_client_lock; - nfsd4_get_session_locked(ses); + goto out_put_session; status = mark_session_dead_locked(ses, 1 + ref_held_by_me); if (status) goto out_put_session; @@ -2225,11 +2640,10 @@ nfsd4_destroy_session(struct svc_rqst *r, spin_lock(&nn->client_lock); status = nfs_ok; out_put_session: - nfsd4_put_session(ses); + nfsd4_put_session_locked(ses); out_client_lock: spin_unlock(&nn->client_lock); out: - nfs4_unlock_state(); return status; } @@ -2300,7 +2714,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_conn *conn; __be32 status; int buflen; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (resp->opcnt != 1) return nfserr_sequence_pos; @@ -2314,17 +2729,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, return nfserr_jukebox; spin_lock(&nn->client_lock); - status = nfserr_badsession; - session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp)); + session = find_in_sessionid_hashtbl(&seq->sessionid, net, &status); if (!session) goto out_no_session; clp = session->se_client; - status = get_client_locked(clp); - if (status) - goto out_no_session; - status = nfsd4_get_session_locked(session); - if (status) - goto out_put_client; status = nfserr_too_many_ops; if (nfsd4_session_too_many_ops(rqstp, session)) @@ -2354,6 +2762,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, goto out_put_session; cstate->slot = slot; cstate->session = session; + cstate->clp = clp; /* Return the cached reply status and set cstate->status * for nfsd4_proc_compound processing */ status = nfsd4_replay_cache_entry(resp, seq); @@ -2388,6 +2797,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, cstate->slot = slot; cstate->session = session; + cstate->clp = clp; out: switch (clp->cl_cb_state) { @@ -2408,31 +2818,48 @@ out_no_session: spin_unlock(&nn->client_lock); return status; out_put_session: - nfsd4_put_session(session); -out_put_client: - put_client_renew_locked(clp); + nfsd4_put_session_locked(session); goto out_no_session; } +void +nfsd4_sequence_done(struct nfsd4_compoundres *resp) +{ + struct nfsd4_compound_state *cs = &resp->cstate; + + if (nfsd4_has_session(cs)) { + if (cs->status != nfserr_replay_cache) { + nfsd4_store_cache_entry(resp); + cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; + } + /* Drop session reference that was taken in nfsd4_sequence() */ + nfsd4_put_session(cs->session); + } else if (cs->clp) + put_client_renew(cs->clp); +} + __be32 nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc) { - struct nfs4_client *conf, *unconf, *clp; + struct nfs4_client *conf, *unconf; + struct nfs4_client *clp = 
NULL; __be32 status = 0; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - nfs4_lock_state(); + spin_lock(&nn->client_lock); unconf = find_unconfirmed_client(&dc->clientid, true, nn); conf = find_confirmed_client(&dc->clientid, true, nn); WARN_ON_ONCE(conf && unconf); if (conf) { - clp = conf; - if (client_has_state(conf)) { status = nfserr_clientid_busy; goto out; } + status = mark_client_expired_locked(conf); + if (status) + goto out; + clp = conf; } else if (unconf) clp = unconf; else { @@ -2440,12 +2867,15 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta goto out; } if (!mach_creds_match(clp, rqstp)) { + clp = NULL; status = nfserr_wrong_cred; goto out; } - expire_client(clp); + unhash_client_locked(clp); out: - nfs4_unlock_state(); + spin_unlock(&nn->client_lock); + if (clp) + expire_client(clp); return status; } @@ -2464,7 +2894,6 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta return nfs_ok; } - nfs4_lock_state(); status = nfserr_complete_already; if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->session->se_client->cl_flags)) @@ -2484,7 +2913,6 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta status = nfs_ok; nfsd4_client_record_create(cstate->session->se_client); out: - nfs4_unlock_state(); return status; } @@ -2494,12 +2922,16 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct xdr_netobj clname = setclid->se_name; nfs4_verifier clverifier = setclid->se_verf; - struct nfs4_client *conf, *unconf, *new; + struct nfs4_client *conf, *new; + struct nfs4_client *unconf = NULL; __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + new = create_client(clname, rqstp, &clverifier); + if (new == NULL) + return nfserr_jukebox; /* Cases below refer to rfc 3530 section 14.2.33: */ - nfs4_lock_state(); + spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&clname, nn); if (conf) { /* case 0: */ @@ -2517,11 +2949,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } unconf = find_unconfirmed_client_by_name(&clname, nn); if (unconf) - expire_client(unconf); - status = nfserr_jukebox; - new = create_client(clname, rqstp, &clverifier); - if (new == NULL) - goto out; + unhash_client_locked(unconf); if (conf && same_verf(&conf->cl_verifier, &clverifier)) /* case 1: probable callback update */ copy_clid(new, conf); @@ -2533,9 +2961,14 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data)); + new = NULL; status = nfs_ok; out: - nfs4_unlock_state(); + spin_unlock(&nn->client_lock); + if (new) + free_client(new); + if (unconf) + expire_client(unconf); return status; } @@ -2546,6 +2979,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm) { struct nfs4_client *conf, *unconf; + struct nfs4_client *old = NULL; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; __be32 status; @@ -2553,8 +2987,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, if (STALE_CLIENTID(clid, nn)) return nfserr_stale_clientid; - nfs4_lock_state(); + spin_lock(&nn->client_lock); conf = find_confirmed_client(clid, false, nn); unconf = 
find_unconfirmed_client(clid, false, nn); /* @@ -2578,22 +3012,30 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, } status = nfs_ok; if (conf) { /* case 1: callback update */ + old = unconf; + unhash_client_locked(old); nfsd4_change_callback(conf, &unconf->cl_cb_conn); - nfsd4_probe_callback(conf); - expire_client(unconf); } else { /* case 3: normal case; new or rebooted client */ - conf = find_confirmed_client_by_name(&unconf->cl_name, nn); - if (conf) { - status = mark_client_expired(conf); - if (status) + old = find_confirmed_client_by_name(&unconf->cl_name, nn); + if (old) { + status = mark_client_expired_locked(old); + if (status) { + old = NULL; goto out; - expire_client(conf); + } } move_to_confirmed(unconf); - nfsd4_probe_callback(unconf); + conf = unconf; } + get_client_locked(conf); + spin_unlock(&nn->client_lock); + nfsd4_probe_callback(conf); + spin_lock(&nn->client_lock); + put_client_renew_locked(conf); out: - nfs4_unlock_state(); + spin_unlock(&nn->client_lock); + if (old) + expire_client(old); return status; } @@ -2603,21 +3045,23 @@ static struct nfs4_file *nfsd4_alloc_file(void) } /* OPEN Share state helper functions */ -static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) +static void nfsd4_init_file(struct nfs4_file *fp, struct knfsd_fh *fh) { - unsigned int hashval = file_hashval(ino); + unsigned int hashval = file_hashval(fh); + + lockdep_assert_held(&state_lock); atomic_set(&fp->fi_ref, 1); + spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); - fp->fi_inode = igrab(ino); + fh_copy_shallow(&fp->fi_fhandle, fh); fp->fi_had_conflict = false; fp->fi_lease = NULL; + fp->fi_share_deny = 0; memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_access, 0, sizeof(fp->fi_access)); - spin_lock(&state_lock); hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]); - spin_unlock(&state_lock); } void @@ -2673,6 +3117,28 @@ static void init_nfs4_replay(struct nfs4_replay *rp) rp->rp_status = nfserr_serverfault; rp->rp_buflen = 0; rp->rp_buf = rp->rp_ibuf; + mutex_init(&rp->rp_mutex); +} + +static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, + struct nfs4_stateowner *so) +{ + if (!nfsd4_has_session(cstate)) { + mutex_lock(&so->so_replay.rp_mutex); + cstate->replay_owner = so; + atomic_inc(&so->so_count); + } +} + +void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate) +{ + struct nfs4_stateowner *so = cstate->replay_owner; + + if (so != NULL) { + cstate->replay_owner = NULL; + mutex_unlock(&so->so_replay.rp_mutex); + nfs4_put_stateowner(so); + } } static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp) @@ -2693,111 +3159,172 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj INIT_LIST_HEAD(&sop->so_stateids); sop->so_client = clp; init_nfs4_replay(&sop->so_replay); + atomic_set(&sop->so_count, 1); return sop; } static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) { - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + lockdep_assert_held(&clp->cl_lock); - list_add(&oo->oo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); + list_add(&oo->oo_owner.so_strhash, + &clp->cl_ownerstr_hashtbl[strhashval]); list_add(&oo->oo_perclient, &clp->cl_openowners); } +static void nfs4_unhash_openowner(struct nfs4_stateowner *so) +{ + unhash_openowner_locked(openowner(so)); +} + +static void nfs4_free_openowner(struct nfs4_stateowner *so) +{ + 
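/* so_free callback: invoked from nfs4_put_stateowner() once the final so_count reference has been put */ +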
struct nfs4_openowner *oo = openowner(so); + + kmem_cache_free(openowner_slab, oo); +} + +static const struct nfs4_stateowner_operations openowner_ops = { + .so_unhash = nfs4_unhash_openowner, + .so_free = nfs4_free_openowner, +}; + static struct nfs4_openowner * -alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { - struct nfs4_openowner *oo; +alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, + struct nfsd4_compound_state *cstate) +{ + struct nfs4_client *clp = cstate->clp; + struct nfs4_openowner *oo, *ret; oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); if (!oo) return NULL; + oo->oo_owner.so_ops = &openowner_ops; oo->oo_owner.so_is_open_owner = 1; oo->oo_owner.so_seqid = open->op_seqid; - oo->oo_flags = NFS4_OO_NEW; + oo->oo_flags = 0; + if (nfsd4_has_session(cstate)) + oo->oo_flags |= NFS4_OO_CONFIRMED; oo->oo_time = 0; oo->oo_last_closed_stid = NULL; INIT_LIST_HEAD(&oo->oo_close_lru); - hash_openowner(oo, clp, strhashval); + spin_lock(&clp->cl_lock); + ret = find_openstateowner_str_locked(strhashval, open, clp); + if (ret == NULL) { + hash_openowner(oo, clp, strhashval); + ret = oo; + } else + nfs4_free_openowner(&oo->oo_owner); + spin_unlock(&clp->cl_lock); return oo; } static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_openowner *oo = open->op_openowner; + atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_OPEN_STID; - INIT_LIST_HEAD(&stp->st_lockowners); - list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); - list_add(&stp->st_perfile, &fp->fi_stateids); + INIT_LIST_HEAD(&stp->st_locks); stp->st_stateowner = &oo->oo_owner; + atomic_inc(&stp->st_stateowner->so_count); get_nfs4_file(fp); - stp->st_file = fp; + stp->st_stid.sc_file = fp; stp->st_access_bmap = 0; stp->st_deny_bmap = 0; - set_access(open->op_share_access, stp); - set_deny(open->op_share_deny, stp); stp->st_openstp = NULL; + spin_lock(&oo->oo_owner.so_client->cl_lock); + list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); + spin_lock(&fp->fi_lock); + list_add(&stp->st_perfile, &fp->fi_stateids); + spin_unlock(&fp->fi_lock); + spin_unlock(&oo->oo_owner.so_client->cl_lock); } +/* + * In the 4.0 case we need to keep the owners around a little while to handle + * CLOSE replay. We still do need to release any file access that is held by + * them before returning however. + */ static void -move_to_close_lru(struct nfs4_openowner *oo, struct net *net) +move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) { - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfs4_ol_stateid *last; + struct nfs4_openowner *oo = openowner(s->st_stateowner); + struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net, + nfsd_net_id); dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo); + /* + * We know that we hold one reference via nfsd4_close, and another + * "persistent" reference for the client. If the refcount is higher + * than 2, then there are still calls in progress that are using this + * stateid. We can't put the sc_file reference until they are finished. + * Wait for the refcount to drop to 2. Since it has been unhashed, + * there should be no danger of the refcount going back up again at + * this point. 
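+ * Note that nfs4_put_stid() wakes close_wq each time it puts a + * reference, so this wait is kicked as the remaining users go away.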
+ */ + wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2); + + release_all_access(s); + if (s->st_stid.sc_file) { + put_nfs4_file(s->st_stid.sc_file); + s->st_stid.sc_file = NULL; + } + + spin_lock(&nn->client_lock); + last = oo->oo_last_closed_stid; + oo->oo_last_closed_stid = s; list_move_tail(&oo->oo_close_lru, &nn->close_lru); oo->oo_time = get_seconds(); + spin_unlock(&nn->client_lock); + if (last) + nfs4_put_stid(&last->st_stid); } -static int -same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, - clientid_t *clid) +/* search file_hashtbl[] for file */ +static struct nfs4_file * +find_file_locked(struct knfsd_fh *fh) { - return (sop->so_owner.len == owner->len) && - 0 == memcmp(sop->so_owner.data, owner->data, owner->len) && - (sop->so_client->cl_clientid.cl_id == clid->cl_id); -} + unsigned int hashval = file_hashval(fh); + struct nfs4_file *fp; -static struct nfs4_openowner * -find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, - bool sessions, struct nfsd_net *nn) -{ - struct nfs4_stateowner *so; - struct nfs4_openowner *oo; - struct nfs4_client *clp; + lockdep_assert_held(&state_lock); - list_for_each_entry(so, &nn->ownerstr_hashtbl[hashval], so_strhash) { - if (!so->so_is_open_owner) - continue; - if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { - oo = openowner(so); - clp = oo->oo_owner.so_client; - if ((bool)clp->cl_minorversion != sessions) - return NULL; - renew_client(oo->oo_owner.so_client); - return oo; + hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { + if (nfsd_fh_match(&fp->fi_fhandle, fh)) { + get_nfs4_file(fp); + return fp; } } return NULL; } -/* search file_hashtbl[] for file */ static struct nfs4_file * -find_file(struct inode *ino) +find_file(struct knfsd_fh *fh) { - unsigned int hashval = file_hashval(ino); struct nfs4_file *fp; spin_lock(&state_lock); - hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { - if (fp->fi_inode == ino) { - get_nfs4_file(fp); - spin_unlock(&state_lock); - return fp; - } + fp = find_file_locked(fh); + spin_unlock(&state_lock); + return fp; +} + +static struct nfs4_file * +find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh) +{ + struct nfs4_file *fp; + + spin_lock(&state_lock); + fp = find_file_locked(fh); + if (fp == NULL) { + nfsd4_init_file(new, fh); + fp = new; } spin_unlock(&state_lock); - return NULL; + + return fp; } /* @@ -2807,47 +3334,53 @@ find_file(struct inode *ino) static __be32 nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) { - struct inode *ino = current_fh->fh_dentry->d_inode; struct nfs4_file *fp; - struct nfs4_ol_stateid *stp; - __be32 ret; + __be32 ret = nfs_ok; - fp = find_file(ino); + fp = find_file(¤t_fh->fh_handle); if (!fp) - return nfs_ok; - ret = nfserr_locked; - /* Search for conflicting share reservations */ - list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { - if (test_deny(deny_type, stp) || - test_deny(NFS4_SHARE_DENY_BOTH, stp)) - goto out; - } - ret = nfs_ok; -out: + return ret; + /* Check for conflicting share reservations */ + spin_lock(&fp->fi_lock); + if (fp->fi_share_deny & deny_type) + ret = nfserr_locked; + spin_unlock(&fp->fi_lock); put_nfs4_file(fp); return ret; } -static void nfsd_break_one_deleg(struct nfs4_delegation *dp) +void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) { - struct nfs4_client *clp = dp->dl_stid.sc_client; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net, + nfsd_net_id); - 
lockdep_assert_held(&state_lock); - /* We're assuming the state code never drops its reference + block_delegations(&dp->dl_stid.sc_file->fi_fhandle); + + /* + * We can't do this in nfsd_break_deleg_cb because it is + * already holding inode->i_lock. + * + * If the dl_time != 0, then we know that it has already been + * queued for a lease break. Don't queue it again. + */ + spin_lock(&state_lock); + if (dp->dl_time == 0) { + dp->dl_time = get_seconds(); + list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); + } + spin_unlock(&state_lock); +} + +static void nfsd_break_one_deleg(struct nfs4_delegation *dp) +{ + /* + * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the kernel * lock) we know the server hasn't removed the lease yet, we know - * it's safe to take a reference: */ - atomic_inc(&dp->dl_count); - - list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); - - /* Only place dl_time is set; protected by i_lock: */ - dp->dl_time = get_seconds(); - - block_delegations(&dp->dl_fh); - + * it's safe to take a reference. + */ + atomic_inc(&dp->dl_stid.sc_count); nfsd4_cb_recall(dp); } @@ -2872,11 +3405,20 @@ static void nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - spin_lock(&state_lock); + spin_lock(&fp->fi_lock); fp->fi_had_conflict = true; - list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) - nfsd_break_one_deleg(dp); - spin_unlock(&state_lock); + /* + * If there are no delegations on the list, then we can't count on this + * lease ever being cleaned up. Set the fl_break_time to jiffies so that + * time_out_leases will do it ASAP. The fact that fi_had_conflict is now + * true should keep any new delegations from being hashed. + */ + if (list_empty(&fp->fi_delegations)) + fl->fl_break_time = jiffies; + else + list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) + nfsd_break_one_deleg(dp); + spin_unlock(&fp->fi_lock); } static @@ -2904,6 +3446,42 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4 return nfserr_bad_seqid; } +static __be32 lookup_clientid(clientid_t *clid, + struct nfsd4_compound_state *cstate, + struct nfsd_net *nn) +{ + struct nfs4_client *found; + + if (cstate->clp) { + found = cstate->clp; + if (!same_clid(&found->cl_clientid, clid)) + return nfserr_stale_clientid; + return nfs_ok; + } + + if (STALE_CLIENTID(clid, nn)) + return nfserr_stale_clientid; + + /* + * For v4.1+ we get the client in the SEQUENCE op. If we don't have one + * cached already then we know this is for v4.0 and "sessions" + * will be false. + */ + WARN_ON_ONCE(cstate->session); + spin_lock(&nn->client_lock); + found = find_confirmed_client(clid, false, nn); + if (!found) { + spin_unlock(&nn->client_lock); + return nfserr_expired; + } + atomic_inc(&found->cl_refcount); + spin_unlock(&nn->client_lock); + + /* Cache the nfs4_client in cstate! 
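The cl_refcount reference taken above is dropped by put_client_renew() in nfsd4_sequence_done() once the compound completes.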
*/ + cstate->clp = found; + return nfs_ok; +} + __be32 nfsd4_process_open1(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct nfsd_net *nn) @@ -2924,19 +3502,19 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, if (open->op_file == NULL) return nfserr_jukebox; - strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); - oo = find_openstateowner_str(strhashval, open, cstate->minorversion, nn); + status = lookup_clientid(clientid, cstate, nn); + if (status) + return status; + clp = cstate->clp; + + strhashval = ownerstr_hashval(&open->op_owner); + oo = find_openstateowner_str(strhashval, open, clp); open->op_openowner = oo; if (!oo) { - clp = find_confirmed_client(clientid, cstate->minorversion, - nn); - if (clp == NULL) - return nfserr_expired; goto new_owner; } if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { /* Replace unconfirmed owners without checking for replay. */ - clp = oo->oo_owner.so_client; release_openowner(oo); open->op_openowner = NULL; goto new_owner; @@ -2944,15 +3522,14 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); if (status) return status; - clp = oo->oo_owner.so_client; goto alloc_stateid; new_owner: - oo = alloc_init_open_stateowner(strhashval, clp, open); + oo = alloc_init_open_stateowner(strhashval, open, cstate); if (oo == NULL) return nfserr_jukebox; open->op_openowner = oo; alloc_stateid: - open->op_stp = nfs4_alloc_stateid(clp); + open->op_stp = nfs4_alloc_open_stateid(clp); if (!open->op_stp) return nfserr_jukebox; return nfs_ok; @@ -2994,14 +3571,18 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open, { int flags; __be32 status = nfserr_bad_stateid; + struct nfs4_delegation *deleg; - *dp = find_deleg_stateid(cl, &open->op_delegate_stateid); - if (*dp == NULL) + deleg = find_deleg_stateid(cl, &open->op_delegate_stateid); + if (deleg == NULL) goto out; flags = share_access_to_flags(open->op_share_access); - status = nfs4_check_delegmode(*dp, flags); - if (status) - *dp = NULL; + status = nfs4_check_delegmode(deleg, flags); + if (status) { + nfs4_put_stid(&deleg->dl_stid); + goto out; + } + *dp = deleg; out: if (!nfsd4_is_deleg_cur(open)) return nfs_ok; @@ -3011,24 +3592,25 @@ out: return nfs_ok; } -static __be32 -nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_stateid **stpp) +static struct nfs4_ol_stateid * +nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) { - struct nfs4_ol_stateid *local; + struct nfs4_ol_stateid *local, *ret = NULL; struct nfs4_openowner *oo = open->op_openowner; + spin_lock(&fp->fi_lock); list_for_each_entry(local, &fp->fi_stateids, st_perfile) { /* ignore lock owners */ if (local->st_stateowner->so_is_open_owner == 0) continue; - /* remember if we have seen this open owner */ - if (local->st_stateowner == &oo->oo_owner) - *stpp = local; - /* check for conflicting share reservations */ - if (!test_share(local, open)) - return nfserr_share_denied; + if (local->st_stateowner == &oo->oo_owner) { + ret = local; + atomic_inc(&ret->st_stid.sc_count); + break; + } } - return nfs_ok; + spin_unlock(&fp->fi_lock); + return ret; } static inline int nfs4_access_to_access(u32 nfs4_access) @@ -3042,24 +3624,6 @@ static inline int nfs4_access_to_access(u32 nfs4_access) return flags; } -static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, - struct svc_fh *cur_fh, struct nfsd4_open *open) -{ - __be32 status; - int oflag = 
nfs4_access_to_omode(open->op_share_access); - int access = nfs4_access_to_access(open->op_share_access); - - if (!fp->fi_fds[oflag]) { - status = nfsd_open(rqstp, cur_fh, S_IFREG, access, - &fp->fi_fds[oflag]); - if (status) - return status; - } - nfs4_file_get_access(fp, oflag); - - return nfs_ok; -} - static inline __be32 nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, struct nfsd4_open *open) @@ -3075,34 +3639,99 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0); } -static __be32 -nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) +static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, + struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, + struct nfsd4_open *open) { - u32 op_share_access = open->op_share_access; - bool new_access; + struct file *filp = NULL; __be32 status; + int oflag = nfs4_access_to_omode(open->op_share_access); + int access = nfs4_access_to_access(open->op_share_access); + unsigned char old_access_bmap, old_deny_bmap; - new_access = !test_access(op_share_access, stp); - if (new_access) { - status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); - if (status) - return status; + spin_lock(&fp->fi_lock); + + /* + * Are we trying to set a deny mode that would conflict with + * current access? + */ + status = nfs4_file_check_deny(fp, open->op_share_deny); + if (status != nfs_ok) { + spin_unlock(&fp->fi_lock); + goto out; } - status = nfsd4_truncate(rqstp, cur_fh, open); - if (status) { - if (new_access) { - int oflag = nfs4_access_to_omode(op_share_access); - nfs4_file_put_access(fp, oflag); - } - return status; + + /* set access to the file */ + status = nfs4_file_get_access(fp, open->op_share_access); + if (status != nfs_ok) { + spin_unlock(&fp->fi_lock); + goto out; } - /* remember the open */ - set_access(op_share_access, stp); + + /* Set access bits in stateid */ + old_access_bmap = stp->st_access_bmap; + set_access(open->op_share_access, stp); + + /* Set new deny mask */ + old_deny_bmap = stp->st_deny_bmap; set_deny(open->op_share_deny, stp); + fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH); - return nfs_ok; + if (!fp->fi_fds[oflag]) { + spin_unlock(&fp->fi_lock); + status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp); + if (status) + goto out_put_access; + spin_lock(&fp->fi_lock); + if (!fp->fi_fds[oflag]) { + fp->fi_fds[oflag] = filp; + filp = NULL; + } + } + spin_unlock(&fp->fi_lock); + if (filp) + fput(filp); + + status = nfsd4_truncate(rqstp, cur_fh, open); + if (status) + goto out_put_access; +out: + return status; +out_put_access: + stp->st_access_bmap = old_access_bmap; + nfs4_file_put_access(fp, open->op_share_access); + reset_union_bmap_deny(bmap_to_share_mode(old_deny_bmap), stp); + goto out; } +static __be32 +nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) +{ + __be32 status; + unsigned char old_deny_bmap; + + if (!test_access(open->op_share_access, stp)) + return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open); + + /* test and set deny mode */ + spin_lock(&fp->fi_lock); + status = nfs4_file_check_deny(fp, open->op_share_deny); + if (status == nfs_ok) { + old_deny_bmap = stp->st_deny_bmap; + set_deny(open->op_share_deny, stp); + fp->fi_share_deny |= + (open->op_share_deny & NFS4_SHARE_DENY_BOTH); + } + spin_unlock(&fp->fi_lock); + + if (status != nfs_ok) + 
return status; + + status = nfsd4_truncate(rqstp, cur_fh, open); + if (status != nfs_ok) + reset_union_bmap_deny(old_deny_bmap, stp); + return status; +} static void nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session) @@ -3123,7 +3752,7 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp) return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; } -static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag) +static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) { struct file_lock *fl; @@ -3135,53 +3764,101 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f fl->fl_flags = FL_DELEG; fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl->fl_end = OFFSET_MAX; - fl->fl_owner = (fl_owner_t)(dp->dl_file); + fl->fl_owner = (fl_owner_t)fp; fl->fl_pid = current->tgid; return fl; } static int nfs4_setlease(struct nfs4_delegation *dp) { - struct nfs4_file *fp = dp->dl_file; + struct nfs4_file *fp = dp->dl_stid.sc_file; struct file_lock *fl; - int status; + struct file *filp; + int status = 0; - fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); + fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ); if (!fl) return -ENOMEM; - fl->fl_file = find_readable_file(fp); - status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); - if (status) - goto out_free; + filp = find_readable_file(fp); + if (!filp) { + /* We should always have a readable file here */ + WARN_ON_ONCE(1); + locks_free_lock(fl); + return -EBADF; + } + fl->fl_file = filp; + status = vfs_setlease(filp, fl->fl_type, &fl); + if (status) { + locks_free_lock(fl); + goto out_fput; + } + spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + /* Did the lease get broken before we took the lock? */ + status = -EAGAIN; + if (fp->fi_had_conflict) + goto out_unlock; + /* Race breaker */ + if (fp->fi_lease) { + status = 0; + atomic_inc(&fp->fi_delegees); + hash_delegation_locked(dp, fp); + goto out_unlock; + } fp->fi_lease = fl; - fp->fi_deleg_file = get_file(fl->fl_file); + fp->fi_deleg_file = filp; atomic_set(&fp->fi_delegees, 1); - spin_lock(&state_lock); hash_delegation_locked(dp, fp); + spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); return 0; -out_free: - locks_free_lock(fl); +out_unlock: + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); +out_fput: + fput(filp); return status; } -static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) +static struct nfs4_delegation * +nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, + struct nfs4_file *fp) { + int status; + struct nfs4_delegation *dp; + if (fp->fi_had_conflict) - return -EAGAIN; + return ERR_PTR(-EAGAIN); + + dp = alloc_init_deleg(clp, fh); + if (!dp) + return ERR_PTR(-ENOMEM); + get_nfs4_file(fp); - dp->dl_file = fp; - if (!fp->fi_lease) - return nfs4_setlease(dp); spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + dp->dl_stid.sc_file = fp; + if (!fp->fi_lease) { + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); + status = nfs4_setlease(dp); + goto out; + } atomic_inc(&fp->fi_delegees); if (fp->fi_had_conflict) { - spin_unlock(&state_lock); - return -EAGAIN; + status = -EAGAIN; + goto out_unlock; } hash_delegation_locked(dp, fp); + status = 0; +out_unlock: + spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); - return 0; +out: + if (status) { + nfs4_put_stid(&dp->dl_stid); + return ERR_PTR(status); + } + return dp; } static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) @@ -3212,11 +3889,12 @@ static void 
nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) * proper support for them. */ static void -nfs4_open_delegation(struct net *net, struct svc_fh *fh, - struct nfsd4_open *open, struct nfs4_ol_stateid *stp) +nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, + struct nfs4_ol_stateid *stp) { struct nfs4_delegation *dp; - struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); + struct nfs4_openowner *oo = openowner(stp->st_stateowner); + struct nfs4_client *clp = stp->st_stid.sc_client; int cb_up; int status = 0; @@ -3235,7 +3913,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, * Let's not give out any delegations till everyone's * had the chance to reclaim theirs.... */ - if (locks_in_grace(net)) + if (locks_in_grace(clp->net)) goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) goto out_no_deleg; @@ -3254,21 +3932,17 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, default: goto out_no_deleg; } - dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); - if (dp == NULL) + dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file); + if (IS_ERR(dp)) goto out_no_deleg; - status = nfs4_set_delegation(dp, stp->st_file); - if (status) - goto out_free; memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", STATEID_VAL(&dp->dl_stid.sc_stateid)); open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; + nfs4_put_stid(&dp->dl_stid); return; -out_free: - destroy_delegation(dp); out_no_deleg: open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && @@ -3301,16 +3975,12 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, */ } -/* - * called with nfs4_lock_state() held. - */ __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; - struct inode *ino = current_fh->fh_dentry->d_inode; struct nfs4_ol_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; @@ -3320,21 +3990,18 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * and check for delegations in the process of being recalled. 
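+ * (The stateid lookups in this function return referenced objects; + * those references are put near the out: label below.)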
* If not found, create the nfs4_file struct */ - fp = find_file(ino); - if (fp) { - if ((status = nfs4_check_open(fp, open, &stp))) - goto out; + fp = find_or_add_file(open->op_file, ¤t_fh->fh_handle); + if (fp != open->op_file) { status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; + stp = nfsd4_find_existing_open(fp, open); } else { + open->op_file = NULL; status = nfserr_bad_stateid; if (nfsd4_is_deleg_cur(open)) goto out; status = nfserr_jukebox; - fp = open->op_file; - open->op_file = NULL; - nfsd4_init_file(fp, ino); } /* @@ -3347,22 +4014,19 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (status) goto out; } else { - status = nfs4_get_vfs_file(rqstp, fp, current_fh, open); - if (status) - goto out; - status = nfsd4_truncate(rqstp, current_fh, open); - if (status) - goto out; stp = open->op_stp; open->op_stp = NULL; init_open_stateid(stp, fp, open); + status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); + if (status) { + release_open_stateid(stp); + goto out; + } } update_stateid(&stp->st_stid.sc_stateid); memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); if (nfsd4_has_session(&resp->cstate)) { - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; - if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; open->op_why_no_deleg = WND4_NOT_WANTED; @@ -3374,7 +4038,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. */ - nfs4_open_delegation(SVC_NET(rqstp), current_fh, open, stp); + nfs4_open_delegation(current_fh, open, stp); nodeleg: status = nfs_ok; @@ -3397,41 +4061,27 @@ out: if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) && !nfsd4_has_session(&resp->cstate)) open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; + if (dp) + nfs4_put_stid(&dp->dl_stid); + if (stp) + nfs4_put_stid(&stp->st_stid); return status; } -void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) +void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, + struct nfsd4_open *open, __be32 status) { if (open->op_openowner) { - struct nfs4_openowner *oo = open->op_openowner; + struct nfs4_stateowner *so = &open->op_openowner->oo_owner; - if (!list_empty(&oo->oo_owner.so_stateids)) - list_del_init(&oo->oo_close_lru); - if (oo->oo_flags & NFS4_OO_NEW) { - if (status) { - release_openowner(oo); - open->op_openowner = NULL; - } else - oo->oo_flags &= ~NFS4_OO_NEW; - } + nfsd4_cstate_assign_replay(cstate, so); + nfs4_put_stateowner(so); } if (open->op_file) nfsd4_free_file(open->op_file); if (open->op_stp) - free_generic_stateid(open->op_stp); -} - -static __be32 lookup_clientid(clientid_t *clid, bool session, struct nfsd_net *nn, struct nfs4_client **clp) -{ - struct nfs4_client *found; - - if (STALE_CLIENTID(clid, nn)) - return nfserr_stale_clientid; - found = find_confirmed_client(clid, session, nn); - if (clp) - *clp = found; - return found ? 
nfs_ok : nfserr_expired; + nfs4_put_stid(&open->op_stp->st_stid); } __be32 @@ -3442,19 +4092,18 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - nfs4_lock_state(); dprintk("process_renew(%08x/%08x): starting\n", clid->cl_boot, clid->cl_id); - status = lookup_clientid(clid, cstate->minorversion, nn, &clp); + status = lookup_clientid(clid, cstate, nn); if (status) goto out; + clp = cstate->clp; status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) && clp->cl_cb_state != NFSD4_CB_UP) goto out; status = nfs_ok; out: - nfs4_unlock_state(); return status; } @@ -3483,12 +4132,11 @@ nfs4_laundromat(struct nfsd_net *nn) struct nfs4_client *clp; struct nfs4_openowner *oo; struct nfs4_delegation *dp; + struct nfs4_ol_stateid *stp; struct list_head *pos, *next, reaplist; time_t cutoff = get_seconds() - nn->nfsd4_lease; time_t t, new_timeo = nn->nfsd4_lease; - nfs4_lock_state(); - dprintk("NFSD: laundromat service - starting\n"); nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); @@ -3505,13 +4153,14 @@ nfs4_laundromat(struct nfsd_net *nn) clp->cl_clientid.cl_id); continue; } - list_move(&clp->cl_lru, &reaplist); + list_add(&clp->cl_lru, &reaplist); } spin_unlock(&nn->client_lock); list_for_each_safe(pos, next, &reaplist) { clp = list_entry(pos, struct nfs4_client, cl_lru); dprintk("NFSD: purging unused client (clientid %08x)\n", clp->cl_clientid.cl_id); + list_del_init(&clp->cl_lru); expire_client(clp); } spin_lock(&state_lock); @@ -3524,24 +4173,37 @@ nfs4_laundromat(struct nfsd_net *nn) new_timeo = min(new_timeo, t); break; } - list_move(&dp->dl_recall_lru, &reaplist); + unhash_delegation_locked(dp); + list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); - list_for_each_safe(pos, next, &reaplist) { - dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + while (!list_empty(&reaplist)) { + dp = list_first_entry(&reaplist, struct nfs4_delegation, + dl_recall_lru); + list_del_init(&dp->dl_recall_lru); revoke_delegation(dp); } - list_for_each_safe(pos, next, &nn->close_lru) { - oo = container_of(pos, struct nfs4_openowner, oo_close_lru); - if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { + + spin_lock(&nn->client_lock); + while (!list_empty(&nn->close_lru)) { + oo = list_first_entry(&nn->close_lru, struct nfs4_openowner, + oo_close_lru); + if (time_after((unsigned long)oo->oo_time, + (unsigned long)cutoff)) { t = oo->oo_time - cutoff; new_timeo = min(new_timeo, t); break; } - release_openowner(oo); + list_del_init(&oo->oo_close_lru); + stp = oo->oo_last_closed_stid; + oo->oo_last_closed_stid = NULL; + spin_unlock(&nn->client_lock); + nfs4_put_stid(&stp->st_stid); + spin_lock(&nn->client_lock); } + spin_unlock(&nn->client_lock); + new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); - nfs4_unlock_state(); return new_timeo; } @@ -3564,7 +4226,7 @@ laundromat_main(struct work_struct *laundry) static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) { - if (fhp->fh_dentry->d_inode != stp->st_file->fi_inode) + if (!nfsd_fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle)) return nfserr_bad_stateid; return nfs_ok; } @@ -3666,10 +4328,10 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) { struct nfs4_stid *s; struct nfs4_ol_stateid *ols; - __be32 status; + __be32 status = nfserr_bad_stateid; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) - return 
nfserr_bad_stateid; + return status; /* Client debugging aid. */ if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { char addr_str[INET6_ADDRSTRLEN]; @@ -3677,53 +4339,62 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) sizeof(addr_str)); pr_warn_ratelimited("NFSD: client %s testing state ID " "with incorrect client ID\n", addr_str); - return nfserr_bad_stateid; + return status; } - s = find_stateid(cl, stateid); + spin_lock(&cl->cl_lock); + s = find_stateid_locked(cl, stateid); if (!s) - return nfserr_bad_stateid; + goto out_unlock; status = check_stateid_generation(stateid, &s->sc_stateid, 1); if (status) - return status; + goto out_unlock; switch (s->sc_type) { case NFS4_DELEG_STID: - return nfs_ok; + status = nfs_ok; + break; case NFS4_REVOKED_DELEG_STID: - return nfserr_deleg_revoked; + status = nfserr_deleg_revoked; + break; case NFS4_OPEN_STID: case NFS4_LOCK_STID: ols = openlockstateid(s); if (ols->st_stateowner->so_is_open_owner && !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) - return nfserr_bad_stateid; - return nfs_ok; + status = nfserr_bad_stateid; + else + status = nfs_ok; + break; default: printk("unknown stateid type %x\n", s->sc_type); + /* Fallthrough */ case NFS4_CLOSED_STID: - return nfserr_bad_stateid; + case NFS4_CLOSED_DELEG_STID: + status = nfserr_bad_stateid; } +out_unlock: + spin_unlock(&cl->cl_lock); + return status; } -static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, - struct nfs4_stid **s, bool sessions, - struct nfsd_net *nn) +static __be32 +nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, + stateid_t *stateid, unsigned char typemask, + struct nfs4_stid **s, struct nfsd_net *nn) { - struct nfs4_client *cl; __be32 status; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return nfserr_bad_stateid; - status = lookup_clientid(&stateid->si_opaque.so_clid, sessions, - nn, &cl); + status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn); if (status == nfserr_stale_clientid) { - if (sessions) + if (cstate->session) return nfserr_bad_stateid; return nfserr_stale_stateid; } if (status) return status; - *s = find_stateid_by_type(cl, stateid, typemask); + *s = find_stateid_by_type(cstate->clp, stateid, typemask); if (!*s) return nfserr_bad_stateid; return nfs_ok; @@ -3754,12 +4425,11 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return check_special_stateids(net, current_fh, stateid, flags); - nfs4_lock_state(); - - status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, - &s, cstate->minorversion, nn); + status = nfsd4_lookup_stateid(cstate, stateid, + NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, + &s, nn); if (status) - goto out; + return status; status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); if (status) goto out; @@ -3770,12 +4440,13 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (status) goto out; if (filpp) { - file = dp->dl_file->fi_deleg_file; + file = dp->dl_stid.sc_file->fi_deleg_file; if (!file) { WARN_ON_ONCE(1); status = nfserr_serverfault; goto out; } + get_file(file); } break; case NFS4_OPEN_STID: @@ -3791,10 +4462,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (status) goto out; if (filpp) { + struct nfs4_file *fp = stp->st_stid.sc_file; + if (flags & RD_STATE) - file = find_readable_file(stp->st_file); 
+ file = find_readable_file(fp); else - file = find_writeable_file(stp->st_file); + file = find_writeable_file(fp); } break; default: @@ -3803,28 +4476,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, } status = nfs_ok; if (file) - *filpp = get_file(file); + *filpp = file; out: - nfs4_unlock_state(); + nfs4_put_stid(s); return status; } -static __be32 -nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) -{ - struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); - - if (check_for_locks(stp->st_file, lo)) - return nfserr_locks_held; - /* - * Currently there's a 1-1 lock stateid<->lockowner - * correspondance, and we have to delete the lockowner when we - * delete the lock stateid: - */ - release_lockowner(lo); - return nfs_ok; -} - /* * Test if the stateid is valid */ @@ -3835,11 +4492,9 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_test_stateid_id *stateid; struct nfs4_client *cl = cstate->session->se_client; - nfs4_lock_state(); list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list) stateid->ts_id_status = nfsd4_validate_stateid(cl, &stateid->ts_id_stateid); - nfs4_unlock_state(); return nfs_ok; } @@ -3851,37 +4506,50 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stateid_t *stateid = &free_stateid->fr_stateid; struct nfs4_stid *s; struct nfs4_delegation *dp; + struct nfs4_ol_stateid *stp; struct nfs4_client *cl = cstate->session->se_client; __be32 ret = nfserr_bad_stateid; - nfs4_lock_state(); - s = find_stateid(cl, stateid); + spin_lock(&cl->cl_lock); + s = find_stateid_locked(cl, stateid); if (!s) - goto out; + goto out_unlock; switch (s->sc_type) { case NFS4_DELEG_STID: ret = nfserr_locks_held; - goto out; + break; case NFS4_OPEN_STID: - case NFS4_LOCK_STID: ret = check_stateid_generation(stateid, &s->sc_stateid, 1); if (ret) - goto out; - if (s->sc_type == NFS4_LOCK_STID) - ret = nfsd4_free_lock_stateid(openlockstateid(s)); - else - ret = nfserr_locks_held; + break; + ret = nfserr_locks_held; break; + case NFS4_LOCK_STID: + ret = check_stateid_generation(stateid, &s->sc_stateid, 1); + if (ret) + break; + stp = openlockstateid(s); + ret = nfserr_locks_held; + if (check_for_locks(stp->st_stid.sc_file, + lockowner(stp->st_stateowner))) + break; + unhash_lock_stateid(stp); + spin_unlock(&cl->cl_lock); + nfs4_put_stid(s); + ret = nfs_ok; + goto out; case NFS4_REVOKED_DELEG_STID: dp = delegstateid(s); - destroy_revoked_delegation(dp); + list_del_init(&dp->dl_recall_lru); + spin_unlock(&cl->cl_lock); + nfs4_put_stid(s); ret = nfs_ok; - break; - default: - ret = nfserr_bad_stateid; + goto out; + /* Default falls through and returns nfserr_bad_stateid */ } +out_unlock: + spin_unlock(&cl->cl_lock); out: - nfs4_unlock_state(); return ret; } @@ -3926,20 +4594,24 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, { __be32 status; struct nfs4_stid *s; + struct nfs4_ol_stateid *stp = NULL; dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, seqid, STATEID_VAL(stateid)); *stpp = NULL; - status = nfsd4_lookup_stateid(stateid, typemask, &s, - cstate->minorversion, nn); + status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn); if (status) return status; - *stpp = openlockstateid(s); - if (!nfsd4_has_session(cstate)) - cstate->replay_owner = (*stpp)->st_stateowner; + stp = openlockstateid(s); + nfsd4_cstate_assign_replay(cstate, stp->st_stateowner); - return nfs4_seqid_op_checks(cstate, stateid, seqid, 
*stpp); + status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp); + if (!status) + *stpp = stp; + else + nfs4_put_stid(&stp->st_stid); + return status; } static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, @@ -3947,14 +4619,18 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs { __be32 status; struct nfs4_openowner *oo; + struct nfs4_ol_stateid *stp; status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, - NFS4_OPEN_STID, stpp, nn); + NFS4_OPEN_STID, &stp, nn); if (status) return status; - oo = openowner((*stpp)->st_stateowner); - if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) + oo = openowner(stp->st_stateowner); + if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { + nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; + } + *stpp = stp; return nfs_ok; } @@ -3974,8 +4650,6 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; - nfs4_lock_state(); - status = nfs4_preprocess_seqid_op(cstate, oc->oc_seqid, &oc->oc_req_stateid, NFS4_OPEN_STID, &stp, nn); @@ -3984,7 +4658,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; if (oo->oo_flags & NFS4_OO_CONFIRMED) - goto out; + goto put_stateid; oo->oo_flags |= NFS4_OO_CONFIRMED; update_stateid(&stp->st_stid.sc_stateid); memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); @@ -3993,10 +4667,10 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_client_record_create(oo->oo_owner.so_client); status = nfs_ok; +put_stateid: + nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); return status; } @@ -4004,7 +4678,7 @@ static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 a { if (!test_access(access, stp)) return; - nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access)); + nfs4_file_put_access(stp->st_stid.sc_file, access); clear_access(access, stp); } @@ -4026,16 +4700,6 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac } } -static void -reset_union_bmap_deny(unsigned long deny, struct nfs4_ol_stateid *stp) -{ - int i; - for (i = 0; i < 4; i++) { - if ((i & deny) != i) - clear_deny(i, stp); - } -} - __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -4053,21 +4717,20 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__, od->od_deleg_want); - nfs4_lock_state(); status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, &od->od_stateid, &stp, nn); if (status) goto out; status = nfserr_inval; if (!test_access(od->od_share_access, stp)) { - dprintk("NFSD: access not a subset current bitmap: 0x%lx, input access=%08x\n", + dprintk("NFSD: access not a subset of current bitmap: 0x%hhx, input access=%08x\n", stp->st_access_bmap, od->od_share_access); - goto out; + goto put_stateid; } if (!test_deny(od->od_share_deny, stp)) { - dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n", + dprintk("NFSD: deny not a subset of current bitmap: 0x%hhx, input deny=%08x\n", stp->st_deny_bmap, od->od_share_deny); - goto out; + goto put_stateid; } nfs4_stateid_downgrade(stp, od->od_share_access); @@ -4076,17 +4739,31 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, update_stateid(&stp->st_stid.sc_stateid); memcpy(&od->od_stateid, 
&stp->st_stid.sc_stateid, sizeof(stateid_t)); status = nfs_ok; +put_stateid: + nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); return status; } static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { - unhash_open_stateid(s); + struct nfs4_client *clp = s->st_stid.sc_client; + LIST_HEAD(reaplist); + s->st_stid.sc_type = NFS4_CLOSED_STID; + spin_lock(&clp->cl_lock); + unhash_open_stateid(s, &reaplist); + + if (clp->cl_minorversion) { + put_ol_stateid_locked(s, &reaplist); + spin_unlock(&clp->cl_lock); + free_ol_stateid_reaplist(&reaplist); + } else { + spin_unlock(&clp->cl_lock); + free_ol_stateid_reaplist(&reaplist); + move_to_close_lru(s, clp->net); + } } /* @@ -4097,7 +4774,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_close *close) { __be32 status; - struct nfs4_openowner *oo; struct nfs4_ol_stateid *stp; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -4105,7 +4781,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: nfsd4_close on file %pd\n", cstate->current_fh.fh_dentry); - nfs4_lock_state(); status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, &close->cl_stateid, NFS4_OPEN_STID|NFS4_CLOSED_STID, @@ -4113,31 +4788,14 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_bump_seqid(cstate, status); if (status) goto out; - oo = openowner(stp->st_stateowner); update_stateid(&stp->st_stid.sc_stateid); memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); nfsd4_close_open_stateid(stp); - if (cstate->minorversion) - free_generic_stateid(stp); - else - oo->oo_last_closed_stid = stp; - - if (list_empty(&oo->oo_owner.so_stateids)) { - if (cstate->minorversion) - release_openowner(oo); - else { - /* - * In the 4.0 case we need to keep the owners around a - * little while to handle CLOSE replay. - */ - move_to_close_lru(oo, SVC_NET(rqstp)); - } - } + /* put reference from nfs4_preprocess_seqid_op */ + nfs4_put_stid(&stp->st_stid); out: - if (!cstate->replay_owner) - nfs4_unlock_state(); return status; } @@ -4154,28 +4812,24 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; - nfs4_lock_state(); - status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s, - cstate->minorversion, nn); + status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn); if (status) goto out; dp = delegstateid(s); status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate)); if (status) - goto out; + goto put_stateid; destroy_delegation(dp); +put_stateid: + nfs4_put_stid(&dp->dl_stid); out: - nfs4_unlock_state(); - return status; } #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) -#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1) - static inline u64 end_offset(u64 start, u64 len) { @@ -4196,13 +4850,6 @@ last_byte_offset(u64 start, u64 len) return end > start ? 
end - 1: NFS4_MAX_UINT64; } -static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername) -{ - return (file_hashval(inode) + cl_id - + opaque_hashval(ownername->data, ownername->len)) - & LOCKOWNER_INO_HASH_MASK; -} - /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that * we can't properly handle lock requests that go beyond the (2^63 - 1)-th @@ -4255,47 +4902,56 @@ nevermind: deny->ld_type = NFS4_WRITE_LT; } -static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) +static struct nfs4_lockowner * +find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner, + struct nfs4_client *clp) { - struct nfs4_ol_stateid *lst; + unsigned int strhashval = ownerstr_hashval(owner); + struct nfs4_stateowner *so; - if (!same_owner_str(&lo->lo_owner, owner, clid)) - return false; - if (list_empty(&lo->lo_owner.so_stateids)) { - WARN_ON_ONCE(1); - return false; + lockdep_assert_held(&clp->cl_lock); + + list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[strhashval], + so_strhash) { + if (so->so_is_open_owner) + continue; + if (!same_owner_str(so, owner)) + continue; + atomic_inc(&so->so_count); + return lockowner(so); } - lst = list_first_entry(&lo->lo_owner.so_stateids, - struct nfs4_ol_stateid, st_perstateowner); - return lst->st_file->fi_inode == inode; + return NULL; } static struct nfs4_lockowner * -find_lockowner_str(struct inode *inode, clientid_t *clid, - struct xdr_netobj *owner, struct nfsd_net *nn) +find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner, + struct nfs4_client *clp) { - unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner); struct nfs4_lockowner *lo; - list_for_each_entry(lo, &nn->lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { - if (same_lockowner_ino(lo, inode, clid, owner)) - return lo; - } - return NULL; + spin_lock(&clp->cl_lock); + lo = find_lockowner_str_locked(clid, owner, clp); + spin_unlock(&clp->cl_lock); + return lo; } -static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) +static void nfs4_unhash_lockowner(struct nfs4_stateowner *sop) { - struct inode *inode = open_stp->st_file->fi_inode; - unsigned int inohash = lockowner_ino_hashval(inode, - clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + unhash_lockowner_locked(lockowner(sop)); +} + +static void nfs4_free_lockowner(struct nfs4_stateowner *sop) +{ + struct nfs4_lockowner *lo = lockowner(sop); - list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); - list_add(&lo->lo_owner_ino_hash, &nn->lockowner_ino_hashtbl[inohash]); - list_add(&lo->lo_perstateid, &open_stp->st_lockowners); + kmem_cache_free(lockowner_slab, lo); } +static const struct nfs4_stateowner_operations lockowner_ops = { + .so_unhash = nfs4_unhash_lockowner, + .so_free = nfs4_free_lockowner, +}; + /* * Alloc a lock owner structure. 
* Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has @@ -4303,42 +4959,107 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s * * strhashval = ownerstr_hashval */ - static struct nfs4_lockowner * -alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) { - struct nfs4_lockowner *lo; +alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, + struct nfs4_ol_stateid *open_stp, + struct nfsd4_lock *lock) +{ + struct nfs4_lockowner *lo, *ret; lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); if (!lo) return NULL; INIT_LIST_HEAD(&lo->lo_owner.so_stateids); lo->lo_owner.so_is_open_owner = 0; - /* It is the openowner seqid that will be incremented in encode in the - * case of new lockowners; so increment the lock seqid manually: */ - lo->lo_owner.so_seqid = lock->lk_new_lock_seqid + 1; - hash_lockowner(lo, strhashval, clp, open_stp); + lo->lo_owner.so_seqid = lock->lk_new_lock_seqid; + lo->lo_owner.so_ops = &lockowner_ops; + spin_lock(&clp->cl_lock); + ret = find_lockowner_str_locked(&clp->cl_clientid, + &lock->lk_new_owner, clp); + if (ret == NULL) { + list_add(&lo->lo_owner.so_strhash, + &clp->cl_ownerstr_hashtbl[strhashval]); + ret = lo; + } else + nfs4_free_lockowner(&lo->lo_owner); + spin_unlock(&clp->cl_lock); return lo; } -static struct nfs4_ol_stateid * -alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct nfs4_ol_stateid *open_stp) +static void +init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, + struct nfs4_file *fp, struct inode *inode, + struct nfs4_ol_stateid *open_stp) { - struct nfs4_ol_stateid *stp; struct nfs4_client *clp = lo->lo_owner.so_client; - stp = nfs4_alloc_stateid(clp); - if (stp == NULL) - return NULL; + lockdep_assert_held(&clp->cl_lock); + + atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_LOCK_STID; - list_add(&stp->st_perfile, &fp->fi_stateids); - list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); stp->st_stateowner = &lo->lo_owner; + atomic_inc(&lo->lo_owner.so_count); get_nfs4_file(fp); - stp->st_file = fp; + stp->st_stid.sc_file = fp; + stp->st_stid.sc_free = nfs4_free_lock_stateid; stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; - return stp; + list_add(&stp->st_locks, &open_stp->st_locks); + list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); + spin_lock(&fp->fi_lock); + list_add(&stp->st_perfile, &fp->fi_stateids); + spin_unlock(&fp->fi_lock); +} + +static struct nfs4_ol_stateid * +find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) +{ + struct nfs4_ol_stateid *lst; + struct nfs4_client *clp = lo->lo_owner.so_client; + + lockdep_assert_held(&clp->cl_lock); + + list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { + if (lst->st_stid.sc_file == fp) { + atomic_inc(&lst->st_stid.sc_count); + return lst; + } + } + return NULL; +} + +static struct nfs4_ol_stateid * +find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, + struct inode *inode, struct nfs4_ol_stateid *ost, + bool *new) +{ + struct nfs4_stid *ns = NULL; + struct nfs4_ol_stateid *lst; + struct nfs4_openowner *oo = openowner(ost->st_stateowner); + struct nfs4_client *clp = oo->oo_owner.so_client; + + spin_lock(&clp->cl_lock); + lst = find_lock_stateid(lo, fi); + if (lst == NULL) { + spin_unlock(&clp->cl_lock); + ns = nfs4_alloc_stid(clp, stateid_slab); 
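+ /* cl_lock was dropped for the allocation above, so the lookup below must be redone under the lock in case another thread created the same lock stateid in the meantime. */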
+ if (ns == NULL) + return NULL; + + spin_lock(&clp->cl_lock); + lst = find_lock_stateid(lo, fi); + if (likely(!lst)) { + lst = openlockstateid(ns); + init_lock_stateid(lst, lo, fi, inode, ost); + ns = NULL; + *new = true; + } + } + spin_unlock(&clp->cl_lock); + if (ns) + nfs4_put_stid(ns); + return lst; } static int @@ -4350,46 +5071,53 @@ check_lock_length(u64 offset, u64 length) static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) { - struct nfs4_file *fp = lock_stp->st_file; - int oflag = nfs4_access_to_omode(access); + struct nfs4_file *fp = lock_stp->st_stid.sc_file; + + lockdep_assert_held(&fp->fi_lock); if (test_access(access, lock_stp)) return; - nfs4_file_get_access(fp, oflag); + __nfs4_file_get_access(fp, access); set_access(access, lock_stp); } -static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) +static __be32 +lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, + struct nfs4_ol_stateid *ost, + struct nfsd4_lock *lock, + struct nfs4_ol_stateid **lst, bool *new) { - struct nfs4_file *fi = ost->st_file; + __be32 status; + struct nfs4_file *fi = ost->st_stid.sc_file; struct nfs4_openowner *oo = openowner(ost->st_stateowner); struct nfs4_client *cl = oo->oo_owner.so_client; + struct inode *inode = cstate->current_fh.fh_dentry->d_inode; struct nfs4_lockowner *lo; unsigned int strhashval; - struct nfsd_net *nn = net_generic(cl->net, nfsd_net_id); - - lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, - &lock->v.new.owner, nn); - if (lo) { - if (!cstate->minorversion) - return nfserr_bad_seqid; - /* XXX: a lockowner always has exactly one stateid: */ - *lst = list_first_entry(&lo->lo_owner.so_stateids, - struct nfs4_ol_stateid, st_perstateowner); - return nfs_ok; + + lo = find_lockowner_str(&cl->cl_clientid, &lock->v.new.owner, cl); + if (!lo) { + strhashval = ownerstr_hashval(&lock->v.new.owner); + lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); + if (lo == NULL) + return nfserr_jukebox; + } else { + /* with an existing lockowner, seqids must be the same */ + status = nfserr_bad_seqid; + if (!cstate->minorversion && + lock->lk_new_lock_seqid != lo->lo_owner.so_seqid) + goto out; } - strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, - &lock->v.new.owner); - lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); - if (lo == NULL) - return nfserr_jukebox; - *lst = alloc_init_lock_stateid(lo, fi, ost); + + *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new); if (*lst == NULL) { - release_lockowner(lo); - return nfserr_jukebox; + status = nfserr_jukebox; + goto out; } - *new = true; - return nfs_ok; + status = nfs_ok; +out: + nfs4_put_stateowner(&lo->lo_owner); + return status; } /* @@ -4401,14 +5129,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfs4_openowner *open_sop = NULL; struct nfs4_lockowner *lock_sop = NULL; - struct nfs4_ol_stateid *lock_stp; + struct nfs4_ol_stateid *lock_stp = NULL; + struct nfs4_ol_stateid *open_stp = NULL; + struct nfs4_file *fp; struct file *filp = NULL; struct file_lock *file_lock = NULL; struct file_lock *conflock = NULL; __be32 status = 0; - bool new_state = false; int lkflg; int err; + bool new = false; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -4425,11 +5155,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; } - nfs4_lock_state(); - if 
(lock->lk_is_new) { - struct nfs4_ol_stateid *open_stp = NULL; - if (nfsd4_has_session(cstate)) /* See rfc 5661 18.10.3: given clientid is ignored: */ memcpy(&lock->v.new.clientid, @@ -4453,12 +5179,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &lock->v.new.clientid)) goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, - &lock_stp, &new_state); - } else + &lock_stp, &new); + } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, NFS4_LOCK_STID, &lock_stp, nn); + } if (status) goto out; lock_sop = lockowner(lock_stp->st_stateowner); @@ -4482,20 +5209,25 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } + fp = lock_stp->st_stid.sc_file; locks_init_lock(file_lock); switch (lock->lk_type) { case NFS4_READ_LT: case NFS4_READW_LT: - filp = find_readable_file(lock_stp->st_file); + spin_lock(&fp->fi_lock); + filp = find_readable_file_locked(fp); if (filp) get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); + spin_unlock(&fp->fi_lock); file_lock->fl_type = F_RDLCK; break; case NFS4_WRITE_LT: case NFS4_WRITEW_LT: - filp = find_writeable_file(lock_stp->st_file); + spin_lock(&fp->fi_lock); + filp = find_writeable_file_locked(fp); if (filp) get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); + spin_unlock(&fp->fi_lock); file_lock->fl_type = F_WRLCK; break; default: @@ -4544,11 +5276,27 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; } out: - if (status && new_state) - release_lockowner(lock_sop); + if (filp) + fput(filp); + if (lock_stp) { + /* Bump seqid manually if the 4.0 replay owner is openowner */ + if (cstate->replay_owner && + cstate->replay_owner != &lock_sop->lo_owner && + seqid_mutating_err(ntohl(status))) + lock_sop->lo_owner.so_seqid++; + + /* + * If this is a new, never-before-used stateid, and we are + * returning an error, then just go ahead and release it. 
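+ * (The nfs4_put_stid() call below then drops this thread's reference to it.)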
+ */ + if (status && new) + release_lock_stateid(lock_stp); + + nfs4_put_stid(&lock_stp->st_stid); + } + if (open_stp) + nfs4_put_stid(&open_stp->st_stid); nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); if (file_lock) locks_free_lock(file_lock); if (conflock) @@ -4580,9 +5328,8 @@ __be32 nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_lockt *lockt) { - struct inode *inode; struct file_lock *file_lock = NULL; - struct nfs4_lockowner *lo; + struct nfs4_lockowner *lo = NULL; __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); @@ -4592,10 +5339,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (check_lock_length(lockt->lt_offset, lockt->lt_length)) return nfserr_inval; - nfs4_lock_state(); - if (!nfsd4_has_session(cstate)) { - status = lookup_clientid(&lockt->lt_clientid, false, nn, NULL); + status = lookup_clientid(&lockt->lt_clientid, cstate, nn); if (status) goto out; } @@ -4603,7 +5348,6 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) goto out; - inode = cstate->current_fh.fh_dentry->d_inode; file_lock = locks_alloc_lock(); if (!file_lock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); @@ -4626,7 +5370,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner, nn); + lo = find_lockowner_str(&lockt->lt_clientid, &lockt->lt_owner, + cstate->clp); if (lo) file_lock->fl_owner = (fl_owner_t)lo; file_lock->fl_pid = current->tgid; @@ -4646,7 +5391,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_set_lock_denied(file_lock, &lockt->lt_denied); } out: - nfs4_unlock_state(); + if (lo) + nfs4_put_stateowner(&lo->lo_owner); if (file_lock) locks_free_lock(file_lock); return status; @@ -4670,23 +5416,21 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (check_lock_length(locku->lu_offset, locku->lu_length)) return nfserr_inval; - nfs4_lock_state(); - status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, &locku->lu_stateid, NFS4_LOCK_STID, &stp, nn); if (status) goto out; - filp = find_any_file(stp->st_file); + filp = find_any_file(stp->st_stid.sc_file); if (!filp) { status = nfserr_lock_range; - goto out; + goto put_stateid; } file_lock = locks_alloc_lock(); if (!file_lock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); status = nfserr_jukebox; - goto out; + goto fput; } locks_init_lock(file_lock); file_lock->fl_type = F_UNLCK; @@ -4708,41 +5452,51 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } update_stateid(&stp->st_stid.sc_stateid); memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); - +fput: + fput(filp); +put_stateid: + nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); if (file_lock) locks_free_lock(file_lock); return status; out_nfserr: status = nfserrno(err); - goto out; + goto fput; } /* * returns - * 1: locks held by lockowner - * 0: no locks held by lockowner + * true: locks held by lockowner + * false: no locks held by lockowner */ -static int -check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) +static bool +check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { struct file_lock **flpp; - struct inode *inode = 
filp->fi_inode; - int status = 0; + int status = false; + struct file *filp = find_any_file(fp); + struct inode *inode; + + if (!filp) { + /* Any valid lock stateid should have some sort of access */ + WARN_ON_ONCE(1); + return status; + } + + inode = file_inode(filp); spin_lock(&inode->i_lock); for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { if ((*flpp)->fl_owner == (fl_owner_t)lowner) { - status = 1; - goto out; + status = true; + break; } } -out: spin_unlock(&inode->i_lock); + fput(filp); return status; } @@ -4753,53 +5507,46 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, { clientid_t *clid = &rlockowner->rl_clientid; struct nfs4_stateowner *sop; - struct nfs4_lockowner *lo; + struct nfs4_lockowner *lo = NULL; struct nfs4_ol_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; - struct list_head matches; - unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); + unsigned int hashval = ownerstr_hashval(owner); __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_client *clp; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); - nfs4_lock_state(); - - status = lookup_clientid(clid, cstate->minorversion, nn, NULL); + status = lookup_clientid(clid, cstate, nn); if (status) - goto out; + return status; - status = nfserr_locks_held; - INIT_LIST_HEAD(&matches); + clp = cstate->clp; + /* Find the matching lock stateowner */ + spin_lock(&clp->cl_lock); + list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval], + so_strhash) { - list_for_each_entry(sop, &nn->ownerstr_hashtbl[hashval], so_strhash) { - if (sop->so_is_open_owner) + if (sop->so_is_open_owner || !same_owner_str(sop, owner)) continue; - if (!same_owner_str(sop, owner, clid)) - continue; - list_for_each_entry(stp, &sop->so_stateids, - st_perstateowner) { - lo = lockowner(sop); - if (check_for_locks(stp->st_file, lo)) - goto out; - list_add(&lo->lo_list, &matches); + + /* see if there are still any locks associated with it */ + lo = lockowner(sop); + list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { + if (check_for_locks(stp->st_stid.sc_file, lo)) { + status = nfserr_locks_held; + spin_unlock(&clp->cl_lock); + return status; + } } + + atomic_inc(&sop->so_count); + break; } - /* Clients probably won't expect us to return with some (but not all) - * of the lockowner state released; so don't release any until all - * have been checked. */ - status = nfs_ok; - while (!list_empty(&matches)) { - lo = list_entry(matches.next, struct nfs4_lockowner, - lo_list); - /* unhash_stateowner deletes so_perclient only - * for openowners. */ - list_del(&lo->lo_list); + spin_unlock(&clp->cl_lock); + if (lo) release_lockowner(lo); - } -out: - nfs4_unlock_state(); return status; } @@ -4887,34 +5634,123 @@ nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn) * Called from OPEN. Look for clientid in reclaim list. */ __be32 -nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn) +nfs4_check_open_reclaim(clientid_t *clid, + struct nfsd4_compound_state *cstate, + struct nfsd_net *nn) { - struct nfs4_client *clp; + __be32 status; /* find clientid in conf_id_hashtbl */ - clp = find_confirmed_client(clid, sessions, nn); - if (clp == NULL) + status = lookup_clientid(clid, cstate, nn); + if (status) return nfserr_reclaim_bad; - return nfsd4_client_record_check(clp) ? 
nfserr_reclaim_bad : nfs_ok; + if (nfsd4_client_record_check(cstate->clp)) + return nfserr_reclaim_bad; + + return nfs_ok; } #ifdef CONFIG_NFSD_FAULT_INJECTION +static inline void +put_client(struct nfs4_client *clp) +{ + atomic_dec(&clp->cl_refcount); +} -u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) +static struct nfs4_client * +nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size) { - if (mark_client_expired(clp)) - return 0; - expire_client(clp); - return 1; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return NULL; + + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + if (memcmp(&clp->cl_addr, addr, addr_size) == 0) + return clp; + } + return NULL; } -u64 nfsd_print_client(struct nfs4_client *clp, u64 num) +u64 +nfsd_inject_print_clients(void) { + struct nfs4_client *clp; + u64 count = 0; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); char buf[INET6_ADDRSTRLEN]; - rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); - printk(KERN_INFO "NFS Client: %s\n", buf); - return 1; + + if (!nfsd_netns_ready(nn)) + return 0; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + pr_info("NFS Client: %s\n", buf); + ++count; + } + spin_unlock(&nn->client_lock); + + return count; +} + +u64 +nfsd_inject_forget_client(struct sockaddr_storage *addr, size_t addr_size) +{ + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) { + if (mark_client_expired_locked(clp) == nfs_ok) + ++count; + else + clp = NULL; + } + spin_unlock(&nn->client_lock); + + if (clp) + expire_client(clp); + + return count; +} + +u64 +nfsd_inject_forget_clients(u64 max) +{ + u64 count = 0; + struct nfs4_client *clp, *next; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { + if (mark_client_expired_locked(clp) == nfs_ok) { + list_add(&clp->cl_lru, &reaplist); + if (max != 0 && ++count >= max) + break; + } + } + spin_unlock(&nn->client_lock); + + list_for_each_entry_safe(clp, next, &reaplist, cl_lru) + expire_client(clp); + + return count; } static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, @@ -4925,158 +5761,484 @@ static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type); } -static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_lockowner *)) +static void +nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst, + struct list_head *collect) +{ + struct nfs4_client *clp = lst->st_stid.sc_client; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!collect) + return; + + lockdep_assert_held(&nn->client_lock); + atomic_inc(&clp->cl_refcount); + list_add(&lst->st_locks, collect); +} + +static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, + struct list_head *collect, + void (*func)(struct nfs4_ol_stateid *)) { struct nfs4_openowner *oop; - struct nfs4_lockowner *lop, *lo_next; struct nfs4_ol_stateid 
*stp, *st_next; + struct nfs4_ol_stateid *lst, *lst_next; u64 count = 0; + spin_lock(&clp->cl_lock); list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) { - list_for_each_entry_safe(stp, st_next, &oop->oo_owner.so_stateids, st_perstateowner) { - list_for_each_entry_safe(lop, lo_next, &stp->st_lockowners, lo_perstateid) { - if (func) - func(lop); - if (++count == max) - return count; + list_for_each_entry_safe(stp, st_next, + &oop->oo_owner.so_stateids, st_perstateowner) { + list_for_each_entry_safe(lst, lst_next, + &stp->st_locks, st_locks) { + if (func) { + func(lst); + nfsd_inject_add_lock_to_list(lst, + collect); + } + ++count; + /* + * Despite the fact that these functions deal + * with 64-bit integers for "count", we must + * ensure that it doesn't blow up the + * clp->cl_refcount. Throw a warning if we + * start to approach INT_MAX here. + */ + WARN_ON_ONCE(count == (INT_MAX / 2)); + if (count == max) + goto out; } } } +out: + spin_unlock(&clp->cl_lock); return count; } -u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max) +static u64 +nfsd_collect_client_locks(struct nfs4_client *clp, struct list_head *collect, + u64 max) { - return nfsd_foreach_client_lock(clp, max, release_lockowner); + return nfsd_foreach_client_lock(clp, max, collect, unhash_lock_stateid); } -u64 nfsd_print_client_locks(struct nfs4_client *clp, u64 max) +static u64 +nfsd_print_client_locks(struct nfs4_client *clp) { - u64 count = nfsd_foreach_client_lock(clp, max, NULL); + u64 count = nfsd_foreach_client_lock(clp, 0, NULL, NULL); nfsd_print_count(clp, count, "locked files"); return count; } -static u64 nfsd_foreach_client_open(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_openowner *)) +u64 +nfsd_inject_print_locks(void) +{ + struct nfs4_client *clp; + u64 count = 0; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return 0; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) + count += nfsd_print_client_locks(clp); + spin_unlock(&nn->client_lock); + + return count; +} + +static void +nfsd_reap_locks(struct list_head *reaplist) +{ + struct nfs4_client *clp; + struct nfs4_ol_stateid *stp, *next; + + list_for_each_entry_safe(stp, next, reaplist, st_locks) { + list_del_init(&stp->st_locks); + clp = stp->st_stid.sc_client; + nfs4_put_stid(&stp->st_stid); + put_client(clp); + } +} + +u64 +nfsd_inject_forget_client_locks(struct sockaddr_storage *addr, size_t addr_size) +{ + unsigned int count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) + count = nfsd_collect_client_locks(clp, &reaplist, 0); + spin_unlock(&nn->client_lock); + nfsd_reap_locks(&reaplist); + return count; +} + +u64 +nfsd_inject_forget_locks(u64 max) +{ + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + count += nfsd_collect_client_locks(clp, &reaplist, max - count); + if (max != 0 && count >= max) + break; + } + spin_unlock(&nn->client_lock); + nfsd_reap_locks(&reaplist); + return count; +} + +static u64 +nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max, + struct list_head 
*collect, + void (*func)(struct nfs4_openowner *)) { struct nfs4_openowner *oop, *next; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); u64 count = 0; + lockdep_assert_held(&nn->client_lock); + + spin_lock(&clp->cl_lock); list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) { - if (func) + if (func) { func(oop); - if (++count == max) + if (collect) { + atomic_inc(&clp->cl_refcount); + list_add(&oop->oo_perclient, collect); + } + } + ++count; + /* + * Despite the fact that these functions deal with + * 64-bit integers for "count", we must ensure that + * it doesn't blow up the clp->cl_refcount. Throw a + * warning if we start to approach INT_MAX here. + */ + WARN_ON_ONCE(count == (INT_MAX / 2)); + if (count == max) break; } + spin_unlock(&clp->cl_lock); return count; } -u64 nfsd_forget_client_openowners(struct nfs4_client *clp, u64 max) +static u64 +nfsd_print_client_openowners(struct nfs4_client *clp) { - return nfsd_foreach_client_open(clp, max, release_openowner); + u64 count = nfsd_foreach_client_openowner(clp, 0, NULL, NULL); + + nfsd_print_count(clp, count, "openowners"); + return count; } -u64 nfsd_print_client_openowners(struct nfs4_client *clp, u64 max) +static u64 +nfsd_collect_client_openowners(struct nfs4_client *clp, + struct list_head *collect, u64 max) { - u64 count = nfsd_foreach_client_open(clp, max, NULL); - nfsd_print_count(clp, count, "open files"); - return count; + return nfsd_foreach_client_openowner(clp, max, collect, + unhash_openowner_locked); } -static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, - struct list_head *victims) +u64 +nfsd_inject_print_openowners(void) { - struct nfs4_delegation *dp, *next; + struct nfs4_client *clp; u64 count = 0; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return 0; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) + count += nfsd_print_client_openowners(clp); + spin_unlock(&nn->client_lock); - lockdep_assert_held(&state_lock); - list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { - if (victims) - list_move(&dp->dl_recall_lru, victims); - if (++count == max) - break; - } return count; } -u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) +static void +nfsd_reap_openowners(struct list_head *reaplist) { - struct nfs4_delegation *dp, *next; - LIST_HEAD(victims); - u64 count; + struct nfs4_client *clp; + struct nfs4_openowner *oop, *next; - spin_lock(&state_lock); - count = nfsd_find_all_delegations(clp, max, &victims); - spin_unlock(&state_lock); + list_for_each_entry_safe(oop, next, reaplist, oo_perclient) { + list_del_init(&oop->oo_perclient); + clp = oop->oo_owner.so_client; + release_openowner(oop); + put_client(clp); + } +} - list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) - revoke_delegation(dp); +u64 +nfsd_inject_forget_client_openowners(struct sockaddr_storage *addr, + size_t addr_size) +{ + unsigned int count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) + count = nfsd_collect_client_openowners(clp, &reaplist, 0); + spin_unlock(&nn->client_lock); + nfsd_reap_openowners(&reaplist); return count; } -u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max) +u64 
+nfsd_inject_forget_openowners(u64 max) { - struct nfs4_delegation *dp, *next; - LIST_HEAD(victims); - u64 count; + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); - spin_lock(&state_lock); - count = nfsd_find_all_delegations(clp, max, &victims); - list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) - nfsd_break_one_deleg(dp); - spin_unlock(&state_lock); + if (!nfsd_netns_ready(nn)) + return count; + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + count += nfsd_collect_client_openowners(clp, &reaplist, + max - count); + if (max != 0 && count >= max) + break; + } + spin_unlock(&nn->client_lock); + nfsd_reap_openowners(&reaplist); return count; } -u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max) +static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, + struct list_head *victims) { + struct nfs4_delegation *dp, *next; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); u64 count = 0; + lockdep_assert_held(&nn->client_lock); + spin_lock(&state_lock); - count = nfsd_find_all_delegations(clp, max, NULL); + list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { + if (victims) { + /* + * It's not safe to mess with delegations that have a + * non-zero dl_time. They might have already been broken + * and could be processed by the laundromat outside of + * the state_lock. Just leave them be. + */ + if (dp->dl_time != 0) + continue; + + atomic_inc(&clp->cl_refcount); + unhash_delegation_locked(dp); + list_add(&dp->dl_recall_lru, victims); + } + ++count; + /* + * Despite the fact that these functions deal with + * 64-bit integers for "count", we must ensure that + * it doesn't blow up the clp->cl_refcount. Throw a + * warning if we start to approach INT_MAX here. 
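+ * (clp->cl_refcount is an atomic_t, i.e. a 32-bit counter, and each entry collected here takes one reference on the client.)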
+ */ + WARN_ON_ONCE(count == (INT_MAX / 2)); + if (count == max) + break; + } spin_unlock(&state_lock); + return count; +} + +static u64 +nfsd_print_client_delegations(struct nfs4_client *clp) +{ + u64 count = nfsd_find_all_delegations(clp, 0, NULL); nfsd_print_count(clp, count, "delegations"); return count; } -u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64)) +u64 +nfsd_inject_print_delegations(void) { - struct nfs4_client *clp, *next; + struct nfs4_client *clp; u64 count = 0; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); if (!nfsd_netns_ready(nn)) return 0; - list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { - count += func(clp, max - count); - if ((max != 0) && (count >= max)) - break; + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) + count += nfsd_print_client_delegations(clp); + spin_unlock(&nn->client_lock); + + return count; +} + +static void +nfsd_forget_delegations(struct list_head *reaplist) +{ + struct nfs4_client *clp; + struct nfs4_delegation *dp, *next; + + list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) { + list_del_init(&dp->dl_recall_lru); + clp = dp->dl_stid.sc_client; + revoke_delegation(dp); + put_client(clp); } +} +u64 +nfsd_inject_forget_client_delegations(struct sockaddr_storage *addr, + size_t addr_size) +{ + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) + count = nfsd_find_all_delegations(clp, 0, &reaplist); + spin_unlock(&nn->client_lock); + + nfsd_forget_delegations(&reaplist); return count; } -struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size) +u64 +nfsd_inject_forget_delegations(u64 max) { + u64 count = 0; struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); if (!nfsd_netns_ready(nn)) - return NULL; + return count; + spin_lock(&nn->client_lock); list_for_each_entry(clp, &nn->client_lru, cl_lru) { - if (memcmp(&clp->cl_addr, addr, addr_size) == 0) - return clp; + count += nfsd_find_all_delegations(clp, max - count, &reaplist); + if (max != 0 && count >= max) + break; } - return NULL; + spin_unlock(&nn->client_lock); + nfsd_forget_delegations(&reaplist); + return count; } +static void +nfsd_recall_delegations(struct list_head *reaplist) +{ + struct nfs4_client *clp; + struct nfs4_delegation *dp, *next; + + list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) { + list_del_init(&dp->dl_recall_lru); + clp = dp->dl_stid.sc_client; + /* + * We skipped all entries that had a zero dl_time before, + * so we can now reset the dl_time back to 0. If a delegation + * break comes in now, then it won't make any difference since + * we're recalling it either way. 
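+ * (The reset below is still done under the state_lock, the same lock under which nfsd_find_all_delegations() sampled dl_time.)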
+ */ + spin_lock(&state_lock); + dp->dl_time = 0; + spin_unlock(&state_lock); + nfsd_break_one_deleg(dp); + put_client(clp); + } +} + +u64 +nfsd_inject_recall_client_delegations(struct sockaddr_storage *addr, + size_t addr_size) +{ + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) + count = nfsd_find_all_delegations(clp, 0, &reaplist); + spin_unlock(&nn->client_lock); + + nfsd_recall_delegations(&reaplist); + return count; +} + +u64 +nfsd_inject_recall_delegations(u64 max) +{ + u64 count = 0; + struct nfs4_client *clp, *next; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { + count += nfsd_find_all_delegations(clp, max - count, &reaplist); + if (max != 0 && ++count >= max) + break; + } + spin_unlock(&nn->client_lock); + nfsd_recall_delegations(&reaplist); + return count; +} #endif /* CONFIG_NFSD_FAULT_INJECTION */ /* @@ -5113,14 +6275,6 @@ static int nfs4_state_create_net(struct net *net) CLIENT_HASH_SIZE, GFP_KERNEL); if (!nn->unconf_id_hashtbl) goto err_unconf_id; - nn->ownerstr_hashtbl = kmalloc(sizeof(struct list_head) * - OWNER_HASH_SIZE, GFP_KERNEL); - if (!nn->ownerstr_hashtbl) - goto err_ownerstr; - nn->lockowner_ino_hashtbl = kmalloc(sizeof(struct list_head) * - LOCKOWNER_INO_HASH_SIZE, GFP_KERNEL); - if (!nn->lockowner_ino_hashtbl) - goto err_lockowner_ino; nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) * SESSION_HASH_SIZE, GFP_KERNEL); if (!nn->sessionid_hashtbl) @@ -5130,10 +6284,6 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]); INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]); } - for (i = 0; i < OWNER_HASH_SIZE; i++) - INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]); - for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) - INIT_LIST_HEAD(&nn->lockowner_ino_hashtbl[i]); for (i = 0; i < SESSION_HASH_SIZE; i++) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; @@ -5149,10 +6299,6 @@ static int nfs4_state_create_net(struct net *net) return 0; err_sessionid: - kfree(nn->lockowner_ino_hashtbl); -err_lockowner_ino: - kfree(nn->ownerstr_hashtbl); -err_ownerstr: kfree(nn->unconf_id_hashtbl); err_unconf_id: kfree(nn->conf_id_hashtbl); @@ -5182,8 +6328,6 @@ nfs4_state_destroy_net(struct net *net) } kfree(nn->sessionid_hashtbl); - kfree(nn->lockowner_ino_hashtbl); - kfree(nn->ownerstr_hashtbl); kfree(nn->unconf_id_hashtbl); kfree(nn->conf_id_hashtbl); put_net(net); @@ -5247,22 +6391,22 @@ nfs4_state_shutdown_net(struct net *net) cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); - nfs4_lock_state(); INIT_LIST_HEAD(&reaplist); spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - list_move(&dp->dl_recall_lru, &reaplist); + unhash_delegation_locked(dp); + list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - destroy_delegation(dp); + list_del_init(&dp->dl_recall_lru); + nfs4_put_stid(&dp->dl_stid); } nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); - 
nfs4_unlock_state(); } void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 944275c8f56..f9821ce6658 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -181,28 +181,43 @@ static int zero_clientid(clientid_t *clid) } /** - * defer_free - mark an allocation as deferred freed - * @argp: NFSv4 compound argument structure to be freed with - * @release: release callback to free @p, typically kfree() - * @p: pointer to be freed + * svcxdr_tmpalloc - allocate memory to be freed after compound processing + * @argp: NFSv4 compound argument structure + * @len: size of the buffer to allocate * * The returned buffer is freed when processing of the compound * operation described in @argp finishes. */ -static int -defer_free(struct nfsd4_compoundargs *argp, - void (*release)(const void *), void *p) +static void * +svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len) { - struct tmpbuf *tb; + struct svcxdr_tmpbuf *tb; - tb = kmalloc(sizeof(*tb), GFP_KERNEL); + tb = kmalloc(sizeof(*tb) + len, GFP_KERNEL); if (!tb) - return -ENOMEM; - tb->buf = p; - tb->release = release; + return NULL; tb->next = argp->to_free; argp->to_free = tb; - return 0; + return tb->buf; +} + +/* + * For xdr strings that need to be passed to other kernel APIs + * as null-terminated strings. + * + * Note null-terminating in place usually isn't safe since the + * buffer might end on a page boundary. + */ +static char * +svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) +{ + char *p = svcxdr_tmpalloc(argp, len + 1); + + if (!p) + return NULL; + memcpy(p, buf, len); + p[len] = '\0'; + return p; } /** @@ -217,19 +232,13 @@ defer_free(struct nfsd4_compoundargs *argp, */ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) { - if (p == argp->tmp) { - p = kmemdup(argp->tmp, nbytes, GFP_KERNEL); - if (!p) - return NULL; - } else { - BUG_ON(p != argp->tmpp); - argp->tmpp = NULL; - } - if (defer_free(argp, kfree, p)) { - kfree(p); + void *ret; + + ret = svcxdr_tmpalloc(argp, nbytes); + if (!ret) return NULL; - } else - return (char *)p; + memcpy(ret, p, nbytes); + return ret; } static __be32 @@ -292,12 +301,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (nace > NFS4_ACL_MAX) return nfserr_fbig; - *acl = nfs4_acl_new(nace); + *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace)); if (*acl == NULL) return nfserr_jukebox; - defer_free(argp, kfree, *acl); - (*acl)->naces = nace; for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { READ_BUF(16); len += 16; @@ -418,12 +425,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, return nfserr_badlabel; len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); - label->data = kzalloc(dummy32 + 1, GFP_KERNEL); + label->len = dummy32; + label->data = svcxdr_dupstr(argp, buf, dummy32); if (!label->data) return nfserr_jukebox; - label->len = dummy32; - defer_free(argp, kfree, label->data); - memcpy(label->data, buf, dummy32); } #endif @@ -598,20 +603,11 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create switch (create->cr_type) { case NF4LNK: READ_BUF(4); - create->cr_linklen = be32_to_cpup(p++); - READ_BUF(create->cr_linklen); - /* - * The VFS will want a null-terminated string, and - * null-terminating in place isn't safe since this might - * end on a page boundary: - */ - create->cr_linkname = - kmalloc(create->cr_linklen + 1, GFP_KERNEL); - if (!create->cr_linkname) + create->cr_datalen = be32_to_cpup(p++); + READ_BUF(create->cr_datalen); + create->cr_data =
svcxdr_dupstr(argp, p, create->cr_datalen); + if (!create->cr_data) return nfserr_jukebox; - memcpy(create->cr_linkname, p, create->cr_linklen); - create->cr_linkname[create->cr_linklen] = '\0'; - defer_free(argp, kfree, create->cr_linkname); break; case NF4BLK: case NF4CHR: @@ -1481,13 +1477,12 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta INIT_LIST_HEAD(&test_stateid->ts_stateid_list); for (i = 0; i < test_stateid->ts_num_ids; i++) { - stateid = kmalloc(sizeof(struct nfsd4_test_stateid_id), GFP_KERNEL); + stateid = svcxdr_tmpalloc(argp, sizeof(*stateid)); if (!stateid) { status = nfserrno(-ENOMEM); goto out; } - defer_free(argp, kfree, stateid); INIT_LIST_HEAD(&stateid->ts_id_list); list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); @@ -1640,7 +1635,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) goto xdr_error; if (argp->opcnt > ARRAY_SIZE(argp->iops)) { - argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); + argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); if (!argp->ops) { argp->ops = argp->iops; dprintk("nfsd: couldn't allocate room for COMPOUND\n"); @@ -3077,11 +3072,8 @@ static __be32 nfsd4_encode_splice_read( __be32 nfserr; __be32 *p = xdr->p - 2; - /* - * Don't inline pages unless we know there's room for eof, - * count, and possible padding: - */ - if (xdr->end - xdr->p < 3) + /* Make sure there will be room for padding if needed */ + if (xdr->end - xdr->p < 1) return nfserr_resource; nfserr = nfsd_splice_read(read->rd_rqstp, file, @@ -3147,9 +3139,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, len = maxcount; v = 0; - thislen = (void *)xdr->end - (void *)xdr->p; - if (len < thislen) - thislen = len; + thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p)); p = xdr_reserve_space(xdr, (thislen+3)&~3); WARN_ON_ONCE(!p); resp->rqstp->rq_vec[v].iov_base = p; @@ -3216,10 +3206,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, xdr_commit_encode(xdr); maxcount = svc_max_payload(resp->rqstp); - if (maxcount > xdr->buf->buflen - xdr->buf->len) - maxcount = xdr->buf->buflen - xdr->buf->len; - if (maxcount > read->rd_length) - maxcount = read->rd_length; + maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len)); + maxcount = min_t(unsigned long, maxcount, read->rd_length); if (!read->rd_filp) { err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, @@ -3937,8 +3925,6 @@ status: * * XDR note: do not encode rp->rp_buflen: the buffer contains the * previously sent already encoded operation. - * - * called with nfs4_lock_state() held */ void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) @@ -3977,9 +3963,8 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp) kfree(args->tmpp); args->tmpp = NULL; while (args->to_free) { - struct tmpbuf *tb = args->to_free; + struct svcxdr_tmpbuf *tb = args->to_free; args->to_free = tb->next; - tb->release(tb->buf); kfree(tb); } return 1; @@ -4012,7 +3997,6 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo /* * All that remains is to write the tag and operation count... 
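* (The session replay-cache update and clientid renewal that used to be done here are now handled by nfsd4_sequence_done().)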
*/ - struct nfsd4_compound_state *cs = &resp->cstate; struct xdr_buf *buf = resp->xdr.buf; WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + @@ -4026,19 +4010,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo p += XDR_QUADLEN(resp->taglen); *p++ = htonl(resp->opcnt); - if (nfsd4_has_session(cs)) { - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - struct nfs4_client *clp = cs->session->se_client; - if (cs->status != nfserr_replay_cache) { - nfsd4_store_cache_entry(resp); - cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; - } - /* Renew the clientid on success and on replay */ - spin_lock(&nn->client_lock); - nfsd4_put_session(cs->session); - spin_unlock(&nn->client_lock); - put_client_renew(clp); - } + nfsd4_sequence_done(resp); return 1; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 6040da8830f..ff956763324 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -221,7 +221,12 @@ static void hash_refile(struct svc_cacherep *rp) { hlist_del_init(&rp->c_hash); - hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits)); + /* + * No point in byte swapping c_xid since we're just using it to pick + * a hash bucket. + */ + hlist_add_head(&rp->c_hash, cache_hash + + hash_32((__force u32)rp->c_xid, maskbits)); } /* @@ -356,7 +361,11 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) struct hlist_head *rh; unsigned int entries = 0; - rh = &cache_hash[hash_32(rqstp->rq_xid, maskbits)]; + /* + * No point in byte swapping rq_xid since we're just using it to pick + * a hash bucket. + */ + rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)]; hlist_for_each_entry(rp, rh, c_hash) { ++entries; if (nfsd_cache_match(rqstp, csum, rp)) { diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 51844048937..4e042105fb6 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -39,6 +39,7 @@ enum { NFSD_Versions, NFSD_Ports, NFSD_MaxBlkSize, + NFSD_MaxConnections, NFSD_SupportedEnctypes, /* * The below MUST come last. Otherwise we leave a hole in nfsd_files[] @@ -62,6 +63,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size); static ssize_t write_versions(struct file *file, char *buf, size_t size); static ssize_t write_ports(struct file *file, char *buf, size_t size); static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); +static ssize_t write_maxconn(struct file *file, char *buf, size_t size); #ifdef CONFIG_NFSD_V4 static ssize_t write_leasetime(struct file *file, char *buf, size_t size); static ssize_t write_gracetime(struct file *file, char *buf, size_t size); @@ -77,6 +79,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { [NFSD_Versions] = write_versions, [NFSD_Ports] = write_ports, [NFSD_MaxBlkSize] = write_maxblksize, + [NFSD_MaxConnections] = write_maxconn, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = write_leasetime, [NFSD_Gracetime] = write_gracetime, @@ -369,8 +372,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (maxsize < NFS_FHSIZE) return -EINVAL; - if (maxsize > NFS3_FHSIZE) - maxsize = NFS3_FHSIZE; + maxsize = min(maxsize, NFS3_FHSIZE); if (qword_get(&mesg, mesg, size)>0) return -EINVAL; @@ -871,10 +873,8 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) /* force bsize into allowed range and * required alignment. 
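* (that is, clamp bsize to the range [1024, NFSSVC_MAXBLKSIZE] and round it down to a multiple of 1024)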
*/ - if (bsize < 1024) - bsize = 1024; - if (bsize > NFSSVC_MAXBLKSIZE) - bsize = NFSSVC_MAXBLKSIZE; + bsize = max_t(int, bsize, 1024); + bsize = min_t(int, bsize, NFSSVC_MAXBLKSIZE); bsize &= ~(1024-1); mutex_lock(&nfsd_mutex); if (nn->nfsd_serv) { @@ -889,6 +889,44 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) nfsd_max_blksize); } +/** + * write_maxconn - Set or report the current max number of connections + * + * Input: + * buf: ignored + * size: zero + * OR + * + * Input: + * buf: C string containing an unsigned + * integer value representing the new + * number of max connections + * size: non-zero length of C string in @buf + * Output: + * On success: passed-in buffer filled with '\n'-terminated C string + * containing numeric value of max_connections setting + * for this net namespace; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ +static ssize_t write_maxconn(struct file *file, char *buf, size_t size) +{ + char *mesg = buf; + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unsigned int maxconn = nn->max_connections; + + if (size > 0) { + int rv = get_uint(&mesg, &maxconn); + + if (rv) + return rv; + nn->max_connections = maxconn; + } + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%u\n", maxconn); +} + #ifdef CONFIG_NFSD_V4 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time, struct nfsd_net *nn) @@ -1064,6 +1102,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, + [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO}, #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index ec839341815..e883a5868be 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -162,7 +162,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) /* deprecated, convert to type 3 */ len = key_len(FSID_ENCODE_DEV)/4; fh->fh_fsid_type = FSID_ENCODE_DEV; - fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1]))); + /* + * struct knfsd_fh uses host-endian fields, which are + * sometimes used to hold net-endian values. This + * confuses sparse, so we must use __force here to + * keep it from complaining. 
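+ * (For example, passing the host-endian fh_fsid[0] straight to ntohl(), which expects a __be32, would otherwise draw an endianness warning from sparse.)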
+ */ + fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]), + ntohl((__force __be32)fh->fh_fsid[1]))); fh->fh_fsid[1] = fh->fh_fsid[2]; } data_left -= len; @@ -539,8 +546,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, dentry); fhp->fh_dentry = dget(dentry); /* our internal copy */ - fhp->fh_export = exp; - cache_get(&exp->h); + fhp->fh_export = exp_get(exp); if (fhp->fh_handle.fh_version == 0xca) { /* old style filehandle please */ diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 2e89e70ac15..08236d70c66 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -73,8 +73,15 @@ enum fsid_source { extern enum fsid_source fsid_source(struct svc_fh *fhp); -/* This might look a little large to "inline" but in all calls except +/* + * This might look a little large to "inline" but in all calls except * one, 'vers' is constant so most of the function disappears. + * + * In some cases the values are considered to be host endian and in + * others, net endian. fsidv is always considered to be u32 as the + * callers don't know which it will be. So we must use __force to keep + * sparse from complaining. Since these values are opaque to the + * client, that shouldn't be a problem. */ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, u32 fsid, unsigned char *uuid) @@ -82,7 +89,7 @@ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, u32 *up; switch(vers) { case FSID_DEV: - fsidv[0] = htonl((MAJOR(dev)<<16) | + fsidv[0] = (__force __u32)htonl((MAJOR(dev)<<16) | MINOR(dev)); fsidv[1] = ino_t_to_u32(ino); break; @@ -90,8 +97,8 @@ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, fsidv[0] = fsid; break; case FSID_MAJOR_MINOR: - fsidv[0] = htonl(MAJOR(dev)); - fsidv[1] = htonl(MINOR(dev)); + fsidv[0] = (__force __u32)htonl(MAJOR(dev)); + fsidv[1] = (__force __u32)htonl(MINOR(dev)); fsidv[2] = ino_t_to_u32(ino); break; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 54c6b3d3cc7..b8680738f58 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -403,12 +403,13 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp, fh_init(&newfh, NFS_FHSIZE); /* - * Create the link, look up new file and set attrs.
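/*
 * Editorial sketch (hypothetical caller, not in the patch; assumes
 * nfsd's nfsfh.h): how the FSID_MAJOR_MINOR branch of mk_fsid() above
 * fills the three opaque fsid words. Their endianness only has to be
 * stable, since the client never interprets them.
 */
#include <linux/types.h>
#include <linux/kdev_t.h>

static void example_fill_fsid(u32 *fsidv, dev_t dev, ino_t ino)
{
	mk_fsid(FSID_MAJOR_MINOR, fsidv, dev, ino, 0, NULL);
	/*
	 * fsidv[0] == (__force u32)htonl(MAJOR(dev))
	 * fsidv[1] == (__force u32)htonl(MINOR(dev))
	 * fsidv[2] == low 32 bits of the inode number
	 */
}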
+ * Crazy hack: the request fits in a page, and already-decoded + * attributes follow argp->tname, so it's safe to just write a + * null to ensure it's null-terminated: */ + argp->tname[argp->tlen] = '\0'; nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, - argp->tname, argp->tlen, - &newfh, &argp->attrs); - + argp->tname, &newfh); fh_put(&argp->ffh); fh_put(&newfh); @@ -716,6 +717,7 @@ nfserrno (int errno) { nfserr_noent, -ENOENT }, { nfserr_io, -EIO }, { nfserr_nxio, -ENXIO }, + { nfserr_fbig, -E2BIG }, { nfserr_acces, -EACCES }, { nfserr_exist, -EEXIST }, { nfserr_xdev, -EXDEV }, @@ -743,6 +745,7 @@ nfserrno (int errno) { nfserr_notsupp, -EOPNOTSUPP }, { nfserr_toosmall, -ETOOSMALL }, { nfserr_serverfault, -ESERVERFAULT }, + { nfserr_serverfault, -ENFILE }, }; int i; @@ -750,7 +753,7 @@ nfserrno (int errno) if (nfs_errtbl[i].syserr == errno) return nfs_errtbl[i].nfserr; } - printk (KERN_INFO "nfsd: non-standard errno: %d\n", errno); + WARN(1, "nfsd: non-standard errno: %d\n", errno); return nfserr_io; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 1879e43f286..752d56bbe0b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -221,7 +221,8 @@ static int nfsd_startup_generic(int nrservs) */ ret = nfsd_racache_init(2*nrservs); if (ret) - return ret; + goto dec_users; + ret = nfs4_state_start(); if (ret) goto out_racache; @@ -229,6 +230,8 @@ static int nfsd_startup_generic(int nrservs) out_racache: nfsd_racache_shutdown(); +dec_users: + nfsd_users--; return ret; } @@ -405,6 +408,7 @@ int nfsd_create_serv(struct net *net) if (nn->nfsd_serv == NULL) return -ENOMEM; + nn->nfsd_serv->sv_maxconn = nn->max_connections; error = svc_bind(nn->nfsd_serv, net); if (error < 0) { svc_destroy(nn->nfsd_serv); @@ -469,8 +473,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) /* enforce a global maximum number of threads */ tot = 0; for (i = 0; i < n; i++) { - if (nthreads[i] > NFSD_MAXSERVS) - nthreads[i] = NFSD_MAXSERVS; + nthreads[i] = min(nthreads[i], NFSD_MAXSERVS); tot += nthreads[i]; } if (tot > NFSD_MAXSERVS) { @@ -519,11 +522,11 @@ nfsd_svc(int nrservs, struct net *net) mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); - if (nrservs <= 0) - nrservs = 0; - if (nrservs > NFSD_MAXSERVS) - nrservs = NFSD_MAXSERVS; + + nrservs = max(nrservs, 0); + nrservs = min(nrservs, NFSD_MAXSERVS); error = 0; + if (nrservs == 0 && nn->nfsd_serv == NULL) goto out; @@ -564,6 +567,7 @@ nfsd(void *vrqstp) struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list); struct net *net = perm_sock->xpt_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); int err; /* Lock module and set up kernel thread */ @@ -597,6 +601,9 @@ nfsd(void *vrqstp) * The main request loop */ for (;;) { + /* Update sv_maxconn if it has changed */ + rqstp->rq_server->sv_maxconn = nn->max_connections; + /* * Find a socket with data available and call its * recvfrom routine. diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 1ac306b769d..412d7061f9e 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -257,8 +257,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, len = args->count = ntohl(*p++); p++; /* totalcount - unused */ - if (len > NFSSVC_MAXBLKSIZE_V2) - len = NFSSVC_MAXBLKSIZE_V2; + len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2); /* set up somewhere to store response. 
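/*
 * Editorial sketch of the nfserrno() table extended in the nfsproc.c
 * hunk above (subset only; the nfserr_* constants and WARN() come from
 * nfsd's headers and linux/kernel.h). Two distinct errnos may map to
 * one NFS status -- the patch adds -E2BIG and -ENFILE -- and anything
 * unlisted now triggers WARN() instead of a quiet printk.
 */
static __be32 example_nfserrno(int errno)
{
	static const struct {
		__be32 nfserr;
		int syserr;
	} tbl[] = {
		{ nfserr_fbig,        -E2BIG  },
		{ nfserr_serverfault, -ENFILE },
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(tbl); i++)
		if (tbl[i].syserr == errno)
			return tbl[i].nfserr;
	WARN(1, "nfsd: non-standard errno: %d\n", errno);
	return nfserr_io;
}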
* We take pages, put them on reslist and include in iovec @@ -268,7 +267,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct page *p = *(rqstp->rq_next_page++); rqstp->rq_vec[v].iov_base = page_address(p); - rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; + rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); len -= rqstp->rq_vec[v].iov_len; v++; } @@ -400,9 +399,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, return 0; args->cookie = ntohl(*p++); args->count = ntohl(*p++); - if (args->count > PAGE_SIZE) - args->count = PAGE_SIZE; - + args->count = min_t(u32, args->count, PAGE_SIZE); args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); @@ -516,10 +513,11 @@ nfssvc_encode_entry(void *ccdv, const char *name, } if (cd->offset) *cd->offset = htonl(offset); - if (namlen > NFS2_MAXNAMLEN) - namlen = NFS2_MAXNAMLEN;/* truncate filename */ + /* truncate filename */ + namlen = min(namlen, NFS2_MAXNAMLEN); slen = XDR_QUADLEN(namlen); + if ((buflen = cd->buflen - slen - 4) < 0) { cd->common.err = nfserr_toosmall; return -EINVAL; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 374c66283ac..4a89e00d746 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -72,7 +72,13 @@ struct nfsd4_callback { bool cb_done; }; +/* + * A core object that represents a "common" stateid. These are generally + * embedded within the different (more specific) stateid objects and contain + * fields that are of general use to any stateid. + */ struct nfs4_stid { + atomic_t sc_count; #define NFS4_OPEN_STID 1 #define NFS4_LOCK_STID 2 #define NFS4_DELEG_STID 4 @@ -80,22 +86,43 @@ struct nfs4_stid { #define NFS4_CLOSED_STID 8 /* For a deleg stateid kept around only to process free_stateid's: */ #define NFS4_REVOKED_DELEG_STID 16 +#define NFS4_CLOSED_DELEG_STID 32 unsigned char sc_type; stateid_t sc_stateid; struct nfs4_client *sc_client; + struct nfs4_file *sc_file; + void (*sc_free)(struct nfs4_stid *); }; +/* + * Represents a delegation stateid. The nfs4_client holds references to these + * and they are put when it is being destroyed or when the delegation is + * returned by the client: + * + * o 1 reference as long as a delegation is still in force (taken when it's + * alloc'd, put when it's returned or revoked) + * + * o 1 reference as long as a recall rpc is in progress (taken when the lease + * is broken, put when the rpc exits) + * + * o 1 more ephemeral reference for each nfsd thread currently doing something + * with that delegation without holding the cl_lock + * + * If the server attempts to recall a delegation and the client doesn't do so + * before a timeout, the server may also revoke the delegation. In that case, + * the object will either be destroyed (v4.0) or moved to a per-client list of + * revoked delegations (v4.1+). + * + * This object is a superset of the nfs4_stid. + */ struct nfs4_delegation { struct nfs4_stid dl_stid; /* must be first field */ struct list_head dl_perfile; struct list_head dl_perclnt; struct list_head dl_recall_lru; /* delegation recalled */ - atomic_t dl_count; /* ref count */ - struct nfs4_file *dl_file; u32 dl_type; time_t dl_time; /* For recall: */ - struct knfsd_fh dl_fh; int dl_retries; struct nfsd4_callback dl_recall; }; @@ -194,6 +221,11 @@ struct nfsd4_conn { unsigned char cn_flags; }; +/* + * Representation of a v4.1+ session. These are refcounted in a similar fashion + * to the nfs4_client. 
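/*
 * Editorial sketch of the lifetime convention the state.h comments here
 * describe (hypothetical object, not the patch's code): a reference is
 * held only while a thread actively works on the object. This
 * simplified put frees on the final reference; the real session and
 * client teardown additionally requires the object to be unhashed
 * (expired) first, under the appropriate lock.
 */
#include <linux/atomic.h>
#include <linux/slab.h>

struct example_obj {
	atomic_t ref;
};

static void example_get(struct example_obj *o)
{
	atomic_inc(&o->ref);
}

static void example_put(struct example_obj *o)
{
	if (atomic_dec_and_test(&o->ref))
		kfree(o);
}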
References are only taken when the server is actively + * working on the object (primarily during the processing of compounds). + */ struct nfsd4_session { atomic_t se_ref; struct list_head se_hash; /* hash by sessionid */ @@ -212,8 +244,6 @@ struct nfsd4_session { struct nfsd4_slot *se_slots[]; /* forward channel slots */ }; -extern void nfsd4_put_session(struct nfsd4_session *ses); - /* formatted contents of nfs4_sessionid */ struct nfsd4_sessionid { clientid_t clientid; @@ -225,17 +255,35 @@ struct nfsd4_sessionid { /* * struct nfs4_client - one per client. Clientids live here. - * o Each nfs4_client is hashed by clientid. * - * o Each nfs4_clients is also hashed by name - * (the opaque quantity initially sent by the client to identify itself). + * The initial object created by an NFS client using SETCLIENTID (for NFSv4.0) + * or EXCHANGE_ID (for NFSv4.1+). These objects are refcounted and timestamped. + * Each nfsd_net_ns object contains a set of these and they are tracked via + * short and long form clientid. They are hashed and searched for under the + * per-nfsd_net client_lock spinlock. + * + * References to it are only held during the processing of compounds, and in + * certain other operations. In their "resting state" they have a refcount of + * 0. If they are not renewed within a lease period, they become eligible for + * destruction by the laundromat. + * + * These objects can also be destroyed prematurely by the fault injection code, + * or if the client sends certain forms of SETCLIENTID or EXCHANGE_ID updates. + * Care is taken *not* to do this however when the objects have an elevated + * refcount. + * + * o Each nfs4_client is hashed by clientid + * + * o Each nfs4_client is also hashed by name (the opaque quantity initially + * sent by the client to identify itself). * - * o cl_perclient list is used to ensure no dangling stateowner references - * when we expire the nfs4_client + * o cl_perclient list is used to ensure no dangling stateowner references + * when we expire the nfs4_client */ struct nfs4_client { struct list_head cl_idhash; /* hash by cl_clientid.id */ struct rb_node cl_namenode; /* link into by-name trees */ + struct list_head *cl_ownerstr_hashtbl; struct list_head cl_openowners; struct idr cl_stateids; /* stateid lookup */ struct list_head cl_delegations; @@ -329,21 +377,43 @@ struct nfs4_replay { unsigned int rp_buflen; char *rp_buf; struct knfsd_fh rp_openfh; + struct mutex rp_mutex; char rp_ibuf[NFSD4_REPLAY_ISIZE]; }; +struct nfs4_stateowner; + +struct nfs4_stateowner_operations { + void (*so_unhash)(struct nfs4_stateowner *); + void (*so_free)(struct nfs4_stateowner *); +}; + +/* + * A core object that represents either an open or lock owner. The open and + * lock owner objects have one of these embedded within them. Refcounts and + * other fields common to both owner types are contained within these + * structures.
+ */ struct nfs4_stateowner { - struct list_head so_strhash; /* hash by op_name */ - struct list_head so_stateids; - struct nfs4_client * so_client; - /* after increment in ENCODE_SEQID_OP_TAIL, represents the next + struct list_head so_strhash; + struct list_head so_stateids; + struct nfs4_client *so_client; + const struct nfs4_stateowner_operations *so_ops; + /* after increment in nfsd4_bump_seqid, represents the next * sequence id expected from the client: */ - u32 so_seqid; - struct xdr_netobj so_owner; /* open owner name */ - struct nfs4_replay so_replay; - bool so_is_open_owner; + atomic_t so_count; + u32 so_seqid; + struct xdr_netobj so_owner; /* open owner name */ + struct nfs4_replay so_replay; + bool so_is_open_owner; }; +/* + * When a file is opened, the client provides an open state owner opaque string + * that indicates the "owner" of that open. These objects are refcounted. + * References to it are held by each open state associated with it. This object + * is a superset of the nfs4_stateowner struct. + */ struct nfs4_openowner { struct nfs4_stateowner oo_owner; /* must be first field */ struct list_head oo_perclient; @@ -358,15 +428,17 @@ struct nfs4_openowner { struct nfs4_ol_stateid *oo_last_closed_stid; time_t oo_time; /* time of placement on so_close_lru */ #define NFS4_OO_CONFIRMED 1 -#define NFS4_OO_NEW 4 unsigned char oo_flags; }; +/* + * Represents a generic "lockowner". Similar to an openowner. References to it + * are held by the lock stateids that are created on its behalf. This object is + * a superset of the nfs4_stateowner struct (or would be if it needed any extra + * fields). + */ struct nfs4_lockowner { struct nfs4_stateowner lo_owner; /* must be first element */ - struct list_head lo_owner_ino_hash; /* hash by owner,file */ - struct list_head lo_perstateid; - struct list_head lo_list; /* for temporary uses */ }; static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so) @@ -379,9 +451,17 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so) return container_of(so, struct nfs4_lockowner, lo_owner); } -/* nfs4_file: a file opened by some number of (open) nfs4_stateowners. */ +/* + * nfs4_file: a file opened by some number of (open) nfs4_stateowners. + * + * These objects are global. nfsd only keeps one instance of a nfs4_file per + * inode (though it may keep multiple file descriptors open per inode). These + * are tracked in the file_hashtbl which is protected by the state_lock + * spinlock. + */ struct nfs4_file { atomic_t fi_ref; + spinlock_t fi_lock; struct hlist_node fi_hash; /* hash by "struct inode *" */ struct list_head fi_stateids; struct list_head fi_delegations; @@ -395,49 +475,36 @@ struct nfs4_file { * + 1 to both of the above if NFS4_SHARE_ACCESS_BOTH is set. */ atomic_t fi_access[2]; + u32 fi_share_deny; struct file *fi_deleg_file; struct file_lock *fi_lease; atomic_t fi_delegees; - struct inode *fi_inode; + struct knfsd_fh fi_fhandle; bool fi_had_conflict; }; -/* XXX: for first cut may fall back on returning file that doesn't work - * at all? 
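/*
 * Editorial sketch (hypothetical types, not in the patch): the
 * "superset" layout the comments above keep referring to is plain
 * struct embedding, and the openowner()/lockowner() helpers are just
 * container_of() casts back to the containing object.
 */
#include <linux/kernel.h>

struct example_owner_base {
	int seqid;
};

struct example_openowner {
	struct example_owner_base base;	/* must be first field */
	int extra;
};

static inline struct example_openowner *
example_to_openowner(struct example_owner_base *b)
{
	return container_of(b, struct example_openowner, base);
}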
*/ -static inline struct file *find_writeable_file(struct nfs4_file *f) -{ - if (f->fi_fds[O_WRONLY]) - return f->fi_fds[O_WRONLY]; - return f->fi_fds[O_RDWR]; -} - -static inline struct file *find_readable_file(struct nfs4_file *f) -{ - if (f->fi_fds[O_RDONLY]) - return f->fi_fds[O_RDONLY]; - return f->fi_fds[O_RDWR]; -} - -static inline struct file *find_any_file(struct nfs4_file *f) -{ - if (f->fi_fds[O_RDWR]) - return f->fi_fds[O_RDWR]; - else if (f->fi_fds[O_WRONLY]) - return f->fi_fds[O_WRONLY]; - else - return f->fi_fds[O_RDONLY]; -} - -/* "ol" stands for "Open or Lock". Better suggestions welcome. */ +/* + * A generic struct representing either a open or lock stateid. The nfs4_client + * holds a reference to each of these objects, and they in turn hold a + * reference to their respective stateowners. The client's reference is + * released in response to a close or unlock (depending on whether it's an open + * or lock stateid) or when the client is being destroyed. + * + * In the case of v4.0 open stateids, these objects are preserved for a little + * while after close in order to handle CLOSE replays. Those are eventually + * reclaimed via a LRU scheme by the laundromat. + * + * This object is a superset of the nfs4_stid. "ol" stands for "Open or Lock". + * Better suggestions welcome. + */ struct nfs4_ol_stateid { struct nfs4_stid st_stid; /* must be first field */ struct list_head st_perfile; struct list_head st_perstateowner; - struct list_head st_lockowners; + struct list_head st_locks; struct nfs4_stateowner * st_stateowner; - struct nfs4_file * st_file; - unsigned long st_access_bmap; - unsigned long st_deny_bmap; + unsigned char st_access_bmap; + unsigned char st_deny_bmap; struct nfs4_ol_stateid * st_openstp; }; @@ -456,15 +523,16 @@ struct nfsd_net; extern __be32 nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, stateid_t *stateid, int flags, struct file **filp); -extern void nfs4_lock_state(void); -extern void nfs4_unlock_state(void); +void nfs4_put_stid(struct nfs4_stid *s); void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); extern void nfs4_release_reclaim(struct nfsd_net *); extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn); -extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); +extern __be32 nfs4_check_open_reclaim(clientid_t *clid, + struct nfsd4_compound_state *cstate, struct nfsd_net *nn); extern int set_callback_cred(void); -extern void nfsd4_init_callback(struct nfsd4_callback *); +void nfsd4_run_cb_null(struct work_struct *w); +void nfsd4_run_cb_recall(struct work_struct *w); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); @@ -472,11 +540,10 @@ extern void nfsd4_cb_recall(struct nfs4_delegation *dp); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); -extern void nfs4_put_delegation(struct nfs4_delegation *dp); +extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); -extern void put_client_renew(struct nfs4_client *clp); /* nfs4recover operations */ extern int 
nfsd4_client_tracking_init(struct net *net); @@ -490,19 +557,24 @@ extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); #ifdef CONFIG_NFSD_FAULT_INJECTION int nfsd_fault_inject_init(void); void nfsd_fault_inject_cleanup(void); -u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64)); -struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t); - -u64 nfsd_forget_client(struct nfs4_client *, u64); -u64 nfsd_forget_client_locks(struct nfs4_client*, u64); -u64 nfsd_forget_client_openowners(struct nfs4_client *, u64); -u64 nfsd_forget_client_delegations(struct nfs4_client *, u64); -u64 nfsd_recall_client_delegations(struct nfs4_client *, u64); - -u64 nfsd_print_client(struct nfs4_client *, u64); -u64 nfsd_print_client_locks(struct nfs4_client *, u64); -u64 nfsd_print_client_openowners(struct nfs4_client *, u64); -u64 nfsd_print_client_delegations(struct nfs4_client *, u64); + +u64 nfsd_inject_print_clients(void); +u64 nfsd_inject_forget_client(struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_clients(u64); + +u64 nfsd_inject_print_locks(void); +u64 nfsd_inject_forget_client_locks(struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_locks(u64); + +u64 nfsd_inject_print_openowners(void); +u64 nfsd_inject_forget_client_openowners(struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_openowners(u64); + +u64 nfsd_inject_print_delegations(void); +u64 nfsd_inject_forget_client_delegations(struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_delegations(u64); +u64 nfsd_inject_recall_client_delegations(struct sockaddr_storage *, size_t); +u64 nfsd_inject_recall_delegations(u64); #else /* CONFIG_NFSD_FAULT_INJECTION */ static inline int nfsd_fault_inject_init(void) { return 0; } static inline void nfsd_fault_inject_cleanup(void) {} diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 140c496f612..f501a9b5c9d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -189,8 +189,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); dparent = fhp->fh_dentry; - exp = fhp->fh_export; - exp_get(exp); + exp = exp_get(fhp->fh_export); /* Lookup the name, but don't follow links */ if (isdotent(name, len)) { @@ -464,7 +463,7 @@ out_put_write_access: if (size_change) put_write_access(inode); if (!err) - commit_metadata(fhp); + err = nfserrno(commit_metadata(fhp)); out: return err; } @@ -820,7 +819,8 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, return __splice_from_pipe(pipe, sd, nfsd_splice_actor); } -__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err) +static __be32 +nfsd_finish_read(struct file *file, unsigned long *count, int host_err) { if (host_err >= 0) { nfsdstats.io_read += host_err; @@ -831,7 +831,7 @@ __be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err) return nfserrno(host_err); } -int nfsd_splice_read(struct svc_rqst *rqstp, +__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct file *file, loff_t offset, unsigned long *count) { struct splice_desc sd = { @@ -847,7 +847,7 @@ int nfsd_splice_read(struct svc_rqst *rqstp, return nfsd_finish_read(file, count, host_err); } -int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, +__be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { mm_segment_t oldfs; @@ -1121,7 +1121,8 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, iap->ia_valid &= 
~(ATTR_UID|ATTR_GID); if (iap->ia_valid) return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); - return 0; + /* Callers expect file metadata to be committed here */ + return nfserrno(commit_metadata(resfhp)); } /* HPUX client sometimes creates a file in mode 000, and sets size to 0. @@ -1253,9 +1254,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfsd_create_setattr(rqstp, resfhp, iap); /* - * nfsd_setattr already committed the child. Transactional filesystems - * had a chance to commit changes for both parent and child - * simultaneously making the following commit_metadata a noop. + * nfsd_create_setattr already committed the child. Transactional + * filesystems had a chance to commit changes for both parent and + * child simultaneously making the following commit_metadata a + * noop. */ err2 = nfserrno(commit_metadata(fhp)); if (err2) @@ -1426,7 +1428,8 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfsd_create_setattr(rqstp, resfhp, iap); /* - * nfsd_setattr already committed the child (and possibly also the parent). + * nfsd_create_setattr already committed the child + * (and possibly also the parent). */ if (!err) err = nfserrno(commit_metadata(fhp)); @@ -1504,16 +1507,15 @@ out_nfserr: __be32 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, - char *path, int plen, - struct svc_fh *resfhp, - struct iattr *iap) + char *path, + struct svc_fh *resfhp) { struct dentry *dentry, *dnew; __be32 err, cerr; int host_err; err = nfserr_noent; - if (!flen || !plen) + if (!flen || path[0] == '\0') goto out; err = nfserr_exist; if (isdotent(fname, flen)) @@ -1534,18 +1536,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, if (IS_ERR(dnew)) goto out_nfserr; - if (unlikely(path[plen] != 0)) { - char *path_alloced = kmalloc(plen+1, GFP_KERNEL); - if (path_alloced == NULL) - host_err = -ENOMEM; - else { - strncpy(path_alloced, path, plen); - path_alloced[plen] = 0; - host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced); - kfree(path_alloced); - } - } else - host_err = vfs_symlink(dentry->d_inode, dnew, path); + host_err = vfs_symlink(dentry->d_inode, dnew, path); err = nfserrno(host_err); if (!err) err = nfserrno(commit_metadata(fhp)); @@ -2093,8 +2084,7 @@ nfsd_racache_init(int cache_size) if (raparm_hash[0].pb_head) return 0; nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); - if (nperbucket < 2) - nperbucket = 2; + nperbucket = max(2, nperbucket); cache_size = nperbucket * RAPARM_HASH_SIZE; dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 91b6ae3f658..c2ff3f14e5f 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -74,9 +74,9 @@ struct raparms; __be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *, struct file **, struct raparms **); void nfsd_put_tmp_read_open(struct file *, struct raparms *); -int nfsd_splice_read(struct svc_rqst *, +__be32 nfsd_splice_read(struct svc_rqst *, struct file *, loff_t, unsigned long *); -int nfsd_readv(struct file *, loff_t, struct kvec *, int, +__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); @@ -85,8 +85,8 @@ __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, - char *name, int len, char *path, int plen, - struct svc_fh *res,
struct iattr *); + char *name, int len, char *path, + struct svc_fh *res); __be32 nfsd_link(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *); __be32 nfsd_rename(struct svc_rqst *, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 18cbb6d9c8a..465e7799742 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -55,6 +55,7 @@ struct nfsd4_compound_state { struct svc_fh current_fh; struct svc_fh save_fh; struct nfs4_stateowner *replay_owner; + struct nfs4_client *clp; /* For sessions DRC */ struct nfsd4_session *session; struct nfsd4_slot *slot; @@ -107,8 +108,8 @@ struct nfsd4_create { u32 cr_type; /* request */ union { /* request */ struct { - u32 namelen; - char *name; + u32 datalen; + char *data; } link; /* NF4LNK */ struct { u32 specdata1; @@ -121,8 +122,8 @@ struct nfsd4_create { struct nfs4_acl *cr_acl; struct xdr_netobj cr_label; }; -#define cr_linklen u.link.namelen -#define cr_linkname u.link.name +#define cr_datalen u.link.datalen +#define cr_data u.link.data #define cr_specdata1 u.dev.specdata1 #define cr_specdata2 u.dev.specdata2 @@ -478,6 +479,14 @@ struct nfsd4_op { bool nfsd4_cache_this_op(struct nfsd4_op *); +/* + * Memory needed just for the duration of processing one compound: + */ +struct svcxdr_tmpbuf { + struct svcxdr_tmpbuf *next; + char buf[]; +}; + struct nfsd4_compoundargs { /* scratch variables for XDR decode */ __be32 * p; @@ -486,11 +495,7 @@ struct nfsd4_compoundargs { int pagelen; __be32 tmp[8]; __be32 * tmpp; - struct tmpbuf { - struct tmpbuf *next; - void (*release)(const void *); - void *buf; - } *to_free; + struct svcxdr_tmpbuf *to_free; struct svc_rqst *rqstp; @@ -574,7 +579,6 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_setclientid_confirm *setclientid_confirm); -extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_exchange_id *); extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *); @@ -585,6 +589,7 @@ extern __be32 nfsd4_create_session(struct svc_rqst *, extern __be32 nfsd4_sequence(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_sequence *); +extern void nfsd4_sequence_done(struct nfsd4_compoundres *resp); extern __be32 nfsd4_destroy_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_session *); @@ -594,7 +599,9 @@ extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, struct nfsd4_open *open, struct nfsd_net *nn); extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open); -extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status); +extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate); +extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, + struct nfsd4_open *open, __be32 status); extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); extern __be32 nfsd4_close(struct svc_rqst *rqstp, @@ -625,6 +632,7 @@ extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp, extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid); extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr); + #endif /* diff --git a/fs/proc/Makefile 
b/fs/proc/Makefile index 239493ec718..7151ea42804 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -23,6 +23,7 @@ proc-y += version.o proc-y += softirqs.o proc-y += namespaces.o proc-y += self.o +proc-y += thread_self.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o diff --git a/fs/proc/base.c b/fs/proc/base.c index 043c83cb51f..baf852b648a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2814,7 +2814,7 @@ retry: return iter; } -#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) +#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2) /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file *file, struct dir_context *ctx) @@ -2826,14 +2826,19 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) if (pos >= PID_MAX_LIMIT + TGID_OFFSET) return 0; - if (pos == TGID_OFFSET - 1) { + if (pos == TGID_OFFSET - 2) { struct inode *inode = ns->proc_self->d_inode; if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) return 0; - iter.tgid = 0; - } else { - iter.tgid = pos - TGID_OFFSET; + ctx->pos = pos = pos + 1; + } + if (pos == TGID_OFFSET - 1) { + struct inode *inode = ns->proc_thread_self->d_inode; + if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK)) + return 0; + ctx->pos = pos = pos + 1; } + iter.tgid = pos - TGID_OFFSET; iter.task = NULL; for (iter = next_tgid(ns, iter); iter.task; @@ -2862,6 +2867,9 @@ static const struct pid_entry tid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), +#ifdef CONFIG_NET + DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), +#endif REG("environ", S_IRUSR, proc_environ_operations), ONE("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 0adbc02d60e..333080d7a67 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -442,6 +442,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) int proc_fill_super(struct super_block *s) { struct inode *root_inode; + int ret; s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; s->s_blocksize = 1024; @@ -463,5 +464,9 @@ int proc_fill_super(struct super_block *s) return -ENOMEM; } - return proc_setup_self(s); + ret = proc_setup_self(s); + if (ret) { + return ret; + } + return proc_setup_thread_self(s); } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index a024cf7b260..7da13e49128 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -231,6 +231,12 @@ static inline int proc_net_init(void) { return 0; } extern int proc_setup_self(struct super_block *); /* + * proc_thread_self.c + */ +extern int proc_setup_thread_self(struct super_block *); +extern void proc_thread_self_init(void); + +/* * proc_sysctl.c */ #ifdef CONFIG_PROC_SYSCTL diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 4677bb7dc7c..39481028ec0 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -113,9 +113,11 @@ static struct net *get_proc_task_net(struct inode *dir) rcu_read_lock(); task = pid_task(proc_pid(dir), PIDTYPE_PID); if (task != NULL) { - ns = task_nsproxy(task); + task_lock(task); + ns = task->nsproxy; if (ns != NULL) net = get_net(ns->net_ns); + task_unlock(task); } rcu_read_unlock(); @@ -224,7 +226,7 @@ static struct pernet_operations __net_initdata 
proc_net_ns_ops = { int __init proc_net_init(void) { - proc_symlink("net", NULL, "self/net"); + proc_symlink("net", NULL, "thread-self/net"); return register_pernet_subsys(&proc_net_ns_ops); } diff --git a/fs/proc/root.c b/fs/proc/root.c index 574bafc41f0..6296c762696 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -149,6 +149,8 @@ static void proc_kill_sb(struct super_block *sb) ns = (struct pid_namespace *)sb->s_fs_info; if (ns->proc_self) dput(ns->proc_self); + if (ns->proc_thread_self) + dput(ns->proc_thread_self); kill_anon_super(sb); put_pid_ns(ns); } @@ -170,7 +172,8 @@ void __init proc_root_init(void) return; proc_self_init(); - proc_symlink("mounts", NULL, "self/mounts"); + proc_thread_self_init(); + proc_symlink("mounts", NULL, "thread-self/mounts"); proc_net_init(); diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c new file mode 100644 index 00000000000..59075b509df --- /dev/null +++ b/fs/proc/thread_self.c @@ -0,0 +1,85 @@ +#include <linux/sched.h> +#include <linux/namei.h> +#include <linux/slab.h> +#include <linux/pid_namespace.h> +#include "internal.h" + +/* + * /proc/thread_self: + */ +static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer, + int buflen) +{ + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); + pid_t pid = task_pid_nr_ns(current, ns); + char tmp[PROC_NUMBUF + 6 + PROC_NUMBUF]; + if (!pid) + return -ENOENT; + sprintf(tmp, "%d/task/%d", tgid, pid); + return readlink_copy(buffer, buflen, tmp); +} + +static void *proc_thread_self_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); + pid_t pid = task_pid_nr_ns(current, ns); + char *name = ERR_PTR(-ENOENT); + if (pid) { + name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL); + if (!name) + name = ERR_PTR(-ENOMEM); + else + sprintf(name, "%d/task/%d", tgid, pid); + } + nd_set_link(nd, name); + return NULL; +} + +static const struct inode_operations proc_thread_self_inode_operations = { + .readlink = proc_thread_self_readlink, + .follow_link = proc_thread_self_follow_link, + .put_link = kfree_put_link, +}; + +static unsigned thread_self_inum; + +int proc_setup_thread_self(struct super_block *s) +{ + struct inode *root_inode = s->s_root->d_inode; + struct pid_namespace *ns = s->s_fs_info; + struct dentry *thread_self; + + mutex_lock(&root_inode->i_mutex); + thread_self = d_alloc_name(s->s_root, "thread-self"); + if (thread_self) { + struct inode *inode = new_inode_pseudo(s); + if (inode) { + inode->i_ino = thread_self_inum; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_mode = S_IFLNK | S_IRWXUGO; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_op = &proc_thread_self_inode_operations; + d_add(thread_self, inode); + } else { + dput(thread_self); + thread_self = ERR_PTR(-ENOMEM); + } + } else { + thread_self = ERR_PTR(-ENOMEM); + } + mutex_unlock(&root_inode->i_mutex); + if (IS_ERR(thread_self)) { + pr_err("proc_fill_super: can't allocate /proc/thread_self\n"); + return PTR_ERR(thread_self); + } + ns->proc_thread_self = thread_self; + return 0; +} + +void __init proc_thread_self_init(void) +{ + proc_alloc_inum(&thread_self_inum); +} diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 1a81373947f..73ca1740d83 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -232,17 +232,15 @@ static int mounts_open_common(struct inode *inode, struct file *file, 
if (!task) goto err; - rcu_read_lock(); - nsp = task_nsproxy(task); + task_lock(task); + nsp = task->nsproxy; if (!nsp || !nsp->mnt_ns) { - rcu_read_unlock(); + task_unlock(task); put_task_struct(task); goto err; } ns = nsp->mnt_ns; get_mnt_ns(ns); - rcu_read_unlock(); - task_lock(task); if (!task->fs) { task_unlock(task); put_task_struct(task);
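/*
 * Editorial sketch (userspace, not part of the patch; assumes a kernel
 * with the thread_self changes above): /proc/self points at the
 * process, while the new /proc/thread-self resolves per-thread to
 * "<tgid>/task/<tid>", as built by proc_thread_self_readlink().
 */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	ssize_t n = readlink("/proc/thread-self", buf, sizeof(buf) - 1);

	if (n < 0) {
		perror("readlink");
		return 1;
	}
	buf[n] = '\0';
	printf("/proc/thread-self -> %s\n", buf);
	return 0;
}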