/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
**
**  This copyrighted material is made available to anyone wishing to use,
**  modify, copy, or redistribute it subject to the terms and conditions
**  of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/

#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "lowcomms.h"
#include "rcom.h"
#include "config.h"
#include "memory.h"
#include "recover.h"
#include "util.h"
#include "lock.h"
#include "dir.h"


static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de)
{
	spin_lock(&ls->ls_recover_list_lock);
	list_add(&de->list, &ls->ls_recover_list);
	spin_unlock(&ls->ls_recover_list_lock);
}

static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len)
{
	int found = 0;
	struct dlm_direntry *de;

	spin_lock(&ls->ls_recover_list_lock);
	list_for_each_entry(de, &ls->ls_recover_list, list) {
		if (de->length == len) {
			list_del(&de->list);
			de->master_nodeid = 0;
			memset(de->name, 0, len);
			found = 1;
			break;
		}
	}
	spin_unlock(&ls->ls_recover_list_lock);

	if (!found)
		de = allocate_direntry(ls, len);
	return de;
}

void dlm_clear_free_entries(struct dlm_ls *ls)
{
	struct dlm_direntry *de;

	spin_lock(&ls->ls_recover_list_lock);
	while (!list_empty(&ls->ls_recover_list)) {
		de = list_entry(ls->ls_recover_list.next, struct dlm_direntry,
				list);
		list_del(&de->list);
		free_direntry(de);
	}
	spin_unlock(&ls->ls_recover_list_lock);
}

/*
 * We use the upper 16 bits of the hash value to select the directory node.
 * Low bits are used for distribution of rsb's among hash buckets on each node.
 *
 * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of
 * num_nodes to the hash value.  This value in the desired range is used as an
 * offset into the sorted list of nodeid's to give the particular nodeid.
 */

int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash)
{
	struct list_head *tmp;
	struct dlm_member *memb = NULL;
	uint32_t node, n = 0;
	int nodeid;

	if (ls->ls_num_nodes == 1) {
		nodeid = dlm_our_nodeid();
		goto out;
	}

	if (ls->ls_node_array) {
		node = (hash >> 16) % ls->ls_total_weight;
		nodeid = ls->ls_node_array[node];
		goto out;
	}

	/* make_member_array() failed to kmalloc ls_node_array... */

	node = (hash >> 16) % ls->ls_num_nodes;

	list_for_each(tmp, &ls->ls_nodes) {
		if (n++ != node)
			continue;
		memb = list_entry(tmp, struct dlm_member, list);
		break;
	}

	DLM_ASSERT(memb , printk("num_nodes=%u n=%u node=%u\n",
				 ls->ls_num_nodes, n, node););
	nodeid = memb->nodeid;
 out:
	return nodeid;
}

int dlm_dir_nodeid(struct dlm_rsb *r)
{
	return dlm_hash2nodeid(r->res_ls, r->res_hash);
}

static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len)
{
	uint32_t val;

	val = jhash(name, len, 0);
	val &= (ls->ls_dirtbl_size - 1);

	return val;
}

static void add_entry_to_hash(struct dlm_ls *ls, struct dlm_direntry *de)
{
	uint32_t bucket;

	bucket = dir_hash(ls, de->name, de->length);
	list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
}

static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name,
					  int namelen, uint32_t bucket)
{
	struct dlm_direntry *de;

	list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) {
		if (de->length == namelen && !memcmp(name, de->name, namelen))
			goto out;
	}
	de = NULL;
 out:
	return de;
}

void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen)
{
	struct dlm_direntry *de;
	uint32_t bucket;

	bucket = dir_hash(ls, name, namelen);

	write_lock(&ls->ls_dirtbl[bucket].lock);

	de = search_bucket(ls, name, namelen, bucket);

	if (!de) {
		log_error(ls, "remove fr %u none", nodeid);
		goto out;
	}

	if (de->master_nodeid != nodeid) {
		log_error(ls, "remove fr %u ID %u", nodeid, de->master_nodeid);
		goto out;
	}

	list_del(&de->list);
	free_direntry(de);
 out:
	write_unlock(&ls->ls_dirtbl[bucket].lock);
}

void dlm_dir_clear(struct dlm_ls *ls)
{
	struct list_head *head;
	struct dlm_direntry *de;
	int i;

	DLM_ASSERT(list_empty(&ls->ls_recover_list), );

	for (i = 0; i < ls->ls_dirtbl_size; i++) {
		write_lock(&ls->ls_dirtbl[i].lock);
		head = &ls->ls_dirtbl[i].list;
		while (!list_empty(head)) {
			de = list_entry(head->next, struct dlm_direntry, list);
			list_del(&de->list);
			put_free_de(ls, de);
		}
		write_unlock(&ls->ls_dirtbl[i].lock);
	}
}

int dlm_recover_directory(struct dlm_ls *ls)
{
	struct dlm_member *memb;
	struct dlm_direntry *de;
	char *b, *last_name = NULL;
	int error = -ENOMEM, last_len, count = 0;
	uint16_t namelen;

	log_debug(ls, "dlm_recover_directory");

	if (dlm_no_directory(ls))
		goto out_status;

	dlm_dir_clear(ls);

	last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL);
	if (!last_name)
		goto out;

	list_for_each_entry(memb, &ls->ls_nodes, list) {
		memset(last_name, 0, DLM_RESNAME_MAXLEN);
		last_len = 0;

		for (;;) {
			error = dlm_recovery_stopped(ls);
			if (error)
				goto out_free;

			error = dlm_rcom_names(ls, memb->nodeid,
					       last_name, last_len);
			if (error)
				goto out_free;

			schedule();

			/*
			 * pick namelen/name pairs out of received buffer
			 */

			b = ls->ls_recover_buf + sizeof(struct dlm_rcom);

			for (;;) {
				memcpy(&namelen, b, sizeof(uint16_t));
				namelen = be16_to_cpu(namelen);
				b += sizeof(uint16_t);

				/* namelen of 0xFFFFF marks end of names for
				   this node; namelen of 0 marks end of the
				   buffer */

				if (namelen == 0xFFFF)
					goto done;
				if (!namelen)
					break;

				error = -ENOMEM;
				de = get_free_de(ls, namelen);
				if (!de)
					goto out_free;

				de->master_nodeid = memb->nodeid;
				de->length = namelen;
				last_len = namelen;
				memcpy(de->name, b, namelen);
				memcpy(last_name, b, namelen);
				b += namelen;

				add_entry_to_hash(ls, de);
				count++;
			}
		}
         done:
		;
	}

 out_status:
	error = 0;
	dlm_set_recover_status(ls, DLM_RS_DIR);
	log_debug(ls, "dlm_recover_directory %d entries", count);
 out_free:
	kfree(last_name);
 out:
	dlm_clear_free_entries(ls);
	return error;
}

static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
		     int namelen, int *r_nodeid)
{
	struct dlm_direntry *de, *tmp;
	uint32_t bucket;

	bucket = dir_hash(ls, name, namelen);

	write_lock(&ls->ls_dirtbl[bucket].lock);
	de = search_bucket(ls, name, namelen, bucket);
	if (de) {
		*r_nodeid = de->master_nodeid;
		write_unlock(&ls->ls_dirtbl[bucket].lock);
		if (*r_nodeid == nodeid)
			return -EEXIST;
		return 0;
	}

	write_unlock(&ls->ls_dirtbl[bucket].lock);

	de = allocate_direntry(ls, namelen);
	if (!de)
		return -ENOMEM;

	de->master_nodeid = nodeid;
	de->length = namelen;
	memcpy(de->name, name, namelen);

	write_lock(&ls->ls_dirtbl[bucket].lock);
	tmp = search_bucket(ls, name, namelen, bucket);
	if (tmp) {
		free_direntry(de);
		de = tmp;
	} else {
		list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
	}
	*r_nodeid = de->master_nodeid;
	write_unlock(&ls->ls_dirtbl[bucket].lock);
	return 0;
}

int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
		   int *r_nodeid)
{
	return get_entry(ls, nodeid, name, namelen, r_nodeid);
}

/* Copy the names of master rsb's into the buffer provided.
   Only select names whose dir node is the given nodeid. */

void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
 			   char *outbuf, int outlen, int nodeid)
{
	struct list_head *list;
	struct dlm_rsb *start_r = NULL, *r = NULL;
	int offset = 0, start_namelen, error, dir_nodeid;
	char *start_name;
	uint16_t be_namelen;

	/*
	 * Find the rsb where we left off (or start again)
	 */

	start_namelen = inlen;
	start_name = inbuf;

	if (start_namelen > 1) {
		/*
		 * We could also use a find_rsb_root() function here that
		 * searched the ls_root_list.
		 */
		error = dlm_find_rsb(ls, start_name, start_namelen, R_MASTER,
				     &start_r);
		DLM_ASSERT(!error && start_r,
			   printk("error %d\n", error););
		DLM_ASSERT(!list_empty(&start_r->res_root_list),
			   dlm_print_rsb(start_r););
		dlm_put_rsb(start_r);
	}

	/*
	 * Send rsb names for rsb's we're master of and whose directory node
	 * matches the requesting node.
	 */

	down_read(&ls->ls_root_sem);
	if (start_r)
		list = start_r->res_root_list.next;
	else
		list = ls->ls_root_list.next;

	for (offset = 0; list != &ls->ls_root_list; list = list->next) {
		r = list_entry(list, struct dlm_rsb, res_root_list);
		if (r->res_nodeid)
			continue;

		dir_nodeid = dlm_dir_nodeid(r);
		if (dir_nodeid != nodeid)
			continue;

		/*
		 * The block ends when we can't fit the following in the
		 * remaining buffer space:
		 * namelen (uint16_t) +
		 * name (r->res_length) +
		 * end-of-block record 0x0000 (uint16_t)
		 */

		if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
			/* Write end-of-block record */
			be_namelen = 0;
			memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
			offset += sizeof(uint16_t);
			goto out;
		}

		be_namelen = cpu_to_be16(r->res_length);
		memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
		offset += sizeof(uint16_t);
		memcpy(outbuf + offset, r->res_name, r->res_length);
		offset += r->res_length;
	}

	/*
	 * If we've reached the end of the list (and there's room) write a
	 * terminating record.
	 */

	if ((list == &ls->ls_root_list) &&
	    (offset + sizeof(uint16_t) <= outlen)) {
		be_namelen = 0xFFFF;
		memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
		offset += sizeof(uint16_t);
	}

 out:
	up_read(&ls->ls_root_sem);
}