summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sage Weil <sage@newdream.net> 2009-02-17 17:13:31 -0800
committer Sage Weil <sage@newdream.net> 2009-02-19 15:25:46 -0800
commit 100fe763d429be3b24e05216e64d5964da9ab163 (patch)
tree f014ecf7a69bbb5d3f29e28fb5d71f26e04e7be5
parent 86c92a277dd9761cfd9ee547382e5e88f79087f9 (diff)
download ceph-100fe763d429be3b24e05216e64d5964da9ab163.tar.gz
kclient: prealloc ino queue
-rw-r--r-- src/kernel/mds_client.c 119
-rw-r--r-- src/kernel/mds_client.h 18
-rw-r--r-- src/kernel/mdsmap.c 1
-rw-r--r-- src/kernel/mdsmap.h 2
-rw-r--r-- src/kernel/super.h 1
5 files changed, 138 insertions, 3 deletions
diff --git a/src/kernel/mds_client.c b/src/kernel/mds_client.c
index 946da1431c0..4d428c57b83 100644
--- a/src/kernel/mds_client.c
+++ b/src/kernel/mds_client.c
@@ -499,6 +499,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
return req->r_resend_mds;
}
+ if (mode == USE_TABLE_MDS)
+ return mdsc->mdsmap->m_table;
+
if (mode == USE_CAP_MDS) {
mds = ceph_get_cap_mds(dentry->d_inode);
if (mds >= 0) {
@@ -632,6 +635,117 @@ out:
}
/*
+ * ino preallocation
+ */
+/*
+ * Take one preallocated ino off the front of the queue.
+ *
+ * The extent at the head of the ring gives up its first ino; once an
+ * extent is exhausted the head advances (wrapping at inoq.max).
+ *
+ * Returns the ino, or 0 if no preallocated inos are queued.
+ */
+u64 ceph_mdsc_prealloc_dequeue(struct ceph_mds_client *mdsc)
+{
+	struct ceph_ino_queue *q = &mdsc->inoq;
+	struct ceph_ino_extent *ext;
+	u64 ino = 0;
+
+	mutex_lock(&q->mutex);
+	if (!q->numi)
+		goto out;
+
+	/* peel the first ino off the head extent */
+	ext = &q->inos[q->head];
+	ino = ext->start;
+	ext->start++;
+	ext->len--;
+	q->numi--;
+
+	/* head extent used up?  retire it and step to the next slot */
+	if (ext->len == 0) {
+		q->num--;
+		q->head++;
+		if (q->head == q->max)
+			q->head = 0;
+	}
+out:
+	mutex_unlock(&q->mutex);
+	dout(20, "prealloc_dequeue %llx\n", ino);
+	return ino;
+}
+
+/*
+ * Append the extent [first, first+num) of preallocated inos to the tail
+ * of the queue, growing the ring buffer if it is full.
+ *
+ * The inos are also subtracted from inoq.requesting, since they are no
+ * longer outstanding once the reply has arrived (even if we then fail
+ * to store them).
+ *
+ * Returns 0 on success, -EINVAL if @num is not positive, or -ENOMEM if
+ * the ring could not be grown.
+ */
+int prealloc_enqueue(struct ceph_mds_client *mdsc, u64 first, int num)
+{
+	struct ceph_ino_queue *q = &mdsc->inoq;
+	int ret = 0;
+
+	dout(10, "prealloc_enqueue %llx~%d\n", first, num);
+
+	/* a zero-length extent would underflow ->len in the dequeue path */
+	if (num <= 0)
+		return -EINVAL;
+
+	mutex_lock(&q->mutex);
+
+	q->requesting -= num;
+
+	if (q->num == q->max) {
+		/* ring is full (or not yet allocated): grow it */
+		int newlen = q->max ? q->max * 2 : 4;
+		struct ceph_ino_extent *newq;
+		int a, b;
+
+		dout(20, "prealloc_enqueue realloc %d\n", newlen);
+		newq = kmalloc(newlen * sizeof(*newq), GFP_NOFS);
+		if (!newq) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		/*
+		 * The ring is full, so head == tail.  Live entries are
+		 * [head, max) followed by the wrapped part [0, head).
+		 * Unroll them to the start of the new array.
+		 */
+		a = q->max - q->head;
+		b = q->num - a;
+		if (a)
+			memcpy(newq, q->inos + q->head, a * sizeof(*newq));
+		if (b)
+			memcpy(newq + a, q->inos, b * sizeof(*newq));
+		kfree(q->inos);
+		q->inos = newq;
+		q->max = newlen;
+		q->head = 0;
+		q->tail = q->num;
+	}
+
+	q->inos[q->tail].start = first;
+	q->inos[q->tail].len = num;
+	q->num++;
+	q->numi += num;
+
+	/* advance the tail, wrapping at the end of the array */
+	q->tail++;
+	if (q->tail == q->max)
+		q->tail = 0;
+
+out:
+	mutex_unlock(&q->mutex);
+	return ret;
+}
+
+/*
+ * Give back @n inos previously added to inoq.requesting.
+ */
+static void prealloc_unreserve(struct ceph_mds_client *mdsc, int n)
+{
+	mutex_lock(&mdsc->inoq.mutex);
+	mdsc->inoq.requesting -= n;
+	mutex_unlock(&mdsc->inoq.mutex);
+}
+
+/*
+ * Top up the preallocated ino queue.
+ *
+ * Computes how many inos are missing relative to the target (counting
+ * both queued inos and those already requested) and, if any are needed,
+ * sends a CEPH_MDS_OP_PREALLOC request to the table mds.  A positive
+ * result from ceph_mdsc_do_request() is treated as the number of inos
+ * granted, starting at the ino in the reply head.
+ *
+ * Returns 0 if nothing needed to be requested, the number of inos
+ * granted on success, or a negative error code.
+ */
+int request_prealloc(struct ceph_mds_client *mdsc)
+{
+	struct ceph_mds_request *req;
+	struct ceph_mds_request_head *rhead;
+	int target = 1024;	/* inos we try to keep on hand */
+	int num, err;
+
+	mutex_lock(&mdsc->inoq.mutex);
+	num = target - mdsc->inoq.numi - mdsc->inoq.requesting;
+	dout(10, "request_prealloc have %d want %d requesting %d .. %d\n",
+	     mdsc->inoq.numi, target, mdsc->inoq.requesting, num);
+	/*
+	 * Nothing to do if we are at (or over) target, or if the shortfall
+	 * is smaller than what is already in flight.
+	 */
+	if (num <= 0 || num < mdsc->inoq.requesting) {
+		mutex_unlock(&mdsc->inoq.mutex);
+		return 0;
+	}
+	mdsc->inoq.requesting += num;
+	mutex_unlock(&mdsc->inoq.mutex);
+
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_PREALLOC,
+				       0, NULL, 0, NULL, NULL, USE_TABLE_MDS);
+	if (IS_ERR(req)) {
+		/* never sent: stop counting these inos as outstanding */
+		prealloc_unreserve(mdsc, num);
+		return PTR_ERR(req);
+	}
+	rhead = req->r_request->front.iov_base;
+	rhead->args.prealloc.num = cpu_to_le32(num);
+	err = ceph_mdsc_do_request(mdsc, NULL, req);
+	if (err > 0) {
+		struct ceph_mds_reply_head *h = req->r_reply->front.iov_base;
+		int granted = err;
+		int ret;
+
+		/* prealloc_enqueue() itself releases 'granted' inos */
+		ret = prealloc_enqueue(mdsc, le64_to_cpu(h->ino), granted);
+		/* release whatever part of the request was not granted */
+		if (granted != num)
+			prealloc_unreserve(mdsc, num - granted);
+		if (ret < 0)
+			err = ret;
+	} else {
+		/* request failed or granted nothing */
+		prealloc_unreserve(mdsc, num);
+	}
+	ceph_mdsc_put_request(req);
+	return err;
+}
+
+/*
* caller must hold session s_mutex
*/
static void remove_session_caps(struct ceph_mds_session *session)
@@ -2090,6 +2204,8 @@ static void delayed_work(struct work_struct *work)
if (want_map)
ceph_monc_request_mdsmap(&mdsc->client->monc, want_map);
+ request_prealloc(mdsc);
+
schedule_delayed(mdsc);
}
@@ -2104,6 +2220,8 @@ void ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
mdsc->sessions = NULL;
mdsc->max_sessions = 0;
mdsc->stopping = 0;
+ memset(&mdsc->inoq, 0, sizeof(mdsc->inoq));
+ mutex_init(&mdsc->inoq.mutex);
init_rwsem(&mdsc->snap_rwsem);
INIT_RADIX_TREE(&mdsc->snap_realms, GFP_NOFS);
INIT_LIST_HEAD(&mdsc->snap_empty);
@@ -2334,5 +2452,4 @@ bad:
return;
}
-
/* eof */
diff --git a/src/kernel/mds_client.h b/src/kernel/mds_client.h
index 7b07a63102f..dbd424ff2f2 100644
--- a/src/kernel/mds_client.h
+++ b/src/kernel/mds_client.h
@@ -132,6 +132,7 @@ enum {
USE_RANDOM_MDS,
USE_CAP_MDS, /* prefer mds we hold caps from */
USE_AUTH_MDS, /* prefer authoritative mds for this metadata item */
+ USE_TABLE_MDS,
};
struct ceph_mds_request;
@@ -194,6 +195,19 @@ struct ceph_mds_request {
};
/*
+ * Ino number preallocation queue (circular buffer)
+ */
+/* a contiguous run of preallocated inos: [start, start + len) */
+struct ceph_ino_extent {
+	u64 start;	/* first ino still available in this extent */
+	u64 len;	/* number of inos remaining in this extent */
+};
+/* ring of ino extents; all fields are accessed under @mutex */
+struct ceph_ino_queue {
+	struct mutex mutex;
+	int head;	/* index of the extent inos are dequeued from */
+	int tail;	/* index where the next extent is stored */
+	int num;	/* number of extents currently queued */
+	int max;	/* ring size used for room and wrap-around checks */
+	int numi;	/* total inos across all queued extents */
+	struct ceph_ino_extent *inos;	/* the extent array itself */
+	int requesting;	/* inos asked for but not yet enqueued */
+};
+
+/*
* mds client state
*/
struct ceph_mds_client {
@@ -208,6 +222,8 @@ struct ceph_mds_client {
int max_sessions; /* len of s_mds_sessions */
int stopping; /* true if shutting down */
+ struct ceph_ino_queue inoq;
+
/*
* snap_rwsem will cover cap linkage into snaprealms, and
* realm snap contexts. (later, we can do per-realm snap
@@ -296,4 +312,6 @@ extern void ceph_mdsc_flushed_all_caps(struct ceph_mds_client *mdsc,
extern struct ceph_mds_request *ceph_mdsc_get_listener_req(struct inode *inode,
u64 tid);
+/*
+ * NOTE(review): no definition of ceph_mdsc_prealloc_ino() is visible in
+ * this patch; the function mds_client.c actually adds is
+ * ceph_mdsc_prealloc_dequeue().  Confirm whether the old name can go.
+ */
+extern u64 ceph_mdsc_prealloc_ino(struct ceph_mds_client *mdsc);
+/* returns the next preallocated ino, or 0 if none are queued */
+extern u64 ceph_mdsc_prealloc_dequeue(struct ceph_mds_client *mdsc);
+
#endif
diff --git a/src/kernel/mdsmap.c b/src/kernel/mdsmap.c
index 9704efa2ad2..e11333dbb48 100644
--- a/src/kernel/mdsmap.c
+++ b/src/kernel/mdsmap.c
@@ -62,6 +62,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
ceph_decode_32(p, m->m_client_epoch);
ceph_decode_32(p, m->m_last_failure);
ceph_decode_32(p, m->m_root);
+ ceph_decode_32(p, m->m_table);
ceph_decode_32(p, m->m_session_timeout);
ceph_decode_32(p, m->m_session_autoclose);
ceph_decode_32(p, m->m_max_mds);
diff --git a/src/kernel/mdsmap.h b/src/kernel/mdsmap.h
index b50e298402b..0684332b5e7 100644
--- a/src/kernel/mdsmap.h
+++ b/src/kernel/mdsmap.h
@@ -10,7 +10,7 @@
*/
struct ceph_mdsmap {
u32 m_epoch, m_client_epoch, m_last_failure;
- u32 m_root;
+ u32 m_root, m_table;
u32 m_session_timeout; /* seconds */
u32 m_session_autoclose; /* seconds */
u32 m_max_mds; /* size of m_addr, m_state arrays */
diff --git a/src/kernel/super.h b/src/kernel/super.h
index 84aa5e0fe4f..f979e87ae8a 100644
--- a/src/kernel/super.h
+++ b/src/kernel/super.h
@@ -85,7 +85,6 @@ extern struct kobject *ceph_kobj;
extern struct list_head ceph_clients;
extern spinlock_t ceph_clients_list_lock;
-
/*
* per-filesystem client state
*