diff options
author | Sage Weil <sage@newdream.net> | 2009-02-17 17:13:31 -0800 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2009-02-19 15:25:46 -0800 |
commit | 100fe763d429be3b24e05216e64d5964da9ab163 (patch) | |
tree | f014ecf7a69bbb5d3f29e28fb5d71f26e04e7be5 | |
parent | 86c92a277dd9761cfd9ee547382e5e88f79087f9 (diff) | |
download | ceph-100fe763d429be3b24e05216e64d5964da9ab163.tar.gz |
kclient: prealloc ino queue
-rw-r--r-- | src/kernel/mds_client.c | 119 | ||||
-rw-r--r-- | src/kernel/mds_client.h | 18 | ||||
-rw-r--r-- | src/kernel/mdsmap.c | 1 | ||||
-rw-r--r-- | src/kernel/mdsmap.h | 2 | ||||
-rw-r--r-- | src/kernel/super.h | 1 |
5 files changed, 138 insertions, 3 deletions
diff --git a/src/kernel/mds_client.c b/src/kernel/mds_client.c index 946da1431c0..4d428c57b83 100644 --- a/src/kernel/mds_client.c +++ b/src/kernel/mds_client.c @@ -499,6 +499,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, return req->r_resend_mds; } + if (mode == USE_TABLE_MDS) + return mdsc->mdsmap->m_table; + if (mode == USE_CAP_MDS) { mds = ceph_get_cap_mds(dentry->d_inode); if (mds >= 0) { @@ -632,6 +635,117 @@ out: } /* + * ino preallocation + */ +u64 ceph_mdsc_prealloc_dequeue(struct ceph_mds_client *mdsc) +{ + u64 r = 0; + + mutex_lock(&mdsc->inoq.mutex); + if (mdsc->inoq.numi) { + r = mdsc->inoq.inos[mdsc->inoq.head].start; + mdsc->inoq.inos[mdsc->inoq.head].start++; + mdsc->inoq.inos[mdsc->inoq.head].len--; + mdsc->inoq.numi--; + if (mdsc->inoq.inos[mdsc->inoq.head].len == 0) { + mdsc->inoq.num--; + mdsc->inoq.head++; + if (mdsc->inoq.head == mdsc->inoq.max) + mdsc->inoq.head = 0; + } + } + mutex_unlock(&mdsc->inoq.mutex); + dout(20, "prealloc_dequeue %llx\n", r); + return r; +} + +int prealloc_enqueue(struct ceph_mds_client *mdsc, u64 first, int num) +{ + int room; + int ret; + + dout(10, "prealloc_enqueue %llx~%d\n", first, num); + mutex_lock(&mdsc->inoq.mutex); + + mdsc->inoq.requesting -= num; + + room = mdsc->inoq.max - mdsc->inoq.num; + if (!room) { + /* realloc */ + int newlen = mdsc->inoq.num ? mdsc->inoq.num*2 : 4; + struct ceph_ino_extent *newq = + kmalloc(newlen * sizeof(*newq), GFP_NOFS); + int a, b; + + dout(20, "prealloc_enqueue realloc %d\n", newlen); + ret = -ENOMEM; + if (!newq) + goto out; + if (mdsc->inoq.head > mdsc->inoq.tail) { + a = mdsc->inoq.num - mdsc->inoq.head; + b = mdsc->inoq.tail; + } else { + a = mdsc->inoq.tail - mdsc->inoq.head; + b = 0; + } + memcpy(newq, mdsc->inoq.inos + mdsc->inoq.head, + a*sizeof(u64)); + if (b) + memcpy(newq + a, mdsc->inoq.inos, b*sizeof(*newq)); + kfree(mdsc->inoq.inos); + mdsc->inoq.inos = newq; + mdsc->inoq.head = 0; + mdsc->inoq.tail = mdsc->inoq.num; + } + + mdsc->inoq.inos[mdsc->inoq.tail].start = first; + mdsc->inoq.inos[mdsc->inoq.tail].len = num; + mdsc->inoq.num++; + mdsc->inoq.numi += num; + if (mdsc->inoq.tail == mdsc->inoq.max) + mdsc->inoq.tail = 0; + + ret = 0; +out: + mutex_unlock(&mdsc->inoq.mutex); + return ret; +} + +int request_prealloc(struct ceph_mds_client *mdsc) +{ + struct ceph_mds_request *req; + struct ceph_mds_request_head *rhead; + int target = 1024; + int num, err; + + mutex_lock(&mdsc->inoq.mutex); + num = target - mdsc->inoq.numi - mdsc->inoq.requesting; + dout(10, "request_prealloc have %d want %d requesting %d .. %d\n", + mdsc->inoq.num, target, mdsc->inoq.requesting, num); + if (target < mdsc->inoq.numi + mdsc->inoq.requesting || + num < mdsc->inoq.requesting) { + mutex_unlock(&mdsc->inoq.mutex); + return 0; + } + mdsc->inoq.requesting += num; + mutex_unlock(&mdsc->inoq.mutex); + + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_PREALLOC, + 0, NULL, 0, NULL, NULL, USE_TABLE_MDS); + if (IS_ERR(req)) + return PTR_ERR(req); + rhead = req->r_request->front.iov_base; + rhead->args.prealloc.num = cpu_to_le32(num); + err = ceph_mdsc_do_request(mdsc, NULL, req); + if (err > 0) { + struct ceph_mds_reply_head *h = req->r_reply->front.iov_base; + prealloc_enqueue(mdsc, le64_to_cpu(h->ino), err); + } + ceph_mdsc_put_request(req); + return err; +} + +/* * caller must hold session s_mutex */ static void remove_session_caps(struct ceph_mds_session *session) @@ -2090,6 +2204,8 @@ static void delayed_work(struct work_struct *work) if (want_map) ceph_monc_request_mdsmap(&mdsc->client->monc, want_map); + request_prealloc(mdsc); + schedule_delayed(mdsc); } @@ -2104,6 +2220,8 @@ void ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) mdsc->sessions = NULL; mdsc->max_sessions = 0; mdsc->stopping = 0; + memset(&mdsc->inoq, 0, sizeof(mdsc->inoq)); + mutex_init(&mdsc->inoq.mutex); init_rwsem(&mdsc->snap_rwsem); INIT_RADIX_TREE(&mdsc->snap_realms, GFP_NOFS); INIT_LIST_HEAD(&mdsc->snap_empty); @@ -2334,5 +2452,4 @@ bad: return; } - /* eof */ diff --git a/src/kernel/mds_client.h b/src/kernel/mds_client.h index 7b07a63102f..dbd424ff2f2 100644 --- a/src/kernel/mds_client.h +++ b/src/kernel/mds_client.h @@ -132,6 +132,7 @@ enum { USE_RANDOM_MDS, USE_CAP_MDS, /* prefer mds we hold caps from */ USE_AUTH_MDS, /* prefer authoritative mds for this metadata item */ + USE_TABLE_MDS, }; struct ceph_mds_request; @@ -194,6 +195,19 @@ struct ceph_mds_request { }; /* + * Ino number preallocation queue (circular buffer) + */ +struct ceph_ino_extent { + u64 start, len; +}; +struct ceph_ino_queue { + struct mutex mutex; + int head, tail, num, max, numi; + struct ceph_ino_extent *inos; + int requesting; +}; + +/* * mds client state */ struct ceph_mds_client { @@ -208,6 +222,8 @@ struct ceph_mds_client { int max_sessions; /* len of s_mds_sessions */ int stopping; /* true if shutting down */ + struct ceph_ino_queue inoq; + /* * snap_rwsem will cover cap linkage into snaprealms, and * realm snap contexts. (later, we can do per-realm snap @@ -296,4 +312,6 @@ extern void ceph_mdsc_flushed_all_caps(struct ceph_mds_client *mdsc, extern struct ceph_mds_request *ceph_mdsc_get_listener_req(struct inode *inode, u64 tid); +extern u64 ceph_mdsc_prealloc_ino(struct ceph_mds_client *mdsc); + #endif diff --git a/src/kernel/mdsmap.c b/src/kernel/mdsmap.c index 9704efa2ad2..e11333dbb48 100644 --- a/src/kernel/mdsmap.c +++ b/src/kernel/mdsmap.c @@ -62,6 +62,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ceph_decode_32(p, m->m_client_epoch); ceph_decode_32(p, m->m_last_failure); ceph_decode_32(p, m->m_root); + ceph_decode_32(p, m->m_table); ceph_decode_32(p, m->m_session_timeout); ceph_decode_32(p, m->m_session_autoclose); ceph_decode_32(p, m->m_max_mds); diff --git a/src/kernel/mdsmap.h b/src/kernel/mdsmap.h index b50e298402b..0684332b5e7 100644 --- a/src/kernel/mdsmap.h +++ b/src/kernel/mdsmap.h @@ -10,7 +10,7 @@ */ struct ceph_mdsmap { u32 m_epoch, m_client_epoch, m_last_failure; - u32 m_root; + u32 m_root, m_table; u32 m_session_timeout; /* seconds */ u32 m_session_autoclose; /* seconds */ u32 m_max_mds; /* size of m_addr, m_state arrays */ diff --git a/src/kernel/super.h b/src/kernel/super.h index 84aa5e0fe4f..f979e87ae8a 100644 --- a/src/kernel/super.h +++ b/src/kernel/super.h @@ -85,7 +85,6 @@ extern struct kobject *ceph_kobj; extern struct list_head ceph_clients; extern spinlock_t ceph_clients_list_lock; - /* * per-filesystem client state * |