diff options
author | Greg Farnum <greg@inktank.com> | 2013-01-29 17:05:56 -0800 |
---|---|---|
committer | Greg Farnum <greg@inktank.com> | 2013-01-29 17:05:56 -0800 |
commit | e9a6694d0151b79c3a3b44cee5df8e3d4dcbfc2c (patch) | |
tree | 6c1abbcad0479c05be2c59e9bab74c7ec92ac713 | |
parent | 7cd4e50ddf103d93308715218ec2735d93d493ac (diff) | |
download | ceph-e9a6694d0151b79c3a3b44cee5df8e3d4dcbfc2c.tar.gz |
client: return errors to the user if fsync fails
To do so, we allow callers of _flush(Inode) to pass in a Context
as well. This Context is then given to the ObjectCacher as
the completion callback instead of the C_Client_PutInode that
it uses by default. _fsync() passes in a SafeCond which is
pointing to its return code, and presto! The return code gets
a failure if the Objecter returns any.
To preserve existing semantics, _fsync() (and any similar
caller) is now required to take a reference to the inode.
However, this is a behavioral change: previously, a call to fsync
in the userspace code would simply spin as it waited for all the
data to flush out. That obviously wasn't desirable, but now a user
can call fsync, ignore the return value, and think that everything is
hunky-dory when in fact the client is repeatedly trying to flush out
the dirty data. Hrm...
This fixes the fsync case of #2753 for ceph-fuse.
Signed-off-by: Greg Farnum <greg@inktank.com>
-rw-r--r-- | src/client/Client.cc | 53 | ||||
-rw-r--r-- | src/client/Client.h | 14 |
2 files changed, 53 insertions, 14 deletions
diff --git a/src/client/Client.cc b/src/client/Client.cc index 705c162da3b..a9f45546453 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -2566,7 +2566,7 @@ public: } }; -bool Client::_flush(Inode *in) +bool Client::_flush(Inode *in, Context *onfinish) { ldout(cct, 10) << "_flush " << *in << dendl; @@ -2575,7 +2575,9 @@ bool Client::_flush(Inode *in) return true; } - Context *onfinish = new C_Client_PutInode(this, in); + if (!onfinish) { + onfinish = new C_Client_PutInode(this, in); + } bool safe = objectcacher->flush_set(&in->oset, onfinish); if (safe) { onfinish->complete(0); @@ -5877,11 +5879,19 @@ int Client::_fsync(Fh *f, bool syncdataonly) Inode *in = f->inode; tid_t wait_on_flush = 0; bool flushed_metadata = false; + Mutex lock("Client::_fsync::lock"); + Cond cond; + bool done = false; + C_SafeCond *object_cacher_completion = NULL; ldout(cct, 3) << "_fsync(" << f << ", " << (syncdataonly ? "dataonly)":"data+metadata)") << dendl; - if (cct->_conf->client_oc) - _flush(in); + if (cct->_conf->client_oc) { + object_cacher_completion = new C_SafeCond(&lock, &cond, &done, &r); + in->get(); // take a reference; C_SafeCond doesn't and _flush won't either + _flush(in, object_cacher_completion); + ldout(cct, 15) << "using return-valued form of _fsync" << dendl; + } if (!syncdataonly && (in->dirty_caps & ~CEPH_CAP_ANY_FILE_WR)) { for (map<int, Cap*>::iterator iter = in->caps.begin(); iter != in->caps.end(); ++iter) { @@ -5893,18 +5903,35 @@ int Client::_fsync(Fh *f, bool syncdataonly) flushed_metadata = true; } else ldout(cct, 10) << "no metadata needs to commit" << dendl; - // FIXME: this can starve - while (in->cap_refs[CEPH_CAP_FILE_BUFFER] > 0) { - ldout(cct, 10) << "ino " << in->ino << " has " << in->cap_refs[CEPH_CAP_FILE_BUFFER] - << " uncommitted, waiting" << dendl; - wait_on_list(in->waitfor_commit); + if (object_cacher_completion) { // wait on a real reply instead of guessing + client_lock.Unlock(); + lock.Lock(); + ldout(cct, 15) << "waiting on data to flush" << dendl; + while (!done) + cond.Wait(lock); + lock.Unlock(); + client_lock.Lock(); + put_inode(in); + ldout(cct, 15) << "got " << r << " from flush writeback" << dendl; + } else { + // FIXME: this can starve + while (in->cap_refs[CEPH_CAP_FILE_BUFFER] > 0) { + ldout(cct, 10) << "ino " << in->ino << " has " << in->cap_refs[CEPH_CAP_FILE_BUFFER] + << " uncommitted, waiting" << dendl; + wait_on_list(in->waitfor_commit); + } } - if (flushed_metadata) wait_sync_caps(wait_on_flush); //this could wait longer than strictly necessary, - //but on a sync the user can put up with it - - ldout(cct, 10) << "ino " << in->ino << " has no uncommitted writes" << dendl; + if (!r) { + if (flushed_metadata) wait_sync_caps(wait_on_flush); + // this could wait longer than strictly necessary, + // but on a sync the user can put up with it + ldout(cct, 10) << "ino " << in->ino << " has no uncommitted writes" << dendl; + } else { + ldout(cct, 1) << "ino " << in->ino << " failed to commit to disk! " + << cpp_strerror(-r) << dendl; + } return r; } diff --git a/src/client/Client.h b/src/client/Client.h index b3b1f87cf46..3fcdf481ad1 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -451,7 +451,19 @@ protected: void _invalidate_inode_cache(Inode *in, int64_t off, int64_t len, bool keep_caps); void _async_invalidate(Inode *in, int64_t off, int64_t len, bool keep_caps); void _release(Inode *in); - bool _flush(Inode *in); + + /** + * Initiate a flush of the data associated with the given inode. + * If you specify a Context, you are responsible for holding an inode + * reference for the duration of the flush. If not, _flush() will + * take the reference for you. + * @param in The Inode whose data you wish to flush. + * @param c The Context you wish us to complete once the data is + * flushed. If already flushed, this will be called in-line. + * + * @returns true if the data was already flushed, false otherwise. + */ + bool _flush(Inode *in, Context *c=NULL); void _flush_range(Inode *in, int64_t off, uint64_t size); void _flushed(Inode *in); void flush_set_callback(ObjectCacher::ObjectSet *oset); |