summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-04-17 11:05:00 -0700
committerSage Weil <sage@inktank.com>2013-04-17 11:05:00 -0700
commit593507ea7084179b4e1ec94aa6e00656fe5ebb19 (patch)
tree7df18f059597b2b347108a5f52119b85633db8d7
parentf3527d4b514e4c0b4bbdb53c11b9b776a846aaf9 (diff)
parenta993d2565f56892992a6e18500037db35e392dbd (diff)
downloadceph-593507ea7084179b4e1ec94aa6e00656fe5ebb19.tar.gz
Merge branch 'next'
Conflicts: doc/rbd/rbd-cloudstack.rst
-rw-r--r--doc/rbd/rbd-cloudstack.rst2
-rwxr-xr-xqa/workunits/rbd/qemu-iotests.sh2
-rw-r--r--src/client/Client.cc4
-rw-r--r--src/client/ObjecterWriteback.h12
-rw-r--r--src/librbd/LibrbdWriteback.cc69
-rw-r--r--src/librbd/LibrbdWriteback.h28
-rw-r--r--src/librbd/internal.cc3
-rw-r--r--src/os/FileJournal.cc22
-rw-r--r--src/osdc/ObjectCacher.cc37
-rw-r--r--src/osdc/ObjectCacher.h1
-rw-r--r--src/osdc/WritebackHandler.h8
-rw-r--r--src/test/librbd/test_librbd.cc9
-rw-r--r--src/test/osdc/FakeWriteback.cc11
-rw-r--r--src/test/osdc/FakeWriteback.h8
-rw-r--r--udev/95-ceph-osd.rules2
15 files changed, 149 insertions, 69 deletions
diff --git a/doc/rbd/rbd-cloudstack.rst b/doc/rbd/rbd-cloudstack.rst
index a47d3ccb898..fb66978a451 100644
--- a/doc/rbd/rbd-cloudstack.rst
+++ b/doc/rbd/rbd-cloudstack.rst
@@ -118,4 +118,4 @@ Limitations
.. _Add Primary Storage (4.0.0): http://cloudstack.apache.org/docs/en-US/Apache_CloudStack/4.0.0-incubating/html/Admin_Guide/primary-storage-add.html
.. _Add Primary Storage (4.0.1): http://cloudstack.apache.org/docs/en-US/Apache_CloudStack/4.0.1-incubating/html/Admin_Guide/primary-storage-add.html
.. _Create a New Disk Offering (4.0.0): http://cloudstack.apache.org/docs/en-US/Apache_CloudStack/4.0.0-incubating/html/Admin_Guide/compute-disk-service-offerings.html#creating-disk-offerings
-.. _Create a New Disk Offering (4.0.1): http://cloudstack.apache.org/docs/en-US/Apache_CloudStack/4.0.1-incubating/html/Admin_Guide/compute-disk-service-offerings.html#creating-disk-offerings \ No newline at end of file
+.. _Create a New Disk Offering (4.0.1): http://cloudstack.apache.org/docs/en-US/Apache_CloudStack/4.0.1-incubating/html/Admin_Guide/compute-disk-service-offerings.html#creating-disk-offerings
diff --git a/qa/workunits/rbd/qemu-iotests.sh b/qa/workunits/rbd/qemu-iotests.sh
index 9031b1db536..284d88aea68 100755
--- a/qa/workunits/rbd/qemu-iotests.sh
+++ b/qa/workunits/rbd/qemu-iotests.sh
@@ -8,7 +8,7 @@
# This will only work with particular qemu versions, like 1.0. Later
# versions of qemu includ qemu-iotests directly in the qemu
# repository.
-git clone git://repo.or.cz/qemu-iotests.git
+git clone git://ceph.com/git/qemu-iotests.git
cd qemu-iotests
mkdir bin
diff --git a/src/client/Client.cc b/src/client/Client.cc
index aae22ffa980..3bc8c5bfa9a 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -1948,9 +1948,9 @@ void Client::send_reconnect(MetaSession *session)
in->exporting_mseq = 0;
if (!in->is_any_caps()) {
ldout(cct, 10) << " removing last cap, closing snaprealm" << dendl;
+ in->snaprealm_item.remove_myself();
put_snap_realm(in->snaprealm);
in->snaprealm = 0;
- in->snaprealm_item.remove_myself();
}
}
}
@@ -3257,8 +3257,8 @@ void Client::handle_snap(MClientSnap *m)
// queue for snap writeback
queue_cap_snap(in, in->snaprealm->get_snap_context().seq);
- put_snap_realm(in->snaprealm);
in->snaprealm_item.remove_myself();
+ put_snap_realm(in->snaprealm);
to_move.push_back(in);
}
}
diff --git a/src/client/ObjecterWriteback.h b/src/client/ObjecterWriteback.h
index 1aa5e4932df..9a10fb48a06 100644
--- a/src/client/ObjecterWriteback.h
+++ b/src/client/ObjecterWriteback.h
@@ -11,12 +11,12 @@ class ObjecterWriteback : public WritebackHandler {
ObjecterWriteback(Objecter *o) : m_objecter(o) {}
virtual ~ObjecterWriteback() {}
- virtual tid_t read(const object_t& oid, const object_locator_t& oloc,
- uint64_t off, uint64_t len, snapid_t snapid,
- bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
- Context *onfinish) {
- return m_objecter->read_trunc(oid, oloc, off, len, snapid, pbl, 0,
- trunc_size, trunc_seq, onfinish);
+ virtual void read(const object_t& oid, const object_locator_t& oloc,
+ uint64_t off, uint64_t len, snapid_t snapid,
+ bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
+ Context *onfinish) {
+ m_objecter->read_trunc(oid, oloc, off, len, snapid, pbl, 0,
+ trunc_size, trunc_seq, onfinish);
}
virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid) {
diff --git a/src/librbd/LibrbdWriteback.cc b/src/librbd/LibrbdWriteback.cc
index 1689ad91860..237901dc61f 100644
--- a/src/librbd/LibrbdWriteback.cc
+++ b/src/librbd/LibrbdWriteback.cc
@@ -48,7 +48,6 @@ namespace librbd {
C_Request(CephContext *cct, Context *c, Mutex *l)
: m_cct(cct), m_ctx(c), m_lock(l) {}
virtual ~C_Request() {}
- void set_req(AioRequest *req);
virtual void finish(int r) {
ldout(m_cct, 20) << "aio_cb completing " << dendl;
{
@@ -63,16 +62,39 @@ namespace librbd {
Mutex *m_lock;
};
+ class C_OrderedWrite : public Context {
+ public:
+ C_OrderedWrite(CephContext *cct, LibrbdWriteback::write_result_d *result,
+ LibrbdWriteback *wb)
+ : m_cct(cct), m_result(result), m_wb_handler(wb) {}
+ virtual ~C_OrderedWrite() {}
+ virtual void finish(int r) {
+ ldout(m_cct, 20) << "C_OrderedWrite completing " << m_result << dendl;
+ {
+ Mutex::Locker l(m_wb_handler->m_lock);
+ assert(!m_result->done);
+ m_result->done = true;
+ m_result->ret = r;
+ m_wb_handler->complete_writes(m_result->oid);
+ }
+ ldout(m_cct, 20) << "C_OrderedWrite finished " << m_result << dendl;
+ }
+ private:
+ CephContext *m_cct;
+ LibrbdWriteback::write_result_d *m_result;
+ LibrbdWriteback *m_wb_handler;
+ };
+
LibrbdWriteback::LibrbdWriteback(ImageCtx *ictx, Mutex& lock)
: m_tid(0), m_lock(lock), m_ictx(ictx)
{
}
- tid_t LibrbdWriteback::read(const object_t& oid,
- const object_locator_t& oloc,
- uint64_t off, uint64_t len, snapid_t snapid,
- bufferlist *pbl, uint64_t trunc_size,
- __u32 trunc_seq, Context *onfinish)
+ void LibrbdWriteback::read(const object_t& oid,
+ const object_locator_t& oloc,
+ uint64_t off, uint64_t len, snapid_t snapid,
+ bufferlist *pbl, uint64_t trunc_size,
+ __u32 trunc_seq, Context *onfinish)
{
// on completion, take the mutex and then call onfinish.
Context *req = new C_Request(m_ictx->cct, onfinish, &m_lock);
@@ -82,7 +104,6 @@ namespace librbd {
len, off);
rados_completion->release();
assert(r >= 0);
- return ++m_tid;
}
bool LibrbdWriteback::may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid)
@@ -132,8 +153,10 @@ namespace librbd {
object_no, 0, m_ictx->layout.fl_object_size,
objectx);
uint64_t object_overlap = m_ictx->prune_parent_extents(objectx, overlap);
-
- C_Request *req_comp = new C_Request(m_ictx->cct, oncommit, &m_lock);
+ write_result_d *result = new write_result_d(oid.name, oncommit);
+ m_writes[oid.name].push(result);
+ ldout(m_ictx->cct, 20) << "write will wait for result " << result << dendl;
+ C_OrderedWrite *req_comp = new C_OrderedWrite(m_ictx->cct, result, this);
AioWrite *req = new AioWrite(m_ictx, oid.name,
object_no, off, objectx, object_overlap,
bl, snapc, snap_id,
@@ -141,4 +164,32 @@ namespace librbd {
req->send();
return ++m_tid;
}
+
+ void LibrbdWriteback::complete_writes(const std::string& oid)
+ {
+ assert(m_lock.is_locked());
+ std::queue<write_result_d*>& results = m_writes[oid];
+ ldout(m_ictx->cct, 20) << "complete_writes() oid " << oid << dendl;
+ std::list<write_result_d*> finished;
+
+ while (!results.empty()) {
+ write_result_d *result = results.front();
+ if (!result->done)
+ break;
+ finished.push_back(result);
+ results.pop();
+ }
+
+ if (results.empty())
+ m_writes.erase(oid);
+
+ for (std::list<write_result_d*>::iterator it = finished.begin();
+ it != finished.end(); ++it) {
+ write_result_d *result = *it;
+ ldout(m_ictx->cct, 20) << "complete_writes() completing " << result
+ << dendl;
+ result->oncommit->complete(result->ret);
+ delete result;
+ }
+ }
}
diff --git a/src/librbd/LibrbdWriteback.h b/src/librbd/LibrbdWriteback.h
index b054dbc5950..ba8ff1f114d 100644
--- a/src/librbd/LibrbdWriteback.h
+++ b/src/librbd/LibrbdWriteback.h
@@ -3,6 +3,8 @@
#ifndef CEPH_LIBRBD_LIBRBDWRITEBACKHANDLER_H
#define CEPH_LIBRBD_LIBRBDWRITEBACKHANDLER_H
+#include <queue>
+
#include "include/Context.h"
#include "include/types.h"
#include "include/rados/librados.hpp"
@@ -21,10 +23,10 @@ namespace librbd {
virtual ~LibrbdWriteback() {}
// Note that oloc, trunc_size, and trunc_seq are ignored
- virtual tid_t read(const object_t& oid, const object_locator_t& oloc,
- uint64_t off, uint64_t len, snapid_t snapid,
- bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
- Context *onfinish);
+ virtual void read(const object_t& oid, const object_locator_t& oloc,
+ uint64_t off, uint64_t len, snapid_t snapid,
+ bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
+ Context *onfinish);
// Determine whether a read to this extent could be affected by a write-triggered copy-on-write
virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid);
@@ -35,10 +37,26 @@ namespace librbd {
const bufferlist &bl, utime_t mtime, uint64_t trunc_size,
__u32 trunc_seq, Context *oncommit);
+ struct write_result_d {
+ bool done;
+ int ret;
+ std::string oid;
+ Context *oncommit;
+ write_result_d(const std::string& oid, Context *oncommit) :
+ done(false), ret(0), oid(oid), oncommit(oncommit) {}
+ private:
+ write_result_d(const write_result_d& rhs);
+ const write_result_d& operator=(const write_result_d& rhs);
+ };
+
private:
- int m_tid;
+ void complete_writes(const std::string& oid);
+
+ tid_t m_tid;
Mutex& m_lock;
librbd::ImageCtx *m_ictx;
+ hash_map<std::string, std::queue<write_result_d*> > m_writes;
+ friend class C_OrderedWrite;
};
}
diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc
index 55461aa3a70..c56f0a553b0 100644
--- a/src/librbd/internal.cc
+++ b/src/librbd/internal.cc
@@ -2291,6 +2291,9 @@ reprotect_and_return_err:
ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
<< " len = " << len << dendl;
+ // ensure previous writes are visible to listsnaps
+ _flush(ictx);
+
int r = ictx_check(ictx);
if (r < 0)
return r;
diff --git a/src/os/FileJournal.cc b/src/os/FileJournal.cc
index d8a6f5a1a68..6e5f94c64e5 100644
--- a/src/os/FileJournal.cc
+++ b/src/os/FileJournal.cc
@@ -1529,6 +1529,17 @@ void FileJournal::committed_thru(uint64_t seq)
dout(5) << "committed_thru " << seq << " (last_committed_seq " << last_committed_seq << ")" << dendl;
last_committed_seq = seq;
+ // completions!
+ {
+ Mutex::Locker locker(finisher_lock);
+ queue_completions_thru(seq);
+ if (plug_journal_completions && seq >= header.start_seq) {
+ dout(10) << " removing completion plug, queuing completions thru journaled_seq " << journaled_seq << dendl;
+ plug_journal_completions = false;
+ queue_completions_thru(journaled_seq);
+ }
+ }
+
// adjust start pointer
while (!journalq.empty() && journalq.front().first <= seq) {
journalq.pop_front();
@@ -1543,17 +1554,6 @@ void FileJournal::committed_thru(uint64_t seq)
must_write_header = true;
print_header();
- {
- Mutex::Locker locker(finisher_lock);
- // completions!
- queue_completions_thru(seq);
- if (plug_journal_completions && seq >= header.start_seq) {
- dout(10) << " removing completion plug, queuing completions thru journaled_seq " << journaled_seq << dendl;
- plug_journal_completions = false;
- queue_completions_thru(journaled_seq);
- }
- }
-
// committed but unjournaled items
while (!writeq_empty() && peek_write().seq <= seq) {
dout(15) << " dropping committed but unwritten seq " << peek_write().seq
diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc
index 18a85c0b866..92f0d502746 100644
--- a/src/osdc/ObjectCacher.cc
+++ b/src/osdc/ObjectCacher.cc
@@ -1495,6 +1495,20 @@ bool ObjectCacher::flush(Object *ob, loff_t offset, loff_t length)
return clean;
}
+bool ObjectCacher::_flush_set_finish(C_GatherBuilder *gather, Context *onfinish)
+{
+ assert(lock.is_locked());
+ if (gather->has_subs()) {
+ gather->set_finisher(onfinish);
+ gather->activate();
+ return false;
+ }
+
+ ldout(cct, 10) << "flush_set has no dirty|tx bhs" << dendl;
+ onfinish->complete(0);
+ return true;
+}
+
// flush. non-blocking, takes callback.
// returns true if already flushed
bool ObjectCacher::flush_set(ObjectSet *oset, Context *onfinish)
@@ -1526,15 +1540,7 @@ bool ObjectCacher::flush_set(ObjectSet *oset, Context *onfinish)
}
}
- if (gather.has_subs()) {
- gather.set_finisher(onfinish);
- gather.activate();
- return false;
- } else {
- ldout(cct, 10) << "flush_set " << oset << " has no dirty|tx bhs" << dendl;
- onfinish->complete(0);
- return true;
- }
+ return _flush_set_finish(&gather, onfinish);
}
// flush. non-blocking, takes callback.
@@ -1549,7 +1555,8 @@ bool ObjectCacher::flush_set(ObjectSet *oset, vector<ObjectExtent>& exv, Context
return true;
}
- ldout(cct, 10) << "flush_set " << oset << " on " << exv.size() << " ObjectExtents" << dendl;
+ ldout(cct, 10) << "flush_set " << oset << " on " << exv.size()
+ << " ObjectExtents" << dendl;
// we'll need to wait for all objects to flush!
C_GatherBuilder gather(cct);
@@ -1573,15 +1580,7 @@ bool ObjectCacher::flush_set(ObjectSet *oset, vector<ObjectExtent>& exv, Context
}
}
- if (gather.has_subs()) {
- gather.set_finisher(onfinish);
- gather.activate();
- return false;
- } else {
- ldout(cct, 10) << "flush_set " << oset << " has no dirty|tx bhs" << dendl;
- onfinish->complete(0);
- return true;
- }
+ return _flush_set_finish(&gather, onfinish);
}
void ObjectCacher::purge_set(ObjectSet *oset)
diff --git a/src/osdc/ObjectCacher.h b/src/osdc/ObjectCacher.h
index 681b02406fa..a17046f9126 100644
--- a/src/osdc/ObjectCacher.h
+++ b/src/osdc/ObjectCacher.h
@@ -573,6 +573,7 @@ private:
int _wait_for_write(OSDWrite *wr, uint64_t len, ObjectSet *oset, Mutex& lock,
Context *onfreespace);
void maybe_wait_for_writeback(uint64_t len);
+ bool _flush_set_finish(C_GatherBuilder *gather, Context *onfinish);
public:
bool set_is_cached(ObjectSet *oset);
diff --git a/src/osdc/WritebackHandler.h b/src/osdc/WritebackHandler.h
index a9a035ca52f..17e1f683bec 100644
--- a/src/osdc/WritebackHandler.h
+++ b/src/osdc/WritebackHandler.h
@@ -12,10 +12,10 @@ class WritebackHandler {
WritebackHandler() {}
virtual ~WritebackHandler() {}
- virtual tid_t read(const object_t& oid, const object_locator_t& oloc,
- uint64_t off, uint64_t len, snapid_t snapid,
- bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
- Context *onfinish) = 0;
+ virtual void read(const object_t& oid, const object_locator_t& oloc,
+ uint64_t off, uint64_t len, snapid_t snapid,
+ bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
+ Context *onfinish) = 0;
/**
* check if a given extent read result may change due to a write
*
diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc
index 2051b716963..0094d650440 100644
--- a/src/test/librbd/test_librbd.cc
+++ b/src/test/librbd/test_librbd.cc
@@ -1561,6 +1561,10 @@ TEST(LibRBD, DiffIterate)
ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
+ int seed = getpid();
+ cout << "seed " << seed << std::endl;
+ srand(seed);
+
{
librbd::RBD rbd;
librbd::Image image;
@@ -1627,6 +1631,10 @@ TEST(LibRBD, DiffIterateDiscard)
ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
+ int seed = getpid();
+ cout << "seed " << seed << std::endl;
+ srand(seed);
+
{
librbd::RBD rbd;
librbd::Image image;
@@ -1645,6 +1653,7 @@ TEST(LibRBD, DiffIterateDiscard)
ASSERT_EQ(0u, extents.size());
char data[256];
+ memset(data, 1, sizeof(data));
bl.append(data, 256);
ASSERT_EQ(256, image.write(0, 256, bl));
ASSERT_EQ(0, image.diff_iterate(NULL, 0, size,
diff --git a/src/test/osdc/FakeWriteback.cc b/src/test/osdc/FakeWriteback.cc
index 4445140a6f5..b4cd35ea979 100644
--- a/src/test/osdc/FakeWriteback.cc
+++ b/src/test/osdc/FakeWriteback.cc
@@ -58,15 +58,14 @@ FakeWriteback::~FakeWriteback()
delete m_finisher;
}
-tid_t FakeWriteback::read(const object_t& oid,
- const object_locator_t& oloc,
- uint64_t off, uint64_t len, snapid_t snapid,
- bufferlist *pbl, uint64_t trunc_size,
- __u32 trunc_seq, Context *onfinish)
+void FakeWriteback::read(const object_t& oid,
+ const object_locator_t& oloc,
+ uint64_t off, uint64_t len, snapid_t snapid,
+ bufferlist *pbl, uint64_t trunc_size,
+ __u32 trunc_seq, Context *onfinish)
{
C_Delay *wrapper = new C_Delay(m_cct, onfinish, m_lock, off, pbl, m_delay_ns);
m_finisher->queue(wrapper, len);
- return m_tid.inc();
}
tid_t FakeWriteback::write(const object_t& oid,
diff --git a/src/test/osdc/FakeWriteback.h b/src/test/osdc/FakeWriteback.h
index ff48592d728..e7d6dc16bb4 100644
--- a/src/test/osdc/FakeWriteback.h
+++ b/src/test/osdc/FakeWriteback.h
@@ -17,10 +17,10 @@ public:
FakeWriteback(CephContext *cct, Mutex *lock, uint64_t delay_ns);
virtual ~FakeWriteback();
- virtual tid_t read(const object_t& oid, const object_locator_t& oloc,
- uint64_t off, uint64_t len, snapid_t snapid,
- bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
- Context *onfinish);
+ virtual void read(const object_t& oid, const object_locator_t& oloc,
+ uint64_t off, uint64_t len, snapid_t snapid,
+ bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
+ Context *onfinish);
virtual tid_t write(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, const SnapContext& snapc,
diff --git a/udev/95-ceph-osd.rules b/udev/95-ceph-osd.rules
index a6fcaea8823..77e6ef37c5d 100644
--- a/udev/95-ceph-osd.rules
+++ b/udev/95-ceph-osd.rules
@@ -17,5 +17,5 @@ ACTION=="add" SUBSYSTEM=="block", \
ENV{DEVTYPE}=="partition", \
ENV{ID_PART_ENTRY_TYPE}=="4fbd7e29-9d25-41b8-afd0-5ec00ceff05d", \
RUN+="/sbin/cryptsetup --key-file /etc/ceph/dmcrypt-keys/$env{ID_PART_ENTRY_UUID} --key-size 256 create $env{ID_PART_ENTRY_UUID} /dev/$name", \
- RUN+="bash -c 'while [ ! -e /dev/mapper/$env{ID_PART_ENTRY_UUID} ];do sleep 1; done'", \
+ RUN+="/bin/bash -c 'while [ ! -e /dev/mapper/$env{ID_PART_ENTRY_UUID} ];do sleep 1; done'", \
RUN+="/usr/sbin/ceph-disk-activate --mount /dev/mapper/$env{ID_PART_ENTRY_UUID}"