diff options
author | Kim van der Riet <kpvdr@apache.org> | 2013-02-28 16:14:30 +0000 |
---|---|---|
committer | Kim van der Riet <kpvdr@apache.org> | 2013-02-28 16:14:30 +0000 |
commit | 9c73ef7a5ac10acd6a50d5d52bd721fc2faa5919 (patch) | |
tree | 2a890e1df09e5b896a9b4168a7b22648f559a1f2 /cpp/src/qpid/legacystore | |
parent | 172d9b2a16cfb817bbe632d050acba7e31401cd2 (diff) | |
download | qpid-python-asyncstore.tar.gz |
Update from trunk r1375509 through r1450773 [asyncstore]
git-svn-id: https://svn.apache.org/repos/asf/qpid/branches/asyncstore@1451244 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'cpp/src/qpid/legacystore')
84 files changed, 17928 insertions, 0 deletions
diff --git a/cpp/src/qpid/legacystore/BindingDbt.cpp b/cpp/src/qpid/legacystore/BindingDbt.cpp new file mode 100644 index 0000000000..a48c156e71 --- /dev/null +++ b/cpp/src/qpid/legacystore/BindingDbt.cpp @@ -0,0 +1,50 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/legacystore/BindingDbt.h" + +namespace mrg { +namespace msgstore { + +BindingDbt::BindingDbt(const qpid::broker::PersistableExchange& e, const qpid::broker::PersistableQueue& q, const std::string& k, const qpid::framing::FieldTable& a) + : data(new char[encodedSize(e, q, k, a)]), + buffer(data, encodedSize(e, q, k, a)) +{ + buffer.putLongLong(q.getPersistenceId()); + buffer.putShortString(q.getName()); + buffer.putShortString(k); + buffer.put(a); + + set_data(data); + set_size(encodedSize(e, q, k, a)); +} + +BindingDbt::~BindingDbt() +{ + delete [] data; +} + +uint32_t BindingDbt::encodedSize(const qpid::broker::PersistableExchange& /*not used*/, const qpid::broker::PersistableQueue& q, const std::string& k, const qpid::framing::FieldTable& a) +{ + return 8 /*queue id*/ + q.getName().size() + 1 + k.size() + 1 + a.encodedSize(); +} + +}} diff --git a/cpp/src/qpid/legacystore/BindingDbt.h b/cpp/src/qpid/legacystore/BindingDbt.h new file mode 100644 index 0000000000..63c7cd144e --- /dev/null +++ b/cpp/src/qpid/legacystore/BindingDbt.h @@ -0,0 +1,56 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LEGACYSTORE_BINDINGDBT_H +#define QPID_LEGACYSTORE_BINDINGDBT_H + +#include "db-inc.h" +#include "qpid/broker/PersistableExchange.h" +#include "qpid/broker/PersistableQueue.h" +#include "qpid/framing/Buffer.h" +#include "qpid/framing/FieldTable.h" + +namespace mrg{ +namespace msgstore{ + +class BindingDbt : public Dbt +{ + char* data; + qpid::framing::Buffer buffer; + + static uint32_t encodedSize(const qpid::broker::PersistableExchange& e, + const qpid::broker::PersistableQueue& q, + const std::string& k, + const qpid::framing::FieldTable& a); + +public: + BindingDbt(const qpid::broker::PersistableExchange& e, + const qpid::broker::PersistableQueue& q, + const std::string& k, + const qpid::framing::FieldTable& a); + + virtual ~BindingDbt(); + +}; + +}} + +#endif // ifndef QPID_LEGACYSTORE_BINDINGDBT_H diff --git a/cpp/src/qpid/legacystore/BufferValue.cpp b/cpp/src/qpid/legacystore/BufferValue.cpp new file mode 100644 index 0000000000..fb2c471cd7 --- /dev/null +++ b/cpp/src/qpid/legacystore/BufferValue.cpp @@ -0,0 +1,56 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/legacystore/BufferValue.h" + +namespace mrg { +namespace msgstore { + + + +BufferValue::BufferValue(u_int32_t size, u_int64_t offset) + : data(new char[size]), + buffer(data, size) +{ + set_data(data); + set_size(size); + set_flags(DB_DBT_USERMEM | DB_DBT_PARTIAL); + set_doff(offset); + set_dlen(size); + set_ulen(size); +} + +BufferValue::BufferValue(const qpid::broker::Persistable& p) + : data(new char[p.encodedSize()]), + buffer(data, p.encodedSize()) +{ + p.encode(buffer); + + set_data(data); + set_size(p.encodedSize()); +} + +BufferValue::~BufferValue() +{ + delete [] data; +} + +}} diff --git a/cpp/src/qpid/legacystore/BufferValue.h b/cpp/src/qpid/legacystore/BufferValue.h new file mode 100644 index 0000000000..527fbcf577 --- /dev/null +++ b/cpp/src/qpid/legacystore/BufferValue.h @@ -0,0 +1,46 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LEGACYSTORE_BUFFERVALUE_H +#define QPID_LEGACYSTORE_BUFFERVALUE_H + +#include "db-inc.h" +#include "qpid/broker/Persistable.h" +#include "qpid/framing/Buffer.h" + +namespace mrg{ +namespace msgstore{ + +class BufferValue : public Dbt +{ + char* data; + +public: + qpid::framing::Buffer buffer; + + BufferValue(u_int32_t size, u_int64_t offset); + BufferValue(const qpid::broker::Persistable& p); + virtual ~BufferValue(); +}; + +}} + +#endif // ifndef QPID_LEGACYSTORE_BUFFERVALUE_H diff --git a/cpp/src/qpid/legacystore/Cursor.h b/cpp/src/qpid/legacystore/Cursor.h new file mode 100644 index 0000000000..0c869c29a0 --- /dev/null +++ b/cpp/src/qpid/legacystore/Cursor.h @@ -0,0 +1,50 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LEGACYSTORE_CURSOR_H +#define QPID_LEGACYSTORE_CURSOR_H + +#include <boost/shared_ptr.hpp> +#include "db-inc.h" + +namespace mrg{ +namespace msgstore{ + +class Cursor +{ + Dbc* cursor; +public: + typedef boost::shared_ptr<Db> db_ptr; + + Cursor() : cursor(0) {} + virtual ~Cursor() { if(cursor) cursor->close(); } + + void open(db_ptr db, DbTxn* txn, u_int32_t flags = 0) { db->cursor(txn, &cursor, flags); } + void close() { if(cursor) cursor->close(); cursor = 0; } + Dbc* get() { return cursor; } + Dbc* operator->() { return cursor; } + bool next(Dbt& key, Dbt& value) { return cursor->get(&key, &value, DB_NEXT) == 0; } + bool current(Dbt& key, Dbt& value) { return cursor->get(&key, &value, DB_CURRENT) == 0; } +}; + +}} + +#endif // ifndef QPID_LEGACYSTORE_CURSOR_H diff --git a/cpp/src/qpid/legacystore/DataTokenImpl.cpp b/cpp/src/qpid/legacystore/DataTokenImpl.cpp new file mode 100644 index 0000000000..796d4c02f0 --- /dev/null +++ b/cpp/src/qpid/legacystore/DataTokenImpl.cpp @@ -0,0 +1,28 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/legacystore/DataTokenImpl.h" + +using namespace mrg::msgstore; + +DataTokenImpl::DataTokenImpl():data_tok() {} + +DataTokenImpl::~DataTokenImpl() {} diff --git a/cpp/src/qpid/legacystore/DataTokenImpl.h b/cpp/src/qpid/legacystore/DataTokenImpl.h new file mode 100644 index 0000000000..e01d471e1b --- /dev/null +++ b/cpp/src/qpid/legacystore/DataTokenImpl.h @@ -0,0 +1,47 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LEGACYSTORE_DATATOKENIMPL_H +#define QPID_LEGACYSTORE_DATATOKENIMPL_H + +#include "qpid/legacystore/jrnl/data_tok.h" +#include "qpid/broker/PersistableMessage.h" +#include <boost/intrusive_ptr.hpp> + +namespace mrg { +namespace msgstore { + +class DataTokenImpl : public journal::data_tok, public qpid::RefCounted +{ + private: + boost::intrusive_ptr<qpid::broker::PersistableMessage> sourceMsg; + public: + DataTokenImpl(); + virtual ~DataTokenImpl(); + + inline boost::intrusive_ptr<qpid::broker::PersistableMessage>& getSourceMessage() { return sourceMsg; } + inline void setSourceMessage(const boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg) { sourceMsg = msg; } +}; + +} // namespace msgstore +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_DATATOKENIMPL_H diff --git a/cpp/src/qpid/legacystore/IdDbt.cpp b/cpp/src/qpid/legacystore/IdDbt.cpp new file mode 100644 index 0000000000..d9edaf80e6 --- /dev/null +++ b/cpp/src/qpid/legacystore/IdDbt.cpp @@ -0,0 +1,42 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ *
+ */
+
+#include "qpid/legacystore/IdDbt.h"
+
+using namespace mrg::msgstore;
+
+/** Dbt over an id of 0. */
+IdDbt::IdDbt() : id(0)
+{
+    init();
+}
+
+/** Dbt over the given id. */
+IdDbt::IdDbt(u_int64_t _id) : id(_id)
+{
+    init();
+}
+
+/**
+ * Copy the Dbt settings and the id, then re-point the data at this object's
+ * own 'id'. The base-class copy alone would leave it aimed at other.id.
+ */
+IdDbt::IdDbt(const IdDbt& other) : Dbt(other), id(other.id)
+{
+    set_data(&id);
+}
+
+/** Assignment counterpart of the copy constructor. */
+IdDbt& IdDbt::operator=(const IdDbt& other)
+{
+    if (this != &other) {
+        Dbt::operator=(other);
+        id = other.id;
+        set_data(&id);
+    }
+    return *this;
+}
+
+/**
+ * Point the Dbt at the 'id' member: data = &id, size = ulen = sizeof(id),
+ * flags = DB_DBT_USERMEM.
+ */
+void IdDbt::init()
+{
+    set_data(&id);
+    set_size(sizeof(u_int64_t));
+    set_ulen(sizeof(u_int64_t));
+    set_flags(DB_DBT_USERMEM);
+}
diff --git a/cpp/src/qpid/legacystore/IdDbt.h b/cpp/src/qpid/legacystore/IdDbt.h new file mode 100644 index 0000000000..ecf5922963 --- /dev/null +++ b/cpp/src/qpid/legacystore/IdDbt.h @@ -0,0 +1,42 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */
+
+#ifndef QPID_LEGACYSTORE_IDDBT_H
+#define QPID_LEGACYSTORE_IDDBT_H
+
+#include "db-inc.h"
+
+namespace mrg{
+namespace msgstore{
+
+/**
+ * Dbt whose data is the object's own 64-bit 'id' member, so that reading or
+ * writing through the Dbt reads or writes 'id' directly.
+ */
+class IdDbt : public Dbt
+{
+    // Points the Dbt at 'id'; called by the non-copy constructors.
+    void init();
+public:
+    u_int64_t id;
+
+    IdDbt(u_int64_t id);
+    IdDbt();
+
+    // Copies must re-point the Dbt at their own 'id'; the implicit versions
+    // would leave it aimed at the source object's member.
+    IdDbt(const IdDbt& other);
+    IdDbt& operator=(const IdDbt& other);
+};
+
+}}
+
+#endif // ifndef QPID_LEGACYSTORE_IDDBT_H
diff --git a/cpp/src/qpid/legacystore/IdSequence.cpp b/cpp/src/qpid/legacystore/IdSequence.cpp new file mode 100644 index 0000000000..975b1107e7 --- /dev/null +++ b/cpp/src/qpid/legacystore/IdSequence.cpp @@ -0,0 +1,40 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/legacystore/IdSequence.h" + +using namespace mrg::msgstore; +using qpid::sys::Mutex; + +IdSequence::IdSequence() : id(1) {} + +u_int64_t IdSequence::next() +{ + Mutex::ScopedLock guard(lock); + if (!id) id++; // avoid 0 when folding around + return id++; +} + +void IdSequence::reset(uint64_t value) +{ + //deliberately not threadsafe, used only on recovery + id = value; +} diff --git a/cpp/src/qpid/legacystore/IdSequence.h b/cpp/src/qpid/legacystore/IdSequence.h new file mode 100644 index 0000000000..11d7ff61ca --- /dev/null +++ b/cpp/src/qpid/legacystore/IdSequence.h @@ -0,0 +1,44 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LEGACYSTORE_IDSEQUENCE_H +#define QPID_LEGACYSTORE_IDSEQUENCE_H + +#include "qpid/framing/amqp_types.h" +#include "qpid/sys/Mutex.h" +#include <sys/types.h> + +namespace mrg{ +namespace msgstore{ + +class IdSequence +{ + qpid::sys::Mutex lock; + uint64_t id; +public: + IdSequence(); + uint64_t next(); + void reset(uint64_t value); +}; + +}} + +#endif // ifndef QPID_LEGACYSTORE_IDSEQUENCE_H diff --git a/cpp/src/qpid/legacystore/JournalImpl.cpp b/cpp/src/qpid/legacystore/JournalImpl.cpp new file mode 100644 index 0000000000..ba3f2aecae --- /dev/null +++ b/cpp/src/qpid/legacystore/JournalImpl.cpp @@ -0,0 +1,633 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/legacystore/JournalImpl.h" + +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jexception.h" +#include "qpid/log/Statement.h" +#include "qpid/management/ManagementAgent.h" +#include "qmf/org/apache/qpid/legacystore/ArgsJournalExpand.h" +#include "qmf/org/apache/qpid/legacystore/EventCreated.h" +#include "qmf/org/apache/qpid/legacystore/EventEnqThresholdExceeded.h" +#include "qmf/org/apache/qpid/legacystore/EventFull.h" +#include "qmf/org/apache/qpid/legacystore/EventRecovered.h" +#include "qpid/sys/Monitor.h" +#include "qpid/sys/Timer.h" +#include "qpid/legacystore/StoreException.h" + +using namespace mrg::msgstore; +using namespace mrg::journal; +using qpid::management::ManagementAgent; +namespace _qmf = qmf::org::apache::qpid::legacystore; + +InactivityFireEvent::InactivityFireEvent(JournalImpl* p, const qpid::sys::Duration timeout): + qpid::sys::TimerTask(timeout, "JournalInactive:"+p->id()), _parent(p) {} + +void InactivityFireEvent::fire() { qpid::sys::Mutex::ScopedLock sl(_ife_lock); if (_parent) _parent->flushFire(); } + +GetEventsFireEvent::GetEventsFireEvent(JournalImpl* p, const qpid::sys::Duration timeout): + qpid::sys::TimerTask(timeout, "JournalGetEvents:"+p->id()), _parent(p) {} + +void GetEventsFireEvent::fire() { qpid::sys::Mutex::ScopedLock sl(_gefe_lock); if (_parent) _parent->getEventsFire(); } + +JournalImpl::JournalImpl(qpid::sys::Timer& timer_, + const std::string& journalId, + const std::string& journalDirectory, + const std::string& journalBaseFilename, + const qpid::sys::Duration getEventsTimeout, + const qpid::sys::Duration flushTimeout, + qpid::management::ManagementAgent* a, + DeleteCallback onDelete): + jcntl(journalId, journalDirectory, journalBaseFilename), + timer(timer_), + getEventsTimerSetFlag(false), + lastReadRid(0), + writeActivityFlag(false), + flushTriggeredFlag(true), + _xidp(0), + _datap(0), + _dlen(0), + _dtok(), + _external(false), + deleteCallback(onDelete) +{ + 
getEventsFireEventsPtr = new GetEventsFireEvent(this, getEventsTimeout); + inactivityFireEventPtr = new InactivityFireEvent(this, flushTimeout); + { + timer.start(); + timer.add(inactivityFireEventPtr); + } + + initManagement(a); + + log(LOG_NOTICE, "Created"); + std::ostringstream oss; + oss << "Journal directory = \"" << journalDirectory << "\"; Base file name = \"" << journalBaseFilename << "\""; + log(LOG_DEBUG, oss.str()); +} + +JournalImpl::~JournalImpl() +{ + if (deleteCallback) deleteCallback(*this); + if (_init_flag && !_stop_flag){ + try { stop(true); } // NOTE: This will *block* until all outstanding disk aio calls are complete! + catch (const jexception& e) { log(LOG_ERROR, e.what()); } + } + getEventsFireEventsPtr->cancel(); + inactivityFireEventPtr->cancel(); + free_read_buffers(); + + if (_mgmtObject.get() != 0) { + _mgmtObject->resourceDestroy(); + _mgmtObject.reset(); + } + + log(LOG_NOTICE, "Destroyed"); +} + +void +JournalImpl::initManagement(qpid::management::ManagementAgent* a) +{ + _agent = a; + if (_agent != 0) + { + _mgmtObject = _qmf::Journal::shared_ptr ( + new _qmf::Journal(_agent, this)); + + _mgmtObject->set_name(_jid); + _mgmtObject->set_directory(_jdir.dirname()); + _mgmtObject->set_baseFileName(_base_filename); + _mgmtObject->set_readPageSize(JRNL_RMGR_PAGE_SIZE * JRNL_SBLK_SIZE * JRNL_DBLK_SIZE); + _mgmtObject->set_readPages(JRNL_RMGR_PAGES); + + // The following will be set on initialize(), but being properties, these must be set to 0 in the meantime + _mgmtObject->set_initialFileCount(0); + _mgmtObject->set_dataFileSize(0); + _mgmtObject->set_currentFileCount(0); + _mgmtObject->set_writePageSize(0); + _mgmtObject->set_writePages(0); + + _agent->addObject(_mgmtObject, 0, true); + } +} + + +void +JournalImpl::initialize(const u_int16_t num_jfiles, + const bool auto_expand, + const u_int16_t ae_max_jfiles, + const u_int32_t jfsize_sblks, + const u_int16_t wcache_num_pages, + const u_int32_t wcache_pgsize_sblks, + 
mrg::journal::aio_callback* const cbp) +{ + std::ostringstream oss; + oss << "Initialize; num_jfiles=" << num_jfiles << " jfsize_sblks=" << jfsize_sblks; + oss << " wcache_pgsize_sblks=" << wcache_pgsize_sblks; + oss << " wcache_num_pages=" << wcache_num_pages; + log(LOG_DEBUG, oss.str()); + jcntl::initialize(num_jfiles, auto_expand, ae_max_jfiles, jfsize_sblks, wcache_num_pages, wcache_pgsize_sblks, cbp); + log(LOG_DEBUG, "Initialization complete"); + + if (_mgmtObject.get() != 0) + { + _mgmtObject->set_initialFileCount(_lpmgr.num_jfiles()); + _mgmtObject->set_autoExpand(_lpmgr.is_ae()); + _mgmtObject->set_currentFileCount(_lpmgr.num_jfiles()); + _mgmtObject->set_maxFileCount(_lpmgr.ae_max_jfiles()); + _mgmtObject->set_dataFileSize(_jfsize_sblks * JRNL_SBLK_SIZE * JRNL_DBLK_SIZE); + _mgmtObject->set_writePageSize(wcache_pgsize_sblks * JRNL_SBLK_SIZE * JRNL_DBLK_SIZE); + _mgmtObject->set_writePages(wcache_num_pages); + } + if (_agent != 0) + _agent->raiseEvent(qmf::org::apache::qpid::legacystore::EventCreated(_jid, _jfsize_sblks * JRNL_SBLK_SIZE * JRNL_DBLK_SIZE, _lpmgr.num_jfiles()), + qpid::management::ManagementAgent::SEV_NOTE); +} + +void +JournalImpl::recover(const u_int16_t num_jfiles, + const bool auto_expand, + const u_int16_t ae_max_jfiles, + const u_int32_t jfsize_sblks, + const u_int16_t wcache_num_pages, + const u_int32_t wcache_pgsize_sblks, + mrg::journal::aio_callback* const cbp, + boost::ptr_list<msgstore::PreparedTransaction>* prep_tx_list_ptr, + u_int64_t& highest_rid, + u_int64_t queue_id) +{ + std::ostringstream oss1; + oss1 << "Recover; num_jfiles=" << num_jfiles << " jfsize_sblks=" << jfsize_sblks; + oss1 << " queue_id = 0x" << std::hex << queue_id << std::dec; + oss1 << " wcache_pgsize_sblks=" << wcache_pgsize_sblks; + oss1 << " wcache_num_pages=" << wcache_num_pages; + log(LOG_DEBUG, oss1.str()); + + if (_mgmtObject.get() != 0) + { + _mgmtObject->set_initialFileCount(_lpmgr.num_jfiles()); + _mgmtObject->set_autoExpand(_lpmgr.is_ae()); + 
_mgmtObject->set_currentFileCount(_lpmgr.num_jfiles()); + _mgmtObject->set_maxFileCount(_lpmgr.ae_max_jfiles()); + _mgmtObject->set_dataFileSize(_jfsize_sblks * JRNL_SBLK_SIZE * JRNL_DBLK_SIZE); + _mgmtObject->set_writePageSize(wcache_pgsize_sblks * JRNL_SBLK_SIZE * JRNL_DBLK_SIZE); + _mgmtObject->set_writePages(wcache_num_pages); + } + + if (prep_tx_list_ptr) { + // Create list of prepared xids + std::vector<std::string> prep_xid_list; + for (msgstore::PreparedTransaction::list::iterator i = prep_tx_list_ptr->begin(); i != prep_tx_list_ptr->end(); i++) { + prep_xid_list.push_back(i->xid); + } + + jcntl::recover(num_jfiles, auto_expand, ae_max_jfiles, jfsize_sblks, wcache_num_pages, wcache_pgsize_sblks, + cbp, &prep_xid_list, highest_rid); + } else { + jcntl::recover(num_jfiles, auto_expand, ae_max_jfiles, jfsize_sblks, wcache_num_pages, wcache_pgsize_sblks, + cbp, 0, highest_rid); + } + + // Populate PreparedTransaction lists from _tmap + if (prep_tx_list_ptr) + { + for (msgstore::PreparedTransaction::list::iterator i = prep_tx_list_ptr->begin(); i != prep_tx_list_ptr->end(); i++) { + txn_data_list tdl = _tmap.get_tdata_list(i->xid); // tdl will be empty if xid not found + for (tdl_itr tdl_itr = tdl.begin(); tdl_itr < tdl.end(); tdl_itr++) { + if (tdl_itr->_enq_flag) { // enqueue op + i->enqueues->add(queue_id, tdl_itr->_rid); + } else { // dequeue op + i->dequeues->add(queue_id, tdl_itr->_drid); + } + } + } + } + std::ostringstream oss2; + oss2 << "Recover phase 1 complete; highest rid found = 0x" << std::hex << highest_rid; + oss2 << std::dec << "; emap.size=" << _emap.size() << "; tmap.size=" << _tmap.size(); + oss2 << "; journal now read-only."; + log(LOG_DEBUG, oss2.str()); + + if (_mgmtObject.get() != 0) + { + _mgmtObject->inc_recordDepth(_emap.size()); + _mgmtObject->inc_enqueues(_emap.size()); + _mgmtObject->inc_txn(_tmap.size()); + _mgmtObject->inc_txnEnqueues(_tmap.enq_cnt()); + _mgmtObject->inc_txnDequeues(_tmap.deq_cnt()); + } +} + +void 
+JournalImpl::recover_complete() +{ + jcntl::recover_complete(); + log(LOG_DEBUG, "Recover phase 2 complete; journal now writable."); + if (_agent != 0) + _agent->raiseEvent(qmf::org::apache::qpid::legacystore::EventRecovered(_jid, _jfsize_sblks * JRNL_SBLK_SIZE * JRNL_DBLK_SIZE, _lpmgr.num_jfiles(), + _emap.size(), _tmap.size(), _tmap.enq_cnt(), _tmap.deq_cnt()), qpid::management::ManagementAgent::SEV_NOTE); +} + +//#define MAX_AIO_SLEEPS 1000000 // tot: ~10 sec +//#define AIO_SLEEP_TIME_US 10 // 0.01 ms +// Return true if content is recovered from store; false if content is external and must be recovered from an external store. +// Throw exception for all errors. +bool +JournalImpl::loadMsgContent(u_int64_t rid, std::string& data, size_t length, size_t offset) +{ + qpid::sys::Mutex::ScopedLock sl(_read_lock); + if (_dtok.rid() != rid) + { + // Free any previous msg + free_read_buffers(); + + // Last read encountered out-of-order rids, check if this rid is in that list + bool oooFlag = false; + for (std::vector<u_int64_t>::const_iterator i=oooRidList.begin(); i!=oooRidList.end() && !oooFlag; i++) { + if (*i == rid) { + oooFlag = true; + } + } + + // TODO: This is a brutal approach - very inefficient and slow. Rather introduce a system of remembering + // jumpover points and allow the read to jump back to the first known jumpover point - but this needs + // a mechanism in rrfc to accomplish it. Also helpful is a struct containing a journal address - a + // combination of lid/offset. + // NOTE: The second part of the if stmt (rid < lastReadRid) is required to handle browsing. 
+ if (oooFlag || rid < lastReadRid) { + _rmgr.invalidate(); + oooRidList.clear(); + } + _dlen = 0; + _dtok.reset(); + _dtok.set_wstate(DataTokenImpl::ENQ); + _dtok.set_rid(0); + _external = false; + size_t xlen = 0; + bool transient = false; + bool done = false; + bool rid_found = false; + while (!done) { + iores res = read_data_record(&_datap, _dlen, &_xidp, xlen, transient, _external, &_dtok); + switch (res) { + case mrg::journal::RHM_IORES_SUCCESS: + if (_dtok.rid() != rid) { + // Check if this is an out-of-order rid that may impact next read + if (_dtok.rid() > rid) + oooRidList.push_back(_dtok.rid()); + free_read_buffers(); + // Reset data token for next read + _dlen = 0; + _dtok.reset(); + _dtok.set_wstate(DataTokenImpl::ENQ); + _dtok.set_rid(0); + } else { + rid_found = _dtok.rid() == rid; + lastReadRid = rid; + done = true; + } + break; + case mrg::journal::RHM_IORES_PAGE_AIOWAIT: + if (get_wr_events(&_aio_cmpl_timeout) == journal::jerrno::AIO_TIMEOUT) { + std::stringstream ss; + ss << "read_data_record() returned " << mrg::journal::iores_str(res); + ss << "; timed out waiting for page to be processed."; + throw jexception(mrg::journal::jerrno::JERR__TIMEOUT, ss.str().c_str(), "JournalImpl", + "loadMsgContent"); + } + break; + default: + std::stringstream ss; + ss << "read_data_record() returned " << mrg::journal::iores_str(res); + throw jexception(mrg::journal::jerrno::JERR__UNEXPRESPONSE, ss.str().c_str(), "JournalImpl", + "loadMsgContent"); + } + } + if (!rid_found) { + std::stringstream ss; + ss << "read_data_record() was unable to find rid 0x" << std::hex << rid << std::dec; + ss << " (" << rid << "); last rid found was 0x" << std::hex << _dtok.rid() << std::dec; + ss << " (" << _dtok.rid() << ")"; + throw jexception(mrg::journal::jerrno::JERR__RECNFOUND, ss.str().c_str(), "JournalImpl", "loadMsgContent"); + } + } + + if (_external) return false; + + u_int32_t hdr_offs = qpid::framing::Buffer(static_cast<char*>(_datap), sizeof(u_int32_t)).getLong() + 
sizeof(u_int32_t); + if (hdr_offs + offset + length > _dlen) { + data.append((const char*)_datap + hdr_offs + offset, _dlen - hdr_offs - offset); + } else { + data.append((const char*)_datap + hdr_offs + offset, length); + } + return true; +} + +void +JournalImpl::enqueue_data_record(const void* const data_buff, const size_t tot_data_len, + const size_t this_data_len, data_tok* dtokp, const bool transient) +{ + handleIoResult(jcntl::enqueue_data_record(data_buff, tot_data_len, this_data_len, dtokp, transient)); + + if (_mgmtObject.get() != 0) + { + _mgmtObject->inc_enqueues(); + _mgmtObject->inc_recordDepth(); + } +} + +void +JournalImpl::enqueue_extern_data_record(const size_t tot_data_len, data_tok* dtokp, + const bool transient) +{ + handleIoResult(jcntl::enqueue_extern_data_record(tot_data_len, dtokp, transient)); + + if (_mgmtObject.get() != 0) + { + _mgmtObject->inc_enqueues(); + _mgmtObject->inc_recordDepth(); + } +} + +void +JournalImpl::enqueue_txn_data_record(const void* const data_buff, const size_t tot_data_len, + const size_t this_data_len, data_tok* dtokp, const std::string& xid, const bool transient) +{ + bool txn_incr = _mgmtObject.get() != 0 ? _tmap.in_map(xid) : false; + + handleIoResult(jcntl::enqueue_txn_data_record(data_buff, tot_data_len, this_data_len, dtokp, xid, transient)); + + if (_mgmtObject.get() != 0) + { + if (!txn_incr) // If this xid was not in _tmap, it will be now... + _mgmtObject->inc_txn(); + _mgmtObject->inc_enqueues(); + _mgmtObject->inc_txnEnqueues(); + _mgmtObject->inc_recordDepth(); + } +} + +void +JournalImpl::enqueue_extern_txn_data_record(const size_t tot_data_len, data_tok* dtokp, + const std::string& xid, const bool transient) +{ + bool txn_incr = _mgmtObject.get() != 0 ? _tmap.in_map(xid) : false; + + handleIoResult(jcntl::enqueue_extern_txn_data_record(tot_data_len, dtokp, xid, transient)); + + if (_mgmtObject.get() != 0) + { + if (!txn_incr) // If this xid was not in _tmap, it will be now... 
+ _mgmtObject->inc_txn(); + _mgmtObject->inc_enqueues(); + _mgmtObject->inc_txnEnqueues(); + _mgmtObject->inc_recordDepth(); + } +} + +void +JournalImpl::dequeue_data_record(data_tok* const dtokp, const bool txn_coml_commit) +{ + handleIoResult(jcntl::dequeue_data_record(dtokp, txn_coml_commit)); + + if (_mgmtObject.get() != 0) + { + _mgmtObject->inc_dequeues(); + _mgmtObject->inc_txnDequeues(); + _mgmtObject->dec_recordDepth(); + } +} + +void +JournalImpl::dequeue_txn_data_record(data_tok* const dtokp, const std::string& xid, const bool txn_coml_commit) +{ + bool txn_incr = _mgmtObject.get() != 0 ? _tmap.in_map(xid) : false; + + handleIoResult(jcntl::dequeue_txn_data_record(dtokp, xid, txn_coml_commit)); + + if (_mgmtObject.get() != 0) + { + if (!txn_incr) // If this xid was not in _tmap, it will be now... + _mgmtObject->inc_txn(); + _mgmtObject->inc_dequeues(); + _mgmtObject->inc_txnDequeues(); + _mgmtObject->dec_recordDepth(); + } +} + +void +JournalImpl::txn_abort(data_tok* const dtokp, const std::string& xid) +{ + handleIoResult(jcntl::txn_abort(dtokp, xid)); + + if (_mgmtObject.get() != 0) + { + _mgmtObject->dec_txn(); + _mgmtObject->inc_txnAborts(); + } +} + +void +JournalImpl::txn_commit(data_tok* const dtokp, const std::string& xid) +{ + handleIoResult(jcntl::txn_commit(dtokp, xid)); + + if (_mgmtObject.get() != 0) + { + _mgmtObject->dec_txn(); + _mgmtObject->inc_txnCommits(); + } +} + +void +JournalImpl::stop(bool block_till_aio_cmpl) +{ + InactivityFireEvent* ifep = dynamic_cast<InactivityFireEvent*>(inactivityFireEventPtr.get()); + assert(ifep); // dynamic_cast can return null if the cast fails + ifep->cancel(); + jcntl::stop(block_till_aio_cmpl); + + if (_mgmtObject.get() != 0) { + _mgmtObject->resourceDestroy(); + _mgmtObject.reset(); + } +} + +iores +JournalImpl::flush(const bool block_till_aio_cmpl) +{ + const iores res = jcntl::flush(block_till_aio_cmpl); + { + qpid::sys::Mutex::ScopedLock sl(_getf_lock); + if (_wmgr.get_aio_evt_rem() && 
!getEventsTimerSetFlag) { setGetEventTimer(); } + } + return res; +} + +void +JournalImpl::log(mrg::journal::log_level ll, const std::string& log_stmt) const +{ + log(ll, log_stmt.c_str()); +} + +void +JournalImpl::log(mrg::journal::log_level ll, const char* const log_stmt) const +{ + switch (ll) + { + case LOG_TRACE: QPID_LOG(trace, "Journal \"" << _jid << "\": " << log_stmt); break; + case LOG_DEBUG: QPID_LOG(debug, "Journal \"" << _jid << "\": " << log_stmt); break; + case LOG_INFO: QPID_LOG(info, "Journal \"" << _jid << "\": " << log_stmt); break; + case LOG_NOTICE: QPID_LOG(notice, "Journal \"" << _jid << "\": " << log_stmt); break; + case LOG_WARN: QPID_LOG(warning, "Journal \"" << _jid << "\": " << log_stmt); break; + case LOG_ERROR: QPID_LOG(error, "Journal \"" << _jid << "\": " << log_stmt); break; + case LOG_CRITICAL: QPID_LOG(critical, "Journal \"" << _jid << "\": " << log_stmt); break; + } +} + +void +JournalImpl::getEventsFire() +{ + qpid::sys::Mutex::ScopedLock sl(_getf_lock); + getEventsTimerSetFlag = false; + if (_wmgr.get_aio_evt_rem()) { jcntl::get_wr_events(0); } + if (_wmgr.get_aio_evt_rem()) { setGetEventTimer(); } +} + +void +JournalImpl::flushFire() +{ + if (writeActivityFlag) { + writeActivityFlag = false; + flushTriggeredFlag = false; + } else { + if (!flushTriggeredFlag) { + flush(); + flushTriggeredFlag = true; + } + } + inactivityFireEventPtr->setupNextFire(); + { + timer.add(inactivityFireEventPtr); + } +} + +void +JournalImpl::wr_aio_cb(std::vector<data_tok*>& dtokl) +{ + for (std::vector<data_tok*>::const_iterator i=dtokl.begin(); i!=dtokl.end(); i++) + { + DataTokenImpl* dtokp = static_cast<DataTokenImpl*>(*i); + if (/*!is_stopped() &&*/ dtokp->getSourceMessage()) + { + switch (dtokp->wstate()) + { + case data_tok::ENQ: + dtokp->getSourceMessage()->enqueueComplete(); + break; + case data_tok::DEQ: +/* Don't need to signal until we have a way to ack completion of dequeue in AMQP + dtokp->getSourceMessage()->dequeueComplete(); + if ( 
dtokp->getSourceMessage()->isDequeueComplete() ) // clear id after last dequeue + dtokp->getSourceMessage()->setPersistenceId(0); +*/ + break; + default: ; + } + } + dtokp->release(); + } +} + +void +JournalImpl::rd_aio_cb(std::vector<u_int16_t>& /*pil*/) +{} + +void +JournalImpl::free_read_buffers() +{ + if (_xidp) { + ::free(_xidp); + _xidp = 0; + _datap = 0; + } else if (_datap) { + ::free(_datap); + _datap = 0; + } +} + +void +JournalImpl::handleIoResult(const iores r) +{ + writeActivityFlag = true; + switch (r) + { + case mrg::journal::RHM_IORES_SUCCESS: + return; + case mrg::journal::RHM_IORES_ENQCAPTHRESH: + { + std::ostringstream oss; + oss << "Enqueue capacity threshold exceeded on queue \"" << _jid << "\"."; + log(LOG_WARN, oss.str()); + if (_agent != 0) + _agent->raiseEvent(qmf::org::apache::qpid::legacystore::EventEnqThresholdExceeded(_jid, "Journal enqueue capacity threshold exceeded"), + qpid::management::ManagementAgent::SEV_WARN); + THROW_STORE_FULL_EXCEPTION(oss.str()); + } + case mrg::journal::RHM_IORES_FULL: + { + std::ostringstream oss; + oss << "Journal full on queue \"" << _jid << "\"."; + log(LOG_CRITICAL, oss.str()); + if (_agent != 0) + _agent->raiseEvent(qmf::org::apache::qpid::legacystore::EventFull(_jid, "Journal full"), qpid::management::ManagementAgent::SEV_ERROR); + THROW_STORE_FULL_EXCEPTION(oss.str()); + } + default: + { + std::ostringstream oss; + oss << "Unexpected I/O response (" << mrg::journal::iores_str(r) << ") on queue " << _jid << "\"."; + log(LOG_ERROR, oss.str()); + THROW_STORE_FULL_EXCEPTION(oss.str()); + } + } +} + +qpid::management::Manageable::status_t JournalImpl::ManagementMethod (uint32_t methodId, + qpid::management::Args& /*args*/, + std::string& /*text*/) +{ + Manageable::status_t status = Manageable::STATUS_UNKNOWN_METHOD; + + switch (methodId) + { + case _qmf::Journal::METHOD_EXPAND : + //_qmf::ArgsJournalExpand& eArgs = (_qmf::ArgsJournalExpand&) args; + + // Implement "expand" using eArgs.i_by (expand-by 
argument) + + status = Manageable::STATUS_NOT_IMPLEMENTED; + break; + } + + return status; +} diff --git a/cpp/src/qpid/legacystore/JournalImpl.h b/cpp/src/qpid/legacystore/JournalImpl.h new file mode 100644 index 0000000000..7227b2ffd4 --- /dev/null +++ b/cpp/src/qpid/legacystore/JournalImpl.h @@ -0,0 +1,265 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
 *
 */

#ifndef QPID_LEGACYSTORE_JOURNALIMPL_H
#define QPID_LEGACYSTORE_JOURNALIMPL_H

#include <set>
#include "qpid/legacystore/jrnl/enums.h"
#include "qpid/legacystore/jrnl/jcntl.h"
#include "qpid/legacystore/DataTokenImpl.h"
#include "qpid/legacystore/PreparedTransaction.h"
#include "qpid/broker/PersistableQueue.h"
#include "qpid/sys/Timer.h"
#include "qpid/sys/Time.h"
#include <boost/ptr_container/ptr_list.hpp>
#include <boost/intrusive_ptr.hpp>
#include "qpid/management/Manageable.h"
#include "qmf/org/apache/qpid/legacystore/Journal.h"

namespace qpid { namespace sys {
class Timer;
}}

namespace mrg {
namespace msgstore {

class JournalImpl;

// Timer task associated with a JournalImpl for write-inactivity handling.
// cancel() clears the parent pointer under _ife_lock, detaching the task from its journal.
class InactivityFireEvent : public qpid::sys::TimerTask
{
    JournalImpl* _parent;
    qpid::sys::Mutex _ife_lock;

  public:
    InactivityFireEvent(JournalImpl* p, const qpid::sys::Duration timeout);
    virtual ~InactivityFireEvent() {}
    void fire();
    inline void cancel() { qpid::sys::Mutex::ScopedLock sl(_ife_lock); _parent = 0; }
};

// Timer task associated with a JournalImpl for collecting outstanding AIO events.
// cancel() clears the parent pointer under _gefe_lock, detaching the task from its journal.
class GetEventsFireEvent : public qpid::sys::TimerTask
{
    JournalImpl* _parent;
    qpid::sys::Mutex _gefe_lock;

  public:
    GetEventsFireEvent(JournalImpl* p, const qpid::sys::Duration timeout);
    virtual ~GetEventsFireEvent() {}
    void fire();
    inline void cancel() { qpid::sys::Mutex::ScopedLock sl(_gefe_lock); _parent = 0; }
};

// Journal for a single queue: wraps mrg::journal::jcntl, adding timer-driven flushing and
// event collection, management statistics and translation of I/O results into exceptions.
// Also acts as its own AIO callback target (see the initialize()/recover() overloads that
// pass `this`).
class JournalImpl : public qpid::broker::ExternalQueueStore, public mrg::journal::jcntl, public mrg::journal::aio_callback
{
  public:
    typedef boost::function<void (JournalImpl&)> DeleteCallback;

  private:
//  static qpid::sys::Mutex _static_lock;
//  static u_int32_t cnt;

    qpid::sys::Timer& timer;
    bool getEventsTimerSetFlag; // true while the get-events timer task is scheduled
    boost::intrusive_ptr<qpid::sys::TimerTask> getEventsFireEventsPtr;
    qpid::sys::Mutex _getf_lock; // guards getEventsTimerSetFlag and get-events timer arming
    qpid::sys::Mutex _read_lock;

    u_int64_t lastReadRid; // rid of last read msg for loadMsgContent() - detects out-of-order read requests
    std::vector<u_int64_t> oooRidList; // list of out-of-order rids (greater than current rid) encountered during read sequence

    bool writeActivityFlag; // set on every handled I/O result, cleared by flushFire()
    bool flushTriggeredFlag; // set once flushFire() has flushed an idle journal
    boost::intrusive_ptr<qpid::sys::TimerTask> inactivityFireEventPtr;

    // temp local vars for loadMsgContent below
    void* _xidp;
    void* _datap;
    size_t _dlen;
    mrg::journal::data_tok _dtok;
    bool _external;

    qpid::management::ManagementAgent* _agent;
    qmf::org::apache::qpid::legacystore::Journal::shared_ptr _mgmtObject;
    DeleteCallback deleteCallback;

  public:

    JournalImpl(qpid::sys::Timer& timer,
                const std::string& journalId,
                const std::string& journalDirectory,
                const std::string& journalBaseFilename,
                const qpid::sys::Duration getEventsTimeout,
                const qpid::sys::Duration flushTimeout,
                qpid::management::ManagementAgent* agent,
                DeleteCallback deleteCallback=DeleteCallback() );

    virtual ~JournalImpl();

    void initManagement(qpid::management::ManagementAgent* agent);

    void initialize(const u_int16_t num_jfiles,
                    const bool auto_expand,
                    const u_int16_t ae_max_jfiles,
                    const u_int32_t jfsize_sblks,
                    const u_int16_t wcache_num_pages,
                    const u_int32_t wcache_pgsize_sblks,
                    mrg::journal::aio_callback* const cbp);

    // Convenience overload: uses this object as the AIO callback.
    inline void initialize(const u_int16_t num_jfiles,
                           const bool auto_expand,
                           const u_int16_t ae_max_jfiles,
                           const u_int32_t jfsize_sblks,
                           const u_int16_t wcache_num_pages,
                           const u_int32_t wcache_pgsize_sblks) {
        initialize(num_jfiles, auto_expand, ae_max_jfiles, jfsize_sblks, wcache_num_pages, wcache_pgsize_sblks,
                   this);
    }

    void recover(const u_int16_t num_jfiles,
                 const bool auto_expand,
                 const u_int16_t ae_max_jfiles,
                 const u_int32_t jfsize_sblks,
                 const u_int16_t wcache_num_pages,
                 const u_int32_t wcache_pgsize_sblks,
                 mrg::journal::aio_callback* const cbp,
                 boost::ptr_list<msgstore::PreparedTransaction>* prep_tx_list_ptr,
                 u_int64_t& highest_rid,
                 u_int64_t queue_id);

    // Convenience overload: uses this object as the AIO callback.
    inline void recover(const u_int16_t num_jfiles,
                        const bool auto_expand,
                        const u_int16_t ae_max_jfiles,
                        const u_int32_t jfsize_sblks,
                        const u_int16_t wcache_num_pages,
                        const u_int32_t wcache_pgsize_sblks,
                        boost::ptr_list<msgstore::PreparedTransaction>* prep_tx_list_ptr,
                        u_int64_t& highest_rid,
                        u_int64_t queue_id) {
        recover(num_jfiles, auto_expand, ae_max_jfiles, jfsize_sblks, wcache_num_pages, wcache_pgsize_sblks,
                this, prep_tx_list_ptr, highest_rid, queue_id);
    }

    void recover_complete();

    // Temporary fn to read and save last msg read from journal so it can be assigned
    // in chunks. To be replaced when coding to do this direct from the journal is ready.
    // Returns true if the record is extern, false if local.
    // NOTE(review): the implementation appears to return false for external records and
    // true otherwise, i.e. the opposite of the sentence above - confirm which is intended.
    bool loadMsgContent(u_int64_t rid, std::string& data, size_t length, size_t offset = 0);

    // Overrides for write inactivity timer
    void enqueue_data_record(const void* const data_buff, const size_t tot_data_len,
                             const size_t this_data_len, mrg::journal::data_tok* dtokp,
                             const bool transient = false);

    void enqueue_extern_data_record(const size_t tot_data_len, mrg::journal::data_tok* dtokp,
                                    const bool transient = false);

    void enqueue_txn_data_record(const void* const data_buff, const size_t tot_data_len,
                                 const size_t this_data_len, mrg::journal::data_tok* dtokp, const std::string& xid,
                                 const bool transient = false);

    void enqueue_extern_txn_data_record(const size_t tot_data_len, mrg::journal::data_tok* dtokp,
                                        const std::string& xid, const bool transient = false);

    void dequeue_data_record(mrg::journal::data_tok* const dtokp, const bool txn_coml_commit = false);

    void dequeue_txn_data_record(mrg::journal::data_tok* const dtokp, const std::string& xid, const bool txn_coml_commit = false);

    void txn_abort(mrg::journal::data_tok* const dtokp, const std::string& xid);

    void txn_commit(mrg::journal::data_tok* const dtokp, const std::string& xid);

    void stop(bool block_till_aio_cmpl = false);

    // Logging
    void log(mrg::journal::log_level level, const std::string& log_stmt) const;
    void log(mrg::journal::log_level level, const char* const log_stmt) const;

    // Overrides for get_events timer
    mrg::journal::iores flush(const bool block_till_aio_cmpl = false);

    // TimerTask callback
    void getEventsFire();
    void flushFire();

    // AIO callbacks
    virtual void wr_aio_cb(std::vector<mrg::journal::data_tok*>& dtokl);
    virtual void rd_aio_cb(std::vector<u_int16_t>& pil);

    qpid::management::ManagementObject::shared_ptr GetManagementObject (void) const
        { return _mgmtObject; }

    qpid::management::Manageable::status_t ManagementMethod (uint32_t,
                                                             qpid::management::Args&,
                                                             std::string&);

    // Replaces the delete callback with an empty one.
    void resetDeleteCallback() { deleteCallback = DeleteCallback(); }

  private:
    void free_read_buffers();

    // Schedules the get-events timer task and records that it is armed.
    // NOTE(review): the callers visible in the .cpp hold _getf_lock when calling this - confirm
    // for any other caller.
    inline void setGetEventTimer()
    {
        getEventsFireEventsPtr->setupNextFire();
        timer.add(getEventsFireEventsPtr);
        getEventsTimerSetFlag = true;
    }
    void handleIoResult(const mrg::journal::iores r);

    // Management instrumentation callbacks overridden from jcntl
    inline void instr_incr_outstanding_aio_cnt() {
        if (_mgmtObject.get() != 0) _mgmtObject->inc_outstandingAIOs();
    }
    inline void instr_decr_outstanding_aio_cnt() {
        if (_mgmtObject.get() != 0) _mgmtObject->dec_outstandingAIOs();
    }

}; // class JournalImpl

// JournalImpl variant used for the transaction prepared list (TPL).
class TplJournalImpl : public JournalImpl
{
  public:
    TplJournalImpl(qpid::sys::Timer& timer,
                   const std::string& journalId,
                   const std::string& journalDirectory,
                   const std::string& journalBaseFilename,
                   const qpid::sys::Duration getEventsTimeout,
                   const qpid::sys::Duration flushTimeout,
                   qpid::management::ManagementAgent* agent) :
        JournalImpl(timer, journalId, journalDirectory, journalBaseFilename, getEventsTimeout, flushTimeout, agent)
    {}

    virtual ~TplJournalImpl() {}

    // Special version of read_data_record that ignores transactions - needed when reading the TPL
    // (forwards to the base-class reader with `true` as the final argument).
    inline mrg::journal::iores read_data_record(void** const datapp, std::size_t& dsize,
                                                void** const xidpp, std::size_t& xidsize, bool& transient, bool& external,
                                                mrg::journal::data_tok* const dtokp) {
        return JournalImpl::read_data_record(datapp, dsize, xidpp, xidsize, transient, external, dtokp, true);
    }
    // Invalidates the read manager so the next read starts afresh.
    inline void read_reset() { _rmgr.invalidate(); }
}; // class TplJournalImpl

} // namespace msgstore
} // namespace mrg

#endif // ifndef QPID_LEGACYSTORE_JOURNALIMPL_H
diff --git a/cpp/src/qpid/legacystore/MessageStoreImpl.cpp b/cpp/src/qpid/legacystore/MessageStoreImpl.cpp
new file mode 100644
index 0000000000..69e9f48a17
--- /dev/null
+++ b/cpp/src/qpid/legacystore/MessageStoreImpl.cpp
@@ -0,0 +1,1732 @@
/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
+ * + */ + +#include "qpid/legacystore/MessageStoreImpl.h" + +#include "qpid/legacystore/BindingDbt.h" +#include "qpid/legacystore/BufferValue.h" +#include "qpid/legacystore/IdDbt.h" +#include "qpid/legacystore/jrnl/txn_map.h" +#include "qpid/framing/FieldValue.h" +#include "qpid/log/Statement.h" +#include "qmf/org/apache/qpid/legacystore/Package.h" +#include "qpid/legacystore/StoreException.h" +#include <dirent.h> +#include <db.h> + +#define MAX_AIO_SLEEPS 100000 // tot: ~1 sec +#define AIO_SLEEP_TIME_US 10 // 0.01 ms + +namespace _qmf = qmf::org::apache::qpid::legacystore; + +namespace mrg { +namespace msgstore { + + +const std::string MessageStoreImpl::storeTopLevelDir("rhm"); // Sets the top-level store dir name +// FIXME aconway 2010-03-09: was 10 +qpid::sys::Duration MessageStoreImpl::defJournalGetEventsTimeout(1 * qpid::sys::TIME_MSEC); // 10ms +qpid::sys::Duration MessageStoreImpl::defJournalFlushTimeout(500 * qpid::sys::TIME_MSEC); // 0.5s +qpid::sys::Mutex TxnCtxt::globalSerialiser; + +MessageStoreImpl::TplRecoverStruct::TplRecoverStruct(const u_int64_t _rid, + const bool _deq_flag, + const bool _commit_flag, + const bool _tpc_flag) : + rid(_rid), + deq_flag(_deq_flag), + commit_flag(_commit_flag), + tpc_flag(_tpc_flag) +{} + +MessageStoreImpl::MessageStoreImpl(qpid::broker::Broker* broker_, const char* envpath) : + numJrnlFiles(0), + autoJrnlExpand(false), + autoJrnlExpandMaxFiles(0), + jrnlFsizeSblks(0), + truncateFlag(false), + wCachePgSizeSblks(0), + wCacheNumPages(0), + tplNumJrnlFiles(0), + tplJrnlFsizeSblks(0), + tplWCachePgSizeSblks(0), + tplWCacheNumPages(0), + highestRid(0), + isInit(false), + envPath(envpath), + broker(broker_), + mgmtObject(), + agent(0) +{} + +u_int16_t MessageStoreImpl::chkJrnlNumFilesParam(const u_int16_t param, const std::string paramName) +{ + u_int16_t p = param; + if (p < JRNL_MIN_NUM_FILES) { + p = JRNL_MIN_NUM_FILES; + QPID_LOG(warning, "parameter " << paramName << " (" << param << ") is below allowable minimum (" << 
JRNL_MIN_NUM_FILES << "); changing this parameter to minimum value."); + } else if (p > JRNL_MAX_NUM_FILES) { + p = JRNL_MAX_NUM_FILES; + QPID_LOG(warning, "parameter " << paramName << " (" << param << ") is above allowable maximum (" << JRNL_MAX_NUM_FILES << "); changing this parameter to maximum value."); + } + return p; +} + +u_int32_t MessageStoreImpl::chkJrnlFileSizeParam(const u_int32_t param, const std::string paramName, const u_int32_t wCachePgSizeSblks) +{ + u_int32_t p = param; + u_int32_t min = JRNL_MIN_FILE_SIZE / JRNL_RMGR_PAGE_SIZE; + u_int32_t max = JRNL_MAX_FILE_SIZE / JRNL_RMGR_PAGE_SIZE; + if (p < min) { + p = min; + QPID_LOG(warning, "parameter " << paramName << " (" << param << ") is below allowable minimum (" << min << "); changing this parameter to minimum value."); + } else if (p > max) { + p = max; + QPID_LOG(warning, "parameter " << paramName << " (" << param << ") is above allowable maximum (" << max << "); changing this parameter to maximum value."); + } + if (wCachePgSizeSblks > p * JRNL_RMGR_PAGE_SIZE) { + std::ostringstream oss; + oss << "Cannot create store with file size less than write page cache size. 
[file size = " << p << " (" << (p * JRNL_RMGR_PAGE_SIZE / 2) << " kB); write page cache = " << (wCachePgSizeSblks / 2) << " kB]"; + THROW_STORE_EXCEPTION(oss.str()); + } + return p; +} + +u_int32_t MessageStoreImpl::chkJrnlWrPageCacheSize(const u_int32_t param, const std::string paramName, const u_int16_t jrnlFsizePgs) +{ + u_int32_t p = param; + switch (p) + { + case 1: + case 2: + case 4: + case 8: + case 16: + case 32: + case 64: + case 128: + if (jrnlFsizePgs == 1) { + p = 64; + QPID_LOG(warning, "parameter " << paramName << " (" << param << ") cannot set a page size greater than the journal file size; changing this parameter to the journal file size (" << p << ")"); + } + break; + default: + if (p == 0) { + // For zero value, use default + p = JRNL_WMGR_DEF_PAGE_SIZE * JRNL_DBLK_SIZE * JRNL_SBLK_SIZE / 1024; + QPID_LOG(warning, "parameter " << paramName << " (" << param << ") must be a power of 2 between 1 and 128; changing this parameter to default value (" << p << ")"); + } else { + // For any positive value, use closest value + if (p < 6) p = 4; + else if (p < 12) p = 8; + else if (p < 24) p = 16; + else if (p < 48) p = 32; + else if (p < 96) p = 64; + else p = 128; + QPID_LOG(warning, "parameter " << paramName << " (" << param << ") must be a power of 2 between 1 and 128; changing this parameter to closest allowable value (" << p << ")"); + } + } + return p; +} + +u_int16_t MessageStoreImpl::getJrnlWrNumPages(const u_int32_t wrPageSizeKib) +{ + u_int32_t wrPageSizeSblks = wrPageSizeKib * 1024 / JRNL_DBLK_SIZE / JRNL_SBLK_SIZE; // convert from KiB to number sblks + u_int32_t defTotWCacheSize = JRNL_WMGR_DEF_PAGE_SIZE * JRNL_WMGR_DEF_PAGES; // in sblks. Currently 2014 sblks (1 MiB). 
+ switch (wrPageSizeKib) + { + case 1: + case 2: + case 4: + // 256 KiB total cache + return defTotWCacheSize / wrPageSizeSblks / 4; + case 8: + case 16: + // 512 KiB total cache + return defTotWCacheSize / wrPageSizeSblks / 2; + default: // 32, 64, 128 + // 1 MiB total cache + return defTotWCacheSize / wrPageSizeSblks; + } +} + +void MessageStoreImpl::chkJrnlAutoExpandOptions(const StoreOptions* opts, + bool& autoJrnlExpand, + u_int16_t& autoJrnlExpandMaxFiles, + const std::string& autoJrnlExpandMaxFilesParamName, + const u_int16_t numJrnlFiles, + const std::string& numJrnlFilesParamName) +{ + if (!opts->autoJrnlExpand) { + // auto-expand disabled + autoJrnlExpand = false; + autoJrnlExpandMaxFiles = 0; + return; + } + u_int16_t p = opts->autoJrnlExpandMaxFiles; + if (numJrnlFiles == JRNL_MAX_NUM_FILES) { + // num-jfiles at max; disable auto-expand + autoJrnlExpand = false; + autoJrnlExpandMaxFiles = 0; + QPID_LOG(warning, "parameter " << autoJrnlExpandMaxFilesParamName << " (" << p << ") must be higher than parameter " + << numJrnlFilesParamName << " (" << numJrnlFiles << ") which is at the maximum allowable value; disabling auto-expand."); + return; + } + if (p > JRNL_MAX_NUM_FILES) { + // auto-expand-max-jfiles higher than max allowable, adjust + autoJrnlExpand = true; + autoJrnlExpandMaxFiles = JRNL_MAX_NUM_FILES; + QPID_LOG(warning, "parameter " << autoJrnlExpandMaxFilesParamName << " (" << p << ") is above allowable maximum (" + << JRNL_MAX_NUM_FILES << "); changing this parameter to maximum value."); + return; + } + if (p && p == defAutoJrnlExpandMaxFiles && numJrnlFiles != defTplNumJrnlFiles) { + // num-jfiles is different from the default AND max-auto-expand-jfiles is still at default + // change value of max-auto-expand-jfiles + autoJrnlExpand = true; + if (2 * numJrnlFiles <= JRNL_MAX_NUM_FILES) { + autoJrnlExpandMaxFiles = 2 * numJrnlFiles <= JRNL_MAX_NUM_FILES ? 
2 * numJrnlFiles : JRNL_MAX_NUM_FILES; + QPID_LOG(warning, "parameter " << autoJrnlExpandMaxFilesParamName << " adjusted from its default value (" + << defAutoJrnlExpandMaxFiles << ") to twice that of parameter " << numJrnlFilesParamName << " (" << autoJrnlExpandMaxFiles << ")."); + } else { + autoJrnlExpandMaxFiles = 2 * numJrnlFiles <= JRNL_MAX_NUM_FILES ? 2 * numJrnlFiles : JRNL_MAX_NUM_FILES; + QPID_LOG(warning, "parameter " << autoJrnlExpandMaxFilesParamName << " adjusted from its default to maximum allowable value (" + << JRNL_MAX_NUM_FILES << ") because of the value of " << numJrnlFilesParamName << " (" << numJrnlFiles << ")."); + } + return; + } + // No adjustments req'd, set values + autoJrnlExpand = true; + autoJrnlExpandMaxFiles = p; +} + +void MessageStoreImpl::initManagement () +{ + if (broker != 0) { + agent = broker->getManagementAgent(); + if (agent != 0) { + _qmf::Package packageInitializer(agent); + mgmtObject = _qmf::Store::shared_ptr ( + new _qmf::Store(agent, this, broker)); + + mgmtObject->set_location(storeDir); + mgmtObject->set_defaultInitialFileCount(numJrnlFiles); + mgmtObject->set_defaultDataFileSize(jrnlFsizeSblks / JRNL_RMGR_PAGE_SIZE); + mgmtObject->set_tplIsInitialized(false); + mgmtObject->set_tplDirectory(getTplBaseDir()); + mgmtObject->set_tplWritePageSize(tplWCachePgSizeSblks * JRNL_SBLK_SIZE * JRNL_DBLK_SIZE); + mgmtObject->set_tplWritePages(tplWCacheNumPages); + mgmtObject->set_tplInitialFileCount(tplNumJrnlFiles); + mgmtObject->set_tplDataFileSize(tplJrnlFsizeSblks * JRNL_SBLK_SIZE * JRNL_DBLK_SIZE); + mgmtObject->set_tplCurrentFileCount(tplNumJrnlFiles); + + agent->addObject(mgmtObject, 0, true); + + // Initialize all existing queues (ie those recovered before management was initialized) + for (JournalListMapItr i=journalList.begin(); i!=journalList.end(); i++) { + i->second->initManagement(agent); + } + } + } +} + +bool MessageStoreImpl::init(const qpid::Options* options) +{ + // Extract and check options + const 
StoreOptions* opts = static_cast<const StoreOptions*>(options); + u_int16_t numJrnlFiles = chkJrnlNumFilesParam(opts->numJrnlFiles, "num-jfiles"); + u_int32_t jrnlFsizePgs = chkJrnlFileSizeParam(opts->jrnlFsizePgs, "jfile-size-pgs"); + u_int32_t jrnlWrCachePageSizeKib = chkJrnlWrPageCacheSize(opts->wCachePageSizeKib, "wcache-page-size", jrnlFsizePgs); + u_int16_t tplNumJrnlFiles = chkJrnlNumFilesParam(opts->tplNumJrnlFiles, "tpl-num-jfiles"); + u_int32_t tplJrnlFSizePgs = chkJrnlFileSizeParam(opts->tplJrnlFsizePgs, "tpl-jfile-size-pgs"); + u_int32_t tplJrnlWrCachePageSizeKib = chkJrnlWrPageCacheSize(opts->tplWCachePageSizeKib, "tpl-wcache-page-size", tplJrnlFSizePgs); + bool autoJrnlExpand; + u_int16_t autoJrnlExpandMaxFiles; + chkJrnlAutoExpandOptions(opts, autoJrnlExpand, autoJrnlExpandMaxFiles, "auto-expand-max-jfiles", numJrnlFiles, "num-jfiles"); + + // Pass option values to init(...) + return init(opts->storeDir, numJrnlFiles, jrnlFsizePgs, opts->truncateFlag, jrnlWrCachePageSizeKib, tplNumJrnlFiles, tplJrnlFSizePgs, tplJrnlWrCachePageSizeKib, autoJrnlExpand, autoJrnlExpandMaxFiles); +} + +// These params, taken from options, are assumed to be correct and verified +bool MessageStoreImpl::init(const std::string& dir, + u_int16_t jfiles, + u_int32_t jfileSizePgs, + const bool truncateFlag, + u_int32_t wCachePageSizeKib, + u_int16_t tplJfiles, + u_int32_t tplJfileSizePgs, + u_int32_t tplWCachePageSizeKib, + bool autoJExpand, + u_int16_t autoJExpandMaxFiles) +{ + if (isInit) return true; + + // Set geometry members (converting to correct units where req'd) + numJrnlFiles = jfiles; + jrnlFsizeSblks = jfileSizePgs * JRNL_RMGR_PAGE_SIZE; + wCachePgSizeSblks = wCachePageSizeKib * 1024 / JRNL_DBLK_SIZE / JRNL_SBLK_SIZE; // convert from KiB to number sblks + wCacheNumPages = getJrnlWrNumPages(wCachePageSizeKib); + tplNumJrnlFiles = tplJfiles; + tplJrnlFsizeSblks = tplJfileSizePgs * JRNL_RMGR_PAGE_SIZE; + tplWCachePgSizeSblks = tplWCachePageSizeKib * 1024 / 
JRNL_DBLK_SIZE / JRNL_SBLK_SIZE; // convert from KiB to number sblks + tplWCacheNumPages = getJrnlWrNumPages(tplWCachePageSizeKib); + autoJrnlExpand = autoJExpand; + autoJrnlExpandMaxFiles = autoJExpandMaxFiles; + if (dir.size()>0) storeDir = dir; + + if (truncateFlag) + truncateInit(false); + else + init(); + + QPID_LOG(notice, "Store module initialized; store-dir=" << dir); + QPID_LOG(info, "> Default files per journal: " << jfiles); +// TODO: Uncomment these lines when auto-expand is enabled. +// QPID_LOG(info, "> Auto-expand " << (autoJrnlExpand ? "enabled" : "disabled")); +// if (autoJrnlExpand) QPID_LOG(info, "> Max auto-expand journal files: " << autoJrnlExpandMaxFiles); + QPID_LOG(info, "> Default journal file size: " << jfileSizePgs << " (wpgs)"); + QPID_LOG(info, "> Default write cache page size: " << wCachePageSizeKib << " (KiB)"); + QPID_LOG(info, "> Default number of write cache pages: " << wCacheNumPages); + QPID_LOG(info, "> TPL files per journal: " << tplNumJrnlFiles); + QPID_LOG(info, "> TPL journal file size: " << tplJfileSizePgs << " (wpgs)"); + QPID_LOG(info, "> TPL write cache page size: " << tplWCachePageSizeKib << " (KiB)"); + QPID_LOG(info, "> TPL number of write cache pages: " << tplWCacheNumPages); + + return isInit; +} + +void MessageStoreImpl::init() +{ + const int retryMax = 3; + int bdbRetryCnt = 0; + do { + if (bdbRetryCnt++ > 0) + { + closeDbs(); + ::usleep(1000000); // 1 sec delay + QPID_LOG(error, "Previoius BDB store initialization failed, retrying (" << bdbRetryCnt << " of " << retryMax << ")..."); + } + + try { + journal::jdir::create_dir(getBdbBaseDir()); + + dbenv.reset(new DbEnv(0)); + dbenv->set_errpfx("msgstore"); + dbenv->set_lg_regionmax(256000); // default = 65000 + dbenv->open(getBdbBaseDir().c_str(), DB_THREAD | DB_CREATE | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_USE_ENVIRON | DB_RECOVER, 0); + + // Databases are constructed here instead of the constructor so that the DB_RECOVER flag can be used + 
// against the database environment. Recover can only be performed if no databases have been created + // against the environment at the time of recovery, as recovery invalidates the environment. + queueDb.reset(new Db(dbenv.get(), 0)); + dbs.push_back(queueDb); + configDb.reset(new Db(dbenv.get(), 0)); + dbs.push_back(configDb); + exchangeDb.reset(new Db(dbenv.get(), 0)); + dbs.push_back(exchangeDb); + mappingDb.reset(new Db(dbenv.get(), 0)); + dbs.push_back(mappingDb); + bindingDb.reset(new Db(dbenv.get(), 0)); + dbs.push_back(bindingDb); + generalDb.reset(new Db(dbenv.get(), 0)); + dbs.push_back(generalDb); + + TxnCtxt txn; + txn.begin(dbenv.get(), false); + try { + open(queueDb, txn.get(), "queues.db", false); + open(configDb, txn.get(), "config.db", false); + open(exchangeDb, txn.get(), "exchanges.db", false); + open(mappingDb, txn.get(), "mappings.db", true); + open(bindingDb, txn.get(), "bindings.db", true); + open(generalDb, txn.get(), "general.db", false); + txn.commit(); + } catch (...) { txn.abort(); throw; } + // NOTE: during normal initialization, agent == 0 because the store is initialized before the management infrastructure. + // However during a truncated initialization in a cluster, agent != 0. We always pass 0 as the agent for the + // TplStore to keep things consistent in a cluster. See https://bugzilla.redhat.com/show_bug.cgi?id=681026 + tplStorePtr.reset(new TplJournalImpl(broker->getTimer(), "TplStore", getTplBaseDir(), "tpl", defJournalGetEventsTimeout, defJournalFlushTimeout, 0)); + isInit = true; + } catch (const DbException& e) { + if (e.get_errno() == DB_VERSION_MISMATCH) + { + QPID_LOG(error, "Database environment mismatch: This version of db4 does not match that which created the store database.: " << e.what()); + THROW_STORE_EXCEPTION_2("Database environment mismatch: This version of db4 does not match that which created the store database. " + "(If recovery is not important, delete the contents of the store directory. 
Otherwise, try upgrading the database using " + "db_upgrade or using db_recover - but the db4-utils package must also be installed to use these utilities.)", e); + } + QPID_LOG(error, "BDB exception occurred while initializing store: " << e.what()); + if (bdbRetryCnt >= retryMax) + THROW_STORE_EXCEPTION_2("BDB exception occurred while initializing store", e); + } catch (const StoreException&) { + throw; + } catch (const journal::jexception& e) { + QPID_LOG(error, "Journal Exception occurred while initializing store: " << e); + THROW_STORE_EXCEPTION_2("Journal Exception occurred while initializing store", e.what()); + } catch (...) { + QPID_LOG(error, "Unknown exception occurred while initializing store."); + throw; + } + } while (!isInit); +} + +void MessageStoreImpl::finalize() +{ + if (tplStorePtr.get() && tplStorePtr->is_ready()) tplStorePtr->stop(true); + { + qpid::sys::Mutex::ScopedLock sl(journalListLock); + for (JournalListMapItr i = journalList.begin(); i != journalList.end(); i++) + { + JournalImpl* jQueue = i->second; + jQueue->resetDeleteCallback(); + if (jQueue->is_ready()) jQueue->stop(true); + } + } + + if (mgmtObject.get() != 0) { + mgmtObject->resourceDestroy(); + mgmtObject.reset(); + } +} + +void MessageStoreImpl::truncateInit(const bool saveStoreContent) +{ + if (isInit) { + { + qpid::sys::Mutex::ScopedLock sl(journalListLock); + if (journalList.size()) { // check no queues exist + std::ostringstream oss; + oss << "truncateInit() called with " << journalList.size() << " queues still in existence"; + THROW_STORE_EXCEPTION(oss.str()); + } + } + closeDbs(); + dbs.clear(); + if (tplStorePtr->is_ready()) tplStorePtr->stop(true); + dbenv->close(0); + isInit = false; + } + std::ostringstream oss; + oss << storeDir << "/" << storeTopLevelDir; + if (saveStoreContent) { + std::string dir = mrg::journal::jdir::push_down(storeDir, storeTopLevelDir, "cluster"); + QPID_LOG(notice, "Store directory " << oss.str() << " was pushed down (saved) into directory " << 
dir << "."); + } else { + mrg::journal::jdir::delete_dir(oss.str().c_str()); + QPID_LOG(notice, "Store directory " << oss.str() << " was truncated."); + } + init(); +} + +void MessageStoreImpl::chkTplStoreInit() +{ + // Prevent multiple threads from late-initializing the TPL + qpid::sys::Mutex::ScopedLock sl(tplInitLock); + if (!tplStorePtr->is_ready()) { + journal::jdir::create_dir(getTplBaseDir()); + tplStorePtr->initialize(tplNumJrnlFiles, false, 0, tplJrnlFsizeSblks, tplWCacheNumPages, tplWCachePgSizeSblks); + if (mgmtObject.get() != 0) mgmtObject->set_tplIsInitialized(true); + } +} + +void MessageStoreImpl::open(db_ptr db, + DbTxn* txn, + const char* file, + bool dupKey) +{ + if(dupKey) db->set_flags(DB_DUPSORT); + db->open(txn, file, 0, DB_BTREE, DB_CREATE | DB_THREAD, 0); +} + +void MessageStoreImpl::closeDbs() +{ + for (std::list<db_ptr >::iterator i = dbs.begin(); i != dbs.end(); i++) { + (*i)->close(0); + } + dbs.clear(); +} + +MessageStoreImpl::~MessageStoreImpl() +{ + finalize(); + try { + closeDbs(); + } catch (const DbException& e) { + QPID_LOG(error, "Error closing BDB databases: " << e.what()); + } catch (const journal::jexception& e) { + QPID_LOG(error, "Error: " << e.what()); + } catch (const std::exception& e) { + QPID_LOG(error, "Error: " << e.what()); + } catch (...) 
{ + QPID_LOG(error, "Unknown error in MessageStoreImpl::~MessageStoreImpl()"); + } + + if (mgmtObject.get() != 0) { + mgmtObject->resourceDestroy(); + mgmtObject.reset(); + } +} + +void MessageStoreImpl::create(qpid::broker::PersistableQueue& queue, + const qpid::framing::FieldTable& args) +{ + checkInit(); + if (queue.getPersistenceId()) { + THROW_STORE_EXCEPTION("Queue already created: " + queue.getName()); + } + JournalImpl* jQueue = 0; + qpid::framing::FieldTable::ValuePtr value; + + u_int16_t localFileCount = numJrnlFiles; + bool localAutoExpandFlag = autoJrnlExpand; + u_int16_t localAutoExpandMaxFileCount = autoJrnlExpandMaxFiles; + u_int32_t localFileSizeSblks = jrnlFsizeSblks; + + value = args.get("qpid.file_count"); + if (value.get() != 0 && !value->empty() && value->convertsTo<int>()) + localFileCount = chkJrnlNumFilesParam((u_int16_t) value->get<int>(), "qpid.file_count"); + + value = args.get("qpid.file_size"); + if (value.get() != 0 && !value->empty() && value->convertsTo<int>()) + localFileSizeSblks = chkJrnlFileSizeParam((u_int32_t) value->get<int>(), "qpid.file_size", wCachePgSizeSblks) * JRNL_RMGR_PAGE_SIZE; + + if (queue.getName().size() == 0) + { + QPID_LOG(error, "Cannot create store for empty (null) queue name - ignoring and attempting to continue."); + return; + } + + jQueue = new JournalImpl(broker->getTimer(), queue.getName(), getJrnlDir(queue), std::string("JournalData"), + defJournalGetEventsTimeout, defJournalFlushTimeout, agent, + boost::bind(&MessageStoreImpl::journalDeleted, this, _1)); + { + qpid::sys::Mutex::ScopedLock sl(journalListLock); + journalList[queue.getName()]=jQueue; + } + + value = args.get("qpid.auto_expand"); + if (value.get() != 0 && !value->empty() && value->convertsTo<bool>()) + localAutoExpandFlag = (bool) value->get<bool>(); + + value = args.get("qpid.auto_expand_max_jfiles"); + if (value.get() != 0 && !value->empty() && value->convertsTo<int>()) + localAutoExpandMaxFileCount = (u_int16_t) value->get<int>(); + + 
queue.setExternalQueueStore(dynamic_cast<qpid::broker::ExternalQueueStore*>(jQueue)); + try { + // init will create the deque's for the init... + jQueue->initialize(localFileCount, localAutoExpandFlag, localAutoExpandMaxFileCount, localFileSizeSblks, wCacheNumPages, wCachePgSizeSblks); + } catch (const journal::jexception& e) { + THROW_STORE_EXCEPTION(std::string("Queue ") + queue.getName() + ": create() failed: " + e.what()); + } + try { + if (!create(queueDb, queueIdSequence, queue)) { + THROW_STORE_EXCEPTION("Queue already exists: " + queue.getName()); + } + } catch (const DbException& e) { + THROW_STORE_EXCEPTION_2("Error creating queue named " + queue.getName(), e); + } +} + +void MessageStoreImpl::destroy(qpid::broker::PersistableQueue& queue) +{ + checkInit(); + destroy(queueDb, queue); + deleteBindingsForQueue(queue); + qpid::broker::ExternalQueueStore* eqs = queue.getExternalQueueStore(); + if (eqs) { + JournalImpl* jQueue = static_cast<JournalImpl*>(eqs); + jQueue->delete_jrnl_files(); + queue.setExternalQueueStore(0); // will delete the journal if exists + { + qpid::sys::Mutex::ScopedLock sl(journalListLock); + journalList.erase(queue.getName()); + } + } +} + +void MessageStoreImpl::create(const qpid::broker::PersistableExchange& exchange, + const qpid::framing::FieldTable& /*args*/) +{ + checkInit(); + if (exchange.getPersistenceId()) { + THROW_STORE_EXCEPTION("Exchange already created: " + exchange.getName()); + } + try { + if (!create(exchangeDb, exchangeIdSequence, exchange)) { + THROW_STORE_EXCEPTION("Exchange already exists: " + exchange.getName()); + } + } catch (const DbException& e) { + THROW_STORE_EXCEPTION_2("Error creating exchange named " + exchange.getName(), e); + } +} + +void MessageStoreImpl::destroy(const qpid::broker::PersistableExchange& exchange) +{ + checkInit(); + destroy(exchangeDb, exchange); + //need to also delete bindings + IdDbt key(exchange.getPersistenceId()); + bindingDb->del(0, &key, DB_AUTO_COMMIT); +} + +void 
MessageStoreImpl::create(const qpid::broker::PersistableConfig& general) +{ + checkInit(); + if (general.getPersistenceId()) { + THROW_STORE_EXCEPTION("General configuration item already created"); + } + try { + if (!create(generalDb, generalIdSequence, general)) { + THROW_STORE_EXCEPTION("General configuration already exists"); + } + } catch (const DbException& e) { + THROW_STORE_EXCEPTION_2("Error creating general configuration", e); + } +} + +void MessageStoreImpl::destroy(const qpid::broker::PersistableConfig& general) +{ + checkInit(); + destroy(generalDb, general); +} + +bool MessageStoreImpl::create(db_ptr db, + IdSequence& seq, + const qpid::broker::Persistable& p) +{ + u_int64_t id (seq.next()); + Dbt key(&id, sizeof(id)); + BufferValue value (p); + + int status; + TxnCtxt txn; + txn.begin(dbenv.get(), true); + try { + status = db->put(txn.get(), &key, &value, DB_NOOVERWRITE); + txn.commit(); + } catch (...) { + txn.abort(); + throw; + } + if (status == DB_KEYEXIST) { + return false; + } else { + p.setPersistenceId(id); + return true; + } +} + +void MessageStoreImpl::destroy(db_ptr db, const qpid::broker::Persistable& p) +{ + qpid::sys::Mutex::ScopedLock sl(bdbLock); + IdDbt key(p.getPersistenceId()); + db->del(0, &key, DB_AUTO_COMMIT); +} + + +void MessageStoreImpl::bind(const qpid::broker::PersistableExchange& e, + const qpid::broker::PersistableQueue& q, + const std::string& k, + const qpid::framing::FieldTable& a) +{ + checkInit(); + IdDbt key(e.getPersistenceId()); + BindingDbt value(e, q, k, a); + TxnCtxt txn; + txn.begin(dbenv.get(), true); + try { + put(bindingDb, txn.get(), key, value); + txn.commit(); + } catch (...) 
{ + txn.abort(); + throw; + } +} + +void MessageStoreImpl::unbind(const qpid::broker::PersistableExchange& e, + const qpid::broker::PersistableQueue& q, + const std::string& k, + const qpid::framing::FieldTable&) +{ + checkInit(); + deleteBinding(e, q, k); +} + +void MessageStoreImpl::recover(qpid::broker::RecoveryManager& registry) +{ + checkInit(); + txn_list prepared; + recoverLockedMappings(prepared); + + queue_index queues;//id->queue + exchange_index exchanges;//id->exchange + message_index messages;//id->message + + TxnCtxt txn; + txn.begin(dbenv.get(), false); + try { + //read all queues, calls recoversMessages + recoverQueues(txn, registry, queues, prepared, messages); + + //recover exchange & bindings: + recoverExchanges(txn, registry, exchanges); + recoverBindings(txn, exchanges, queues); + + //recover general-purpose configuration + recoverGeneral(txn, registry); + + txn.commit(); + } catch (const DbException& e) { + txn.abort(); + THROW_STORE_EXCEPTION_2("Error on recovery", e); + } catch (...) { + txn.abort(); + throw; + } + + //recover transactions: + for (txn_list::iterator i = prepared.begin(); i != prepared.end(); i++) { + const PreparedTransaction pt = *i; + if (mgmtObject.get() != 0) { + mgmtObject->inc_tplTransactionDepth(); + mgmtObject->inc_tplTxnPrepares(); + } + + std::string xid = pt.xid; + + // Restore data token state in TxnCtxt + TplRecoverMapCitr citr = tplRecoverMap.find(xid); + if (citr == tplRecoverMap.end()) THROW_STORE_EXCEPTION("XID not found in tplRecoverMap"); + + // If a record is found that is dequeued but not committed/aborted from tplStore, then a complete() call + // was interrupted part way through committing/aborting the impacted queues. Complete this process. 
+ bool incomplTplTxnFlag = citr->second.deq_flag; + + if (citr->second.tpc_flag) { + // Dtx (2PC) transaction + TPCTxnCtxt* tpcc = new TPCTxnCtxt(xid, &messageIdSequence); + std::auto_ptr<qpid::broker::TPCTransactionContext> txn(tpcc); + tpcc->recoverDtok(citr->second.rid, xid); + tpcc->prepare(tplStorePtr.get()); + + qpid::broker::RecoverableTransaction::shared_ptr dtx; + if (!incomplTplTxnFlag) dtx = registry.recoverTransaction(xid, txn); + if (pt.enqueues.get()) { + for (LockedMappings::iterator j = pt.enqueues->begin(); j != pt.enqueues->end(); j++) { + tpcc->addXidRecord(queues[j->first]->getExternalQueueStore()); + if (!incomplTplTxnFlag) dtx->enqueue(queues[j->first], messages[j->second]); + } + } + if (pt.dequeues.get()) { + for (LockedMappings::iterator j = pt.dequeues->begin(); j != pt.dequeues->end(); j++) { + tpcc->addXidRecord(queues[j->first]->getExternalQueueStore()); + if (!incomplTplTxnFlag) dtx->dequeue(queues[j->first], messages[j->second]); + } + } + + if (incomplTplTxnFlag) { + tpcc->complete(citr->second.commit_flag); + } + } else { + // Local (1PC) transaction + boost::shared_ptr<TxnCtxt> opcc(new TxnCtxt(xid, &messageIdSequence)); + opcc->recoverDtok(citr->second.rid, xid); + opcc->prepare(tplStorePtr.get()); + + if (pt.enqueues.get()) { + for (LockedMappings::iterator j = pt.enqueues->begin(); j != pt.enqueues->end(); j++) { + opcc->addXidRecord(queues[j->first]->getExternalQueueStore()); + } + } + if (pt.dequeues.get()) { + for (LockedMappings::iterator j = pt.dequeues->begin(); j != pt.dequeues->end(); j++) { + opcc->addXidRecord(queues[j->first]->getExternalQueueStore()); + } + } + if (incomplTplTxnFlag) { + opcc->complete(citr->second.commit_flag); + } else { + completed(*opcc.get(), citr->second.commit_flag); + } + } + } + registry.recoveryComplete(); +} + +void MessageStoreImpl::recoverQueues(TxnCtxt& txn, + qpid::broker::RecoveryManager& registry, + queue_index& queue_index, + txn_list& prepared, + message_index& messages) +{ + 
Cursor queues; + queues.open(queueDb, txn.get()); + + u_int64_t maxQueueId(1); + + IdDbt key; + Dbt value; + //read all queues + while (queues.next(key, value)) { + qpid::framing::Buffer buffer(reinterpret_cast<char*>(value.get_data()), value.get_size()); + //create a Queue instance + qpid::broker::RecoverableQueue::shared_ptr queue = registry.recoverQueue(buffer); + //set the persistenceId and update max as required + queue->setPersistenceId(key.id); + + const std::string queueName = queue->getName().c_str(); + JournalImpl* jQueue = 0; + if (queueName.size() == 0) + { + QPID_LOG(error, "Cannot recover empty (null) queue name - ignoring and attempting to continue."); + break; + } + jQueue = new JournalImpl(broker->getTimer(), queueName, getJrnlHashDir(queueName), std::string("JournalData"), + defJournalGetEventsTimeout, defJournalFlushTimeout, agent, + boost::bind(&MessageStoreImpl::journalDeleted, this, _1)); + { + qpid::sys::Mutex::ScopedLock sl(journalListLock); + journalList[queueName] = jQueue; + } + queue->setExternalQueueStore(dynamic_cast<qpid::broker::ExternalQueueStore*>(jQueue)); + + try + { + long rcnt = 0L; // recovered msg count + long idcnt = 0L; // in-doubt msg count + u_int64_t thisHighestRid = 0ULL; + jQueue->recover(numJrnlFiles, autoJrnlExpand, autoJrnlExpandMaxFiles, jrnlFsizeSblks, wCacheNumPages, wCachePgSizeSblks, &prepared, thisHighestRid, key.id); // start recovery + if (highestRid == 0ULL) + highestRid = thisHighestRid; + else if (thisHighestRid - highestRid < 0x8000000000000000ULL) // RFC 1982 comparison for unsigned 64-bit + highestRid = thisHighestRid; + recoverMessages(txn, registry, queue, prepared, messages, rcnt, idcnt); + QPID_LOG(info, "Recovered queue \"" << queueName << "\": " << rcnt << " messages recovered; " << idcnt << " messages in-doubt."); + jQueue->recover_complete(); // start journal. 
+ } catch (const journal::jexception& e) { + THROW_STORE_EXCEPTION(std::string("Queue ") + queueName + ": recoverQueues() failed: " + e.what()); + } + //read all messages: done on a per queue basis if using Journal + + queue_index[key.id] = queue; + maxQueueId = std::max(key.id, maxQueueId); + } + + // NOTE: highestRid is set by both recoverQueues() and recoverTplStore() as + // the messageIdSequence is used for both queue journals and the tpl journal. + messageIdSequence.reset(highestRid + 1); + QPID_LOG(info, "Most recent persistence id found: 0x" << std::hex << highestRid << std::dec); + + queueIdSequence.reset(maxQueueId + 1); +} + + +void MessageStoreImpl::recoverExchanges(TxnCtxt& txn, + qpid::broker::RecoveryManager& registry, + exchange_index& index) +{ + //TODO: this is a copy&paste from recoverQueues - refactor! + Cursor exchanges; + exchanges.open(exchangeDb, txn.get()); + + u_int64_t maxExchangeId(1); + IdDbt key; + Dbt value; + //read all exchanges + while (exchanges.next(key, value)) { + qpid::framing::Buffer buffer(reinterpret_cast<char*>(value.get_data()), value.get_size()); + //create a Exchange instance + qpid::broker::RecoverableExchange::shared_ptr exchange = registry.recoverExchange(buffer); + if (exchange) { + //set the persistenceId and update max as required + exchange->setPersistenceId(key.id); + index[key.id] = exchange; + QPID_LOG(info, "Recovered exchange \"" << exchange->getName() << '"'); + } + maxExchangeId = std::max(key.id, maxExchangeId); + } + exchangeIdSequence.reset(maxExchangeId + 1); +} + +void MessageStoreImpl::recoverBindings(TxnCtxt& txn, + exchange_index& exchanges, + queue_index& queues) +{ + Cursor bindings; + bindings.open(bindingDb, txn.get()); + + IdDbt key; + Dbt value; + while (bindings.next(key, value)) { + qpid::framing::Buffer buffer(reinterpret_cast<char*>(value.get_data()), value.get_size()); + if (buffer.available() < 8) { + QPID_LOG(error, "Not enough data for binding: " << buffer.available()); + 
THROW_STORE_EXCEPTION("Not enough data for binding"); + } + uint64_t queueId = buffer.getLongLong(); + std::string queueName; + std::string routingkey; + qpid::framing::FieldTable args; + buffer.getShortString(queueName); + buffer.getShortString(routingkey); + buffer.get(args); + exchange_index::iterator exchange = exchanges.find(key.id); + queue_index::iterator queue = queues.find(queueId); + if (exchange != exchanges.end() && queue != queues.end()) { + //could use the recoverable queue here rather than the name... + exchange->second->bind(queueName, routingkey, args); + QPID_LOG(info, "Recovered binding exchange=" << exchange->second->getName() + << " key=" << routingkey + << " queue=" << queueName); + } else { + //stale binding, delete it + QPID_LOG(warning, "Deleting stale binding"); + bindings->del(0); + } + } +} + +void MessageStoreImpl::recoverGeneral(TxnCtxt& txn, + qpid::broker::RecoveryManager& registry) +{ + Cursor items; + items.open(generalDb, txn.get()); + + u_int64_t maxGeneralId(1); + IdDbt key; + Dbt value; + //read all items + while (items.next(key, value)) { + qpid::framing::Buffer buffer(reinterpret_cast<char*>(value.get_data()), value.get_size()); + //create instance + qpid::broker::RecoverableConfig::shared_ptr config = registry.recoverConfig(buffer); + //set the persistenceId and update max as required + config->setPersistenceId(key.id); + maxGeneralId = std::max(key.id, maxGeneralId); + } + generalIdSequence.reset(maxGeneralId + 1); +} + +void MessageStoreImpl::recoverMessages(TxnCtxt& /*txn*/, + qpid::broker::RecoveryManager& recovery, + qpid::broker::RecoverableQueue::shared_ptr& queue, + txn_list& prepared, + message_index& messages, + long& rcnt, + long& idcnt) +{ + size_t preambleLength = sizeof(u_int32_t)/*header size*/; + + JournalImpl* jc = static_cast<JournalImpl*>(queue->getExternalQueueStore()); + DataTokenImpl dtok; + size_t readSize = 0; + unsigned msg_count = 0; + + // TODO: This optimization to skip reading if there are no 
enqueued messages to read + // breaks the python system test in phase 6 with "Exception: Cannot write lock file" + // Figure out what is breaking. + //bool read = jc->get_enq_cnt() > 0; + bool read = true; + + void* dbuff = NULL; size_t dbuffSize = 0; + void* xidbuff = NULL; size_t xidbuffSize = 0; + bool transientFlag = false; + bool externalFlag = false; + + dtok.set_wstate(DataTokenImpl::ENQ); + + // Read the message from the Journal. + try { + unsigned aio_sleep_cnt = 0; + while (read) { + mrg::journal::iores res = jc->read_data_record(&dbuff, dbuffSize, &xidbuff, xidbuffSize, transientFlag, externalFlag, &dtok); + readSize = dtok.dsize(); + + switch (res) + { + case mrg::journal::RHM_IORES_SUCCESS: { + msg_count++; + qpid::broker::RecoverableMessage::shared_ptr msg; + char* data = (char*)dbuff; + + unsigned headerSize; + if (externalFlag) { + msg = getExternMessage(recovery, dtok.rid(), headerSize); // large message external to jrnl + } else { + headerSize = qpid::framing::Buffer(data, preambleLength).getLong(); + qpid::framing::Buffer headerBuff(data+ preambleLength, headerSize); /// do we want read size or header size ???? + msg = recovery.recoverMessage(headerBuff); + } + msg->setPersistenceId(dtok.rid()); + // At some future point if delivery attempts are stored, then this call would + // become optional depending on that information. 
+ msg->setRedelivered(); + // Reset the TTL for the recovered message + msg->computeExpiration(broker->getExpiryPolicy()); + + u_int32_t contentOffset = headerSize + preambleLength; + u_int64_t contentSize = readSize - contentOffset; + if (msg->loadContent(contentSize) && !externalFlag) { + //now read the content + qpid::framing::Buffer contentBuff(data + contentOffset, contentSize); + msg->decodeContent(contentBuff); + } + + PreparedTransaction::list::iterator i = PreparedTransaction::getLockedPreparedTransaction(prepared, queue->getPersistenceId(), dtok.rid()); + if (i == prepared.end()) { // not in prepared list + rcnt++; + queue->recover(msg); + } else { + u_int64_t rid = dtok.rid(); + std::string xid(i->xid); + TplRecoverMapCitr citr = tplRecoverMap.find(xid); + if (citr == tplRecoverMap.end()) THROW_STORE_EXCEPTION("XID not found in tplRecoverMap"); + + // deq present in prepared list: this xid is part of incomplete txn commit/abort + // or this is a 1PC txn that must be rolled forward + if (citr->second.deq_flag || !citr->second.tpc_flag) { + if (jc->is_enqueued(rid, true)) { + // Enqueue is non-tx, dequeue tx + assert(jc->is_locked(rid)); // This record MUST be locked by a txn dequeue + if (!citr->second.commit_flag) { + rcnt++; + queue->recover(msg); // recover message in abort case only + } + } else { + // Enqueue and/or dequeue tx + journal::txn_map& tmap = jc->get_txn_map(); + journal::txn_data_list txnList = tmap.get_tdata_list(xid); // txnList will be empty if xid not found + bool enq = false; + bool deq = false; + for (journal::tdl_itr j = txnList.begin(); j<txnList.end(); j++) { + if (j->_enq_flag && j->_rid == rid) enq = true; + else if (!j->_enq_flag && j->_drid == rid) deq = true; + } + if (enq && !deq && citr->second.commit_flag) { + rcnt++; + queue->recover(msg); // recover txn message in commit case only + } + } + } else { + idcnt++; + messages[rid] = msg; + } + } + + dtok.reset(); + dtok.set_wstate(DataTokenImpl::ENQ); + + if (xidbuff) + 
::free(xidbuff); + else if (dbuff) + ::free(dbuff); + aio_sleep_cnt = 0; + break; + } + case mrg::journal::RHM_IORES_PAGE_AIOWAIT: + if (++aio_sleep_cnt > MAX_AIO_SLEEPS) + THROW_STORE_EXCEPTION("Timeout waiting for AIO in MessageStoreImpl::recoverMessages()"); + ::usleep(AIO_SLEEP_TIME_US); + break; + case mrg::journal::RHM_IORES_EMPTY: + read = false; + break; // done with all messages. (add call in jrnl to test that _emap is empty.) + default: + std::ostringstream oss; + oss << "recoverMessages(): Queue: " << queue->getName() << ": Unexpected return from journal read: " << mrg::journal::iores_str(res); + THROW_STORE_EXCEPTION(oss.str()); + } // switch + } // while + } catch (const journal::jexception& e) { + THROW_STORE_EXCEPTION(std::string("Queue ") + queue->getName() + ": recoverMessages() failed: " + e.what()); + } +} + +qpid::broker::RecoverableMessage::shared_ptr MessageStoreImpl::getExternMessage(qpid::broker::RecoveryManager& /*recovery*/, + uint64_t /*messageId*/, + unsigned& /*headerSize*/) +{ + throw mrg::journal::jexception(mrg::journal::jerrno::JERR__NOTIMPL, "MessageStoreImpl", "getExternMessage"); +} + +int MessageStoreImpl::enqueueMessage(TxnCtxt& txn, + IdDbt& msgId, + qpid::broker::RecoverableMessage::shared_ptr& msg, + queue_index& index, + txn_list& prepared, + message_index& messages) +{ + Cursor mappings; + mappings.open(mappingDb, txn.get()); + + IdDbt value; + + int count(0); + for (int status = mappings->get(&msgId, &value, DB_SET); status == 0; status = mappings->get(&msgId, &value, DB_NEXT_DUP)) { + if (index.find(value.id) == index.end()) { + QPID_LOG(warning, "Recovered message for queue that no longer exists"); + mappings->del(0); + } else { + qpid::broker::RecoverableQueue::shared_ptr queue = index[value.id]; + if (PreparedTransaction::isLocked(prepared, value.id, msgId.id)) { + messages[msgId.id] = msg; + } else { + queue->recover(msg); + } + count++; + } + } + mappings.close(); + return count; +} + +void 
MessageStoreImpl::readTplStore() +{ + tplRecoverMap.clear(); + journal::txn_map& tmap = tplStorePtr->get_txn_map(); + DataTokenImpl dtok; + void* dbuff = NULL; size_t dbuffSize = 0; + void* xidbuff = NULL; size_t xidbuffSize = 0; + bool transientFlag = false; + bool externalFlag = false; + bool done = false; + try { + unsigned aio_sleep_cnt = 0; + while (!done) { + dtok.reset(); + dtok.set_wstate(DataTokenImpl::ENQ); + mrg::journal::iores res = tplStorePtr->read_data_record(&dbuff, dbuffSize, &xidbuff, xidbuffSize, transientFlag, externalFlag, &dtok); + switch (res) { + case mrg::journal::RHM_IORES_SUCCESS: { + // Every TPL record contains both data and an XID + assert(dbuffSize>0); + assert(xidbuffSize>0); + std::string xid(static_cast<const char*>(xidbuff), xidbuffSize); + bool is2PC = *(static_cast<char*>(dbuff)) != 0; + + // Check transaction details; add to recover map + journal::txn_data_list txnList = tmap.get_tdata_list(xid); // txnList will be empty if xid not found + if (!txnList.empty()) { // xid found in tmap + unsigned enqCnt = 0; + unsigned deqCnt = 0; + u_int64_t rid = 0; + + // Assume commit (roll forward) in cases where only prepare has been called - ie only enqueue record exists. + // Note: will apply to both 1PC and 2PC transactions. + bool commitFlag = true; + + for (journal::tdl_itr j = txnList.begin(); j<txnList.end(); j++) { + if (j->_enq_flag) { + rid = j->_rid; + enqCnt++; + } else { + commitFlag = j->_commit_flag; + deqCnt++; + } + } + assert(enqCnt == 1); + assert(deqCnt <= 1); + tplRecoverMap.insert(TplRecoverMapPair(xid, TplRecoverStruct(rid, deqCnt == 1, commitFlag, is2PC))); + } + + ::free(xidbuff); + aio_sleep_cnt = 0; + break; + } + case mrg::journal::RHM_IORES_PAGE_AIOWAIT: + if (++aio_sleep_cnt > MAX_AIO_SLEEPS) + THROW_STORE_EXCEPTION("Timeout waiting for AIO in MessageStoreImpl::recoverTplStore()"); + ::usleep(AIO_SLEEP_TIME_US); + break; + case mrg::journal::RHM_IORES_EMPTY: + done = true; + break; // done with all messages. 
(add call in jrnl to test that _emap is empty.) + default: + std::ostringstream oss; + oss << "readTplStore(): Unexpected result from journal read: " << mrg::journal::iores_str(res); + THROW_STORE_EXCEPTION(oss.str()); + } // switch + } + } catch (const journal::jexception& e) { + THROW_STORE_EXCEPTION(std::string("TPL recoverTplStore() failed: ") + e.what()); + } +} + +void MessageStoreImpl::recoverTplStore() +{ + if (journal::jdir::exists(tplStorePtr->jrnl_dir() + tplStorePtr->base_filename() + ".jinf")) { + u_int64_t thisHighestRid = 0ULL; + tplStorePtr->recover(tplNumJrnlFiles, false, 0, tplJrnlFsizeSblks, tplWCachePgSizeSblks, tplWCacheNumPages, 0, thisHighestRid, 0); + if (highestRid == 0ULL) + highestRid = thisHighestRid; + else if (thisHighestRid - highestRid < 0x8000000000000000ULL) // RFC 1982 comparison for unsigned 64-bit + highestRid = thisHighestRid; + + // Load tplRecoverMap by reading the TPL store + readTplStore(); + + tplStorePtr->recover_complete(); // start journal. + } +} + +void MessageStoreImpl::recoverLockedMappings(txn_list& txns) +{ + if (!tplStorePtr->is_ready()) + recoverTplStore(); + + // Abort unprepared xids and populate the locked maps + for (TplRecoverMapCitr i = tplRecoverMap.begin(); i != tplRecoverMap.end(); i++) { + LockedMappings::shared_ptr enq_ptr; + enq_ptr.reset(new LockedMappings); + LockedMappings::shared_ptr deq_ptr; + deq_ptr.reset(new LockedMappings); + txns.push_back(new PreparedTransaction(i->first, enq_ptr, deq_ptr)); + } +} + +void MessageStoreImpl::collectPreparedXids(std::set<std::string>& xids) +{ + if (tplStorePtr->is_ready()) { + tplStorePtr->read_reset(); + readTplStore(); + } else { + recoverTplStore(); + } + for (TplRecoverMapCitr i = tplRecoverMap.begin(); i != tplRecoverMap.end(); i++) { + // Discard all txns that are to be rolled forward/back and 1PC transactions + if (!i->second.deq_flag && i->second.tpc_flag) + xids.insert(i->first); + } +} + +void MessageStoreImpl::stage(const 
boost::intrusive_ptr<qpid::broker::PersistableMessage>& /*msg*/) +{ + throw mrg::journal::jexception(mrg::journal::jerrno::JERR__NOTIMPL, "MessageStoreImpl", "stage"); +} + +void MessageStoreImpl::destroy(qpid::broker::PersistableMessage& /*msg*/) +{ + throw mrg::journal::jexception(mrg::journal::jerrno::JERR__NOTIMPL, "MessageStoreImpl", "destroy"); +} + +void MessageStoreImpl::appendContent(const boost::intrusive_ptr<const qpid::broker::PersistableMessage>& /*msg*/, + const std::string& /*data*/) +{ + throw mrg::journal::jexception(mrg::journal::jerrno::JERR__NOTIMPL, "MessageStoreImpl", "appendContent"); +} + +void MessageStoreImpl::loadContent(const qpid::broker::PersistableQueue& queue, + const boost::intrusive_ptr<const qpid::broker::PersistableMessage>& msg, + std::string& data, + u_int64_t offset, + u_int32_t length) +{ + checkInit(); + u_int64_t messageId (msg->getPersistenceId()); + + if (messageId != 0) { + try { + JournalImpl* jc = static_cast<JournalImpl*>(queue.getExternalQueueStore()); + if (jc && jc->is_enqueued(messageId) ) { + if (!jc->loadMsgContent(messageId, data, length, offset)) { + std::ostringstream oss; + oss << "Queue " << queue.getName() << ": loadContent() failed: Message " << messageId << " is extern"; + THROW_STORE_EXCEPTION(oss.str()); + } + } else { + std::ostringstream oss; + oss << "Queue " << queue.getName() << ": loadContent() failed: Message " << messageId << " not enqueued"; + THROW_STORE_EXCEPTION(oss.str()); + } + } catch (const journal::jexception& e) { + THROW_STORE_EXCEPTION(std::string("Queue ") + queue.getName() + ": loadContent() failed: " + e.what()); + } + } else { + THROW_STORE_EXCEPTION("Cannot load content. 
Message not known to store!"); + } +} + +void MessageStoreImpl::flush(const qpid::broker::PersistableQueue& queue) +{ + if (queue.getExternalQueueStore() == 0) return; + checkInit(); + std::string qn = queue.getName(); + try { + JournalImpl* jc = static_cast<JournalImpl*>(queue.getExternalQueueStore()); + if (jc) { + // TODO: check if this result should be used... + /*mrg::journal::iores res =*/ jc->flush(); + } + } catch (const journal::jexception& e) { + THROW_STORE_EXCEPTION(std::string("Queue ") + qn + ": flush() failed: " + e.what() ); + } +} + +void MessageStoreImpl::enqueue(qpid::broker::TransactionContext* ctxt, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg, + const qpid::broker::PersistableQueue& queue) +{ + checkInit(); + u_int64_t queueId (queue.getPersistenceId()); + u_int64_t messageId (msg->getPersistenceId()); + if (queueId == 0) { + THROW_STORE_EXCEPTION("Queue not created: " + queue.getName()); + } + + TxnCtxt implicit; + TxnCtxt* txn = 0; + if (ctxt) { + txn = check(ctxt); + } else { + txn = &implicit; + } + + bool newId = false; + if (messageId == 0) { + messageId = messageIdSequence.next(); + msg->setPersistenceId(messageId); + newId = true; + } + store(&queue, txn, msg, newId); + + // add queue* to the txn map.. 
+ if (ctxt) txn->addXidRecord(queue.getExternalQueueStore()); +} + +u_int64_t MessageStoreImpl::msgEncode(std::vector<char>& buff, const boost::intrusive_ptr<qpid::broker::PersistableMessage>& message) +{ + u_int32_t headerSize = message->encodedHeaderSize(); + u_int64_t size = message->encodedSize() + sizeof(u_int32_t); + try { buff = std::vector<char>(size); } // long + headers + content + catch (const std::exception& e) { + std::ostringstream oss; + oss << "Unable to allocate memory for encoding message; requested size: " << size << "; error: " << e.what(); + THROW_STORE_EXCEPTION(oss.str()); + } + qpid::framing::Buffer buffer(&buff[0],size); + buffer.putLong(headerSize); + message->encode(buffer); + return size; +} + +void MessageStoreImpl::store(const qpid::broker::PersistableQueue* queue, + TxnCtxt* txn, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& message, + bool /*newId*/) +{ + std::vector<char> buff; + u_int64_t size = msgEncode(buff, message); + + try { + if (queue) { + boost::intrusive_ptr<DataTokenImpl> dtokp(new DataTokenImpl); + dtokp->addRef(); + dtokp->setSourceMessage(message); + dtokp->set_external_rid(true); + dtokp->set_rid(message->getPersistenceId()); // set the messageID into the Journal header (record-id) + + JournalImpl* jc = static_cast<JournalImpl*>(queue->getExternalQueueStore()); + if (txn->getXid().empty()) { + jc->enqueue_data_record(&buff[0], size, size, dtokp.get(), !message->isPersistent()); + } else { + jc->enqueue_txn_data_record(&buff[0], size, size, dtokp.get(), txn->getXid(), !message->isPersistent()); + } + } else { + THROW_STORE_EXCEPTION(std::string("MessageStoreImpl::store() failed: queue NULL.")); + } + } catch (const journal::jexception& e) { + THROW_STORE_EXCEPTION(std::string("Queue ") + queue->getName() + ": MessageStoreImpl::store() failed: " + + e.what()); + } +} + +void MessageStoreImpl::dequeue(qpid::broker::TransactionContext* ctxt, + const 
boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg, + const qpid::broker::PersistableQueue& queue) +{ + checkInit(); + u_int64_t queueId (queue.getPersistenceId()); + u_int64_t messageId (msg->getPersistenceId()); + if (queueId == 0) { + THROW_STORE_EXCEPTION("Queue \"" + queue.getName() + "\" has null queue Id (has not been created)"); + } + if (messageId == 0) { + THROW_STORE_EXCEPTION("Queue \"" + queue.getName() + "\": Dequeuing message with null persistence Id."); + } + + TxnCtxt implicit; + TxnCtxt* txn = 0; + if (ctxt) { + txn = check(ctxt); + } else { + txn = &implicit; + } + + // add queue* to the txn map.. + if (ctxt) txn->addXidRecord(queue.getExternalQueueStore()); + async_dequeue(ctxt, msg, queue); + + msg->dequeueComplete(); +} + +void MessageStoreImpl::async_dequeue(qpid::broker::TransactionContext* ctxt, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg, + const qpid::broker::PersistableQueue& queue) +{ + boost::intrusive_ptr<DataTokenImpl> ddtokp(new DataTokenImpl); + ddtokp->setSourceMessage(msg); + ddtokp->set_external_rid(true); + ddtokp->set_rid(messageIdSequence.next()); + ddtokp->set_dequeue_rid(msg->getPersistenceId()); + ddtokp->set_wstate(DataTokenImpl::ENQ); + std::string tid; + if (ctxt) { + TxnCtxt* txn = check(ctxt); + tid = txn->getXid(); + } + // Manually increase the ref count, as raw pointers are used beyond this point + ddtokp->addRef(); + try { + JournalImpl* jc = static_cast<JournalImpl*>(queue.getExternalQueueStore()); + if (tid.empty()) { + jc->dequeue_data_record(ddtokp.get()); + } else { + jc->dequeue_txn_data_record(ddtokp.get(), tid); + } + } catch (const journal::jexception& e) { + ddtokp->release(); + THROW_STORE_EXCEPTION(std::string("Queue ") + queue.getName() + ": async_dequeue() failed: " + e.what()); + } +} + +u_int32_t MessageStoreImpl::outstandingQueueAIO(const qpid::broker::PersistableQueue& /*queue*/) +{ + checkInit(); + return 0; +} + +void MessageStoreImpl::completed(TxnCtxt& txn, + 
bool commit) +{ + try { + chkTplStoreInit(); // Late initialize (if needed) + + // Nothing to do if not prepared + if (txn.getDtok()->is_enqueued()) { + txn.incrDtokRef(); + DataTokenImpl* dtokp = txn.getDtok(); + dtokp->set_dequeue_rid(dtokp->rid()); + dtokp->set_rid(messageIdSequence.next()); + tplStorePtr->dequeue_txn_data_record(txn.getDtok(), txn.getXid(), commit); + } + txn.complete(commit); + if (mgmtObject.get() != 0) { + mgmtObject->dec_tplTransactionDepth(); + if (commit) + mgmtObject->inc_tplTxnCommits(); + else + mgmtObject->inc_tplTxnAborts(); + } + } catch (const std::exception& e) { + QPID_LOG(error, "Error completing xid " << txn.getXid() << ": " << e.what()); + throw; + } +} + +std::auto_ptr<qpid::broker::TransactionContext> MessageStoreImpl::begin() +{ + checkInit(); + // pass sequence number for c/a + return std::auto_ptr<qpid::broker::TransactionContext>(new TxnCtxt(&messageIdSequence)); +} + +std::auto_ptr<qpid::broker::TPCTransactionContext> MessageStoreImpl::begin(const std::string& xid) +{ + checkInit(); + IdSequence* jtx = &messageIdSequence; + // pass sequence number for c/a + return std::auto_ptr<qpid::broker::TPCTransactionContext>(new TPCTxnCtxt(xid, jtx)); +} + +void MessageStoreImpl::prepare(qpid::broker::TPCTransactionContext& ctxt) +{ + checkInit(); + TxnCtxt* txn = dynamic_cast<TxnCtxt*>(&ctxt); + if(!txn) throw qpid::broker::InvalidTransactionContextException(); + localPrepare(txn); +} + +void MessageStoreImpl::localPrepare(TxnCtxt* ctxt) +{ + try { + chkTplStoreInit(); // Late initialize (if needed) + + // This sync is required to ensure multi-queue atomicity - ie all txn data + // must hit the disk on *all* queues before the TPL prepare (enq) is written. 
+ ctxt->sync(); + + ctxt->incrDtokRef(); + DataTokenImpl* dtokp = ctxt->getDtok(); + dtokp->set_external_rid(true); + dtokp->set_rid(messageIdSequence.next()); + char tpcFlag = static_cast<char>(ctxt->isTPC()); + tplStorePtr->enqueue_txn_data_record(&tpcFlag, sizeof(char), sizeof(char), dtokp, ctxt->getXid(), false); + ctxt->prepare(tplStorePtr.get()); + // make sure all the data is written to disk before returning + ctxt->sync(); + if (mgmtObject.get() != 0) { + mgmtObject->inc_tplTransactionDepth(); + mgmtObject->inc_tplTxnPrepares(); + } + } catch (const std::exception& e) { + QPID_LOG(error, "Error preparing xid " << ctxt->getXid() << ": " << e.what()); + throw; + } +} + +void MessageStoreImpl::commit(qpid::broker::TransactionContext& ctxt) +{ + checkInit(); + TxnCtxt* txn(check(&ctxt)); + if (!txn->isTPC()) { + if (txn->impactedQueuesEmpty()) return; + localPrepare(dynamic_cast<TxnCtxt*>(txn)); + } + completed(*dynamic_cast<TxnCtxt*>(txn), true); +} + +void MessageStoreImpl::abort(qpid::broker::TransactionContext& ctxt) +{ + checkInit(); + TxnCtxt* txn(check(&ctxt)); + if (!txn->isTPC()) { + if (txn->impactedQueuesEmpty()) return; + localPrepare(dynamic_cast<TxnCtxt*>(txn)); + } + completed(*dynamic_cast<TxnCtxt*>(txn), false); +} + +TxnCtxt* MessageStoreImpl::check(qpid::broker::TransactionContext* ctxt) +{ + TxnCtxt* txn = dynamic_cast<TxnCtxt*>(ctxt); + if(!txn) throw qpid::broker::InvalidTransactionContextException(); + return txn; +} + +void MessageStoreImpl::put(db_ptr db, + DbTxn* txn, + Dbt& key, + Dbt& value) +{ + try { + int status = db->put(txn, &key, &value, DB_NODUPDATA); + if (status == DB_KEYEXIST) { + THROW_STORE_EXCEPTION("duplicate data"); + } else if (status) { + THROW_STORE_EXCEPTION(DbEnv::strerror(status)); + } + } catch (const DbException& e) { + THROW_STORE_EXCEPTION(e.what()); + } +} + +void MessageStoreImpl::deleteBindingsForQueue(const qpid::broker::PersistableQueue& queue) +{ + TxnCtxt txn; + txn.begin(dbenv.get(), true); + try { + 
{ + Cursor bindings; + bindings.open(bindingDb, txn.get()); + + IdDbt key; + Dbt value; + while (bindings.next(key, value)) { + qpid::framing::Buffer buffer(reinterpret_cast<char*>(value.get_data()), value.get_size()); + if (buffer.available() < 8) { + THROW_STORE_EXCEPTION("Not enough data for binding"); + } + uint64_t queueId = buffer.getLongLong(); + if (queue.getPersistenceId() == queueId) { + bindings->del(0); + QPID_LOG(debug, "Deleting binding for " << queue.getName() << " " << key.id << "->" << queueId); + } + } + } + txn.commit(); + } catch (const std::exception& e) { + txn.abort(); + THROW_STORE_EXCEPTION_2("Error deleting bindings", e.what()); + } catch (...) { + txn.abort(); + throw; + } + QPID_LOG(debug, "Deleted all bindings for " << queue.getName() << ":" << queue.getPersistenceId()); +} + +void MessageStoreImpl::deleteBinding(const qpid::broker::PersistableExchange& exchange, + const qpid::broker::PersistableQueue& queue, + const std::string& bkey) +{ + TxnCtxt txn; + txn.begin(dbenv.get(), true); + try { + { + Cursor bindings; + bindings.open(bindingDb, txn.get()); + + IdDbt key(exchange.getPersistenceId()); + Dbt value; + + for (int status = bindings->get(&key, &value, DB_SET); status == 0; status = bindings->get(&key, &value, DB_NEXT_DUP)) { + qpid::framing::Buffer buffer(reinterpret_cast<char*>(value.get_data()), value.get_size()); + if (buffer.available() < 8) { + THROW_STORE_EXCEPTION("Not enough data for binding"); + } + uint64_t queueId = buffer.getLongLong(); + if (queue.getPersistenceId() == queueId) { + std::string q; + std::string k; + buffer.getShortString(q); + buffer.getShortString(k); + if (bkey == k) { + bindings->del(0); + QPID_LOG(debug, "Deleting binding for " << queue.getName() << " " << key.id << "->" << queueId); + } + } + } + } + txn.commit(); + } catch (const std::exception& e) { + txn.abort(); + THROW_STORE_EXCEPTION_2("Error deleting bindings", e.what()); + } catch (...) 
{ + txn.abort(); + throw; + } +} + +std::string MessageStoreImpl::getJrnlBaseDir() +{ + std::ostringstream dir; + dir << storeDir << "/" << storeTopLevelDir << "/jrnl/" ; + return dir.str(); +} + +std::string MessageStoreImpl::getBdbBaseDir() +{ + std::ostringstream dir; + dir << storeDir << "/" << storeTopLevelDir << "/dat/" ; + return dir.str(); +} + +std::string MessageStoreImpl::getTplBaseDir() +{ + std::ostringstream dir; + dir << storeDir << "/" << storeTopLevelDir << "/tpl/" ; + return dir.str(); +} + +std::string MessageStoreImpl::getJrnlDir(const qpid::broker::PersistableQueue& queue) //for exmaple /var/rhm/ + queueDir/ +{ + return getJrnlHashDir(queue.getName().c_str()); +} + +u_int32_t MessageStoreImpl::bHash(const std::string str) +{ + // Daniel Bernstein hash fn + u_int32_t h = 0; + for (std::string::const_iterator i = str.begin(); i < str.end(); i++) + h = 33*h + *i; + return h; +} + +std::string MessageStoreImpl::getJrnlHashDir(const std::string& queueName) //for exmaple /var/rhm/ + queueDir/ +{ + std::stringstream dir; + dir << getJrnlBaseDir() << std::hex << std::setfill('0') << std::setw(4); + dir << (bHash(queueName.c_str()) % 29); // Use a prime number for better distribution across dirs + dir << "/" << queueName << "/"; + return dir.str(); +} + +std::string MessageStoreImpl::getStoreDir() const { return storeDir; } + +void MessageStoreImpl::journalDeleted(JournalImpl& j) { + qpid::sys::Mutex::ScopedLock sl(journalListLock); + journalList.erase(j.id()); +} + +MessageStoreImpl::StoreOptions::StoreOptions(const std::string& name) : + qpid::Options(name), + numJrnlFiles(defNumJrnlFiles), + autoJrnlExpand(defAutoJrnlExpand), + autoJrnlExpandMaxFiles(defAutoJrnlExpandMaxFiles), + jrnlFsizePgs(defJrnlFileSizePgs), + truncateFlag(defTruncateFlag), + wCachePageSizeKib(defWCachePageSize), + tplNumJrnlFiles(defTplNumJrnlFiles), + tplJrnlFsizePgs(defTplJrnlFileSizePgs), + tplWCachePageSizeKib(defTplWCachePageSize) +{ + std::ostringstream oss1; + oss1 << 
"Default number of files for each journal instance (queue). [Allowable values: " << + JRNL_MIN_NUM_FILES << " - " << JRNL_MAX_NUM_FILES << "]"; + std::ostringstream oss2; + oss2 << "Default size for each journal file in multiples of read pages (1 read page = 64KiB). [Allowable values: " << + JRNL_MIN_FILE_SIZE / JRNL_RMGR_PAGE_SIZE << " - " << JRNL_MAX_FILE_SIZE / JRNL_RMGR_PAGE_SIZE << "]"; + std::ostringstream oss3; + oss3 << "Number of files for transaction prepared list journal instance. [Allowable values: " << + JRNL_MIN_NUM_FILES << " - " << JRNL_MAX_NUM_FILES << "]"; + std::ostringstream oss4; + oss4 << "Size of each transaction prepared list journal file in multiples of read pages (1 read page = 64KiB) [Allowable values: " << + JRNL_MIN_FILE_SIZE / JRNL_RMGR_PAGE_SIZE << " - " << JRNL_MAX_FILE_SIZE / JRNL_RMGR_PAGE_SIZE << "]"; + addOptions() + ("store-dir", qpid::optValue(storeDir, "DIR"), + "Store directory location for persistence (instead of using --data-dir value). " + "Required if --no-data-dir is also used.") + ("num-jfiles", qpid::optValue(numJrnlFiles, "N"), oss1.str().c_str()) + ("jfile-size-pgs", qpid::optValue(jrnlFsizePgs, "N"), oss2.str().c_str()) +// TODO: Uncomment these lines when auto-expand is enabled. +// ("auto-expand", qpid::optValue(autoJrnlExpand, "yes|no"), +// "If yes|true|1, allows journal to auto-expand by adding additional journal files as needed. " +// "If no|false|0, the number of journal files will remain fixed (num-jfiles).") +// ("max-auto-expand-jfiles", qpid::optValue(autoJrnlExpandMaxFiles, "N"), +// "Maximum number of journal files allowed from auto-expanding; must be greater than --num-jfiles parameter.") + ("truncate", qpid::optValue(truncateFlag, "yes|no"), + "If yes|true|1, will truncate the store (discard any existing records). If no|false|0, will preserve " + "the existing store files for recovery.") + ("wcache-page-size", qpid::optValue(wCachePageSizeKib, "N"), + "Size of the pages in the write page cache in KiB. 
" + "Allowable values - powers of 2: 1, 2, 4, ... , 128. " + "Lower values decrease latency at the expense of throughput.") + ("tpl-num-jfiles", qpid::optValue(tplNumJrnlFiles, "N"), oss3.str().c_str()) + ("tpl-jfile-size-pgs", qpid::optValue(tplJrnlFsizePgs, "N"), oss4.str().c_str()) + ("tpl-wcache-page-size", qpid::optValue(tplWCachePageSizeKib, "N"), + "Size of the pages in the transaction prepared list write page cache in KiB. " + "Allowable values - powers of 2: 1, 2, 4, ... , 128. " + "Lower values decrease latency at the expense of throughput.") + ; +} + +}} diff --git a/cpp/src/qpid/legacystore/MessageStoreImpl.h b/cpp/src/qpid/legacystore/MessageStoreImpl.h new file mode 100644 index 0000000000..68aceedfbb --- /dev/null +++ b/cpp/src/qpid/legacystore/MessageStoreImpl.h @@ -0,0 +1,380 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LEGACYSTORE_MESSAGESTOREIMPL_H +#define QPID_LEGACYSTORE_MESSAGESTOREIMPL_H + +#include <string> + +#include "db-inc.h" +#include "qpid/legacystore/Cursor.h" +#include "qpid/legacystore/IdDbt.h" +#include "qpid/legacystore/IdSequence.h" +#include "qpid/legacystore/JournalImpl.h" +#include "qpid/legacystore/jrnl/jcfg.h" +#include "qpid/legacystore/PreparedTransaction.h" +#include "qpid/broker/Broker.h" +#include "qpid/broker/MessageStore.h" +#include "qpid/management/Manageable.h" +#include "qmf/org/apache/qpid/legacystore/Store.h" +#include "qpid/legacystore/TxnCtxt.h" + +// Assume DB_VERSION_MAJOR == 4 +#if (DB_VERSION_MINOR == 2) +#include <errno.h> +#define DB_BUFFER_SMALL ENOMEM +#endif + +namespace qpid { namespace sys { +class Timer; +}} + +namespace mrg { +namespace msgstore { + +/** + * An implementation of the MessageStore interface based on Berkeley DB + */ +class MessageStoreImpl : public qpid::broker::MessageStore, public qpid::management::Manageable +{ + public: + typedef boost::shared_ptr<Db> db_ptr; + typedef boost::shared_ptr<DbEnv> dbEnv_ptr; + + /* Configuration values for the store. The StoreOptions constructor initialises the numeric and bool members from the def* constants declared further down in this class. */ struct StoreOptions : public qpid::Options { + StoreOptions(const std::string& name="Store Options"); + std::string clusterName; + std::string storeDir; + u_int16_t numJrnlFiles; + bool autoJrnlExpand; + u_int16_t autoJrnlExpandMaxFiles; + u_int32_t jrnlFsizePgs; + bool truncateFlag; + u_int32_t wCachePageSizeKib; + u_int16_t tplNumJrnlFiles; + u_int32_t tplJrnlFsizePgs; + u_int32_t tplWCachePageSizeKib; + }; + + protected: + typedef std::map<u_int64_t, qpid::broker::RecoverableQueue::shared_ptr> queue_index; + typedef std::map<u_int64_t, qpid::broker::RecoverableExchange::shared_ptr> exchange_index; + typedef std::map<u_int64_t, qpid::broker::RecoverableMessage::shared_ptr> message_index; + + typedef LockedMappings::map txn_lock_map; + typedef boost::ptr_list<PreparedTransaction> txn_list; + + // Structs for Transaction Prepared List (TPL) recover state + struct TplRecoverStruct { 
+ u_int64_t rid; // rid of TPL record + bool deq_flag; + bool commit_flag; + bool tpc_flag; + TplRecoverStruct(const u_int64_t _rid, const bool _deq_flag, const bool _commit_flag, const bool _tpc_flag); + }; + typedef TplRecoverStruct TplRecover; + typedef std::pair<std::string, TplRecover> TplRecoverMapPair; + typedef std::map<std::string, TplRecover> TplRecoverMap; + typedef TplRecoverMap::const_iterator TplRecoverMapCitr; + + typedef std::map<std::string, JournalImpl*> JournalListMap; + typedef JournalListMap::iterator JournalListMapItr; + + // Default store settings + static const u_int16_t defNumJrnlFiles = 8; + static const u_int32_t defJrnlFileSizePgs = 24; + static const bool defTruncateFlag = false; + static const u_int32_t defWCachePageSize = JRNL_WMGR_DEF_PAGE_SIZE * JRNL_DBLK_SIZE * JRNL_SBLK_SIZE / 1024; + static const u_int16_t defTplNumJrnlFiles = 8; + static const u_int32_t defTplJrnlFileSizePgs = 24; + static const u_int32_t defTplWCachePageSize = defWCachePageSize / 8; + // TODO: set defAutoJrnlExpand to true and defAutoJrnlExpandMaxFiles to 16 when auto-expand comes on-line + static const bool defAutoJrnlExpand = false; + static const u_int16_t defAutoJrnlExpandMaxFiles = 0; + + static const std::string storeTopLevelDir; + static qpid::sys::Duration defJournalGetEventsTimeout; + static qpid::sys::Duration defJournalFlushTimeout; + + std::list<db_ptr> dbs; + dbEnv_ptr dbenv; + db_ptr queueDb; + db_ptr configDb; + db_ptr exchangeDb; + db_ptr mappingDb; + db_ptr bindingDb; + db_ptr generalDb; + + // Pointer to Transaction Prepared List (TPL) journal instance + boost::shared_ptr<TplJournalImpl> tplStorePtr; + TplRecoverMap tplRecoverMap; + qpid::sys::Mutex tplInitLock; + JournalListMap journalList; + qpid::sys::Mutex journalListLock; + qpid::sys::Mutex bdbLock; + + IdSequence queueIdSequence; + IdSequence exchangeIdSequence; + IdSequence generalIdSequence; + IdSequence messageIdSequence; + std::string storeDir; + u_int16_t numJrnlFiles; + bool 
autoJrnlExpand; + u_int16_t autoJrnlExpandMaxFiles; + u_int32_t jrnlFsizeSblks; + bool truncateFlag; + u_int32_t wCachePgSizeSblks; + u_int16_t wCacheNumPages; + u_int16_t tplNumJrnlFiles; + u_int32_t tplJrnlFsizeSblks; + u_int32_t tplWCachePgSizeSblks; + u_int16_t tplWCacheNumPages; + u_int64_t highestRid; + bool isInit; + const char* envPath; + qpid::broker::Broker* broker; + + qmf::org::apache::qpid::legacystore::Store::shared_ptr mgmtObject; + qpid::management::ManagementAgent* agent; + + + // Parameter validation and calculation + static u_int16_t chkJrnlNumFilesParam(const u_int16_t param, + const std::string paramName); + static u_int32_t chkJrnlFileSizeParam(const u_int32_t param, + const std::string paramName, + const u_int32_t wCachePgSizeSblks = 0); + static u_int32_t chkJrnlWrPageCacheSize(const u_int32_t param, + const std::string paramName, + const u_int16_t jrnlFsizePgs); + static u_int16_t getJrnlWrNumPages(const u_int32_t wrPageSizeKib); + void chkJrnlAutoExpandOptions(const MessageStoreImpl::StoreOptions* opts, + bool& autoJrnlExpand, + u_int16_t& autoJrnlExpandMaxFiles, + const std::string& autoJrnlExpandMaxFilesParamName, + const u_int16_t numJrnlFiles, + const std::string& numJrnlFilesParamName); + + void init(); + + void recoverQueues(TxnCtxt& txn, + qpid::broker::RecoveryManager& recovery, + queue_index& index, + txn_list& locked, + message_index& messages); + void recoverMessages(TxnCtxt& txn, + qpid::broker::RecoveryManager& recovery, + queue_index& index, + txn_list& locked, + message_index& prepared); + void recoverMessages(TxnCtxt& txn, + qpid::broker::RecoveryManager& recovery, + qpid::broker::RecoverableQueue::shared_ptr& queue, + txn_list& locked, + message_index& prepared, + long& rcnt, + long& idcnt); + qpid::broker::RecoverableMessage::shared_ptr getExternMessage(qpid::broker::RecoveryManager& recovery, + uint64_t mId, + unsigned& headerSize); + void recoverExchanges(TxnCtxt& txn, + qpid::broker::RecoveryManager& recovery, + 
exchange_index& index); + void recoverBindings(TxnCtxt& txn, + exchange_index& exchanges, + queue_index& queues); + void recoverGeneral(TxnCtxt& txn, + qpid::broker::RecoveryManager& recovery); + int enqueueMessage(TxnCtxt& txn, + IdDbt& msgId, + qpid::broker::RecoverableMessage::shared_ptr& msg, + queue_index& index, + txn_list& locked, + message_index& prepared); + void readTplStore(); + void recoverTplStore(); + void recoverLockedMappings(txn_list& txns); + /* Downcasts ctxt to TxnCtxt; throws qpid::broker::InvalidTransactionContextException when ctxt is not a TxnCtxt. */ TxnCtxt* check(qpid::broker::TransactionContext* ctxt); + /* Encodes a 32-bit header size followed by the encoded message into buff and returns the total encoded size. */ u_int64_t msgEncode(std::vector<char>& buff, const boost::intrusive_ptr<qpid::broker::PersistableMessage>& message); + void store(const qpid::broker::PersistableQueue* queue, + TxnCtxt* txn, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& message, + bool newId); + void async_dequeue(qpid::broker::TransactionContext* ctxt, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg, + const qpid::broker::PersistableQueue& queue); + void destroy(db_ptr db, + const qpid::broker::Persistable& p); + bool create(db_ptr db, + IdSequence& seq, + const qpid::broker::Persistable& p); + void completed(TxnCtxt& txn, + bool commit); + void deleteBindingsForQueue(const qpid::broker::PersistableQueue& queue); + void deleteBinding(const qpid::broker::PersistableExchange& exchange, + const qpid::broker::PersistableQueue& queue, + const std::string& key); + + void put(db_ptr db, + DbTxn* txn, + Dbt& key, + Dbt& value); + void open(db_ptr db, + DbTxn* txn, + const char* file, + bool dupKey); + void closeDbs(); + + // journal functions + void createJrnlQueue(const qpid::broker::PersistableQueue& queue); + u_int32_t bHash(const std::string str); + std::string getJrnlDir(const qpid::broker::PersistableQueue& queue); //for example /var/rhm/ + queueDir/ + std::string getJrnlHashDir(const std::string& queueName); + std::string getJrnlBaseDir(); + std::string getBdbBaseDir(); + std::string getTplBaseDir(); + /* Late initialisation: when isInit is still false, calls init("/tmp") (remaining parameters take their defaults) and then sets isInit. */ inline void checkInit() { + // TODO: change 
the default dir to ~/.qpidd + if (!isInit) { init("/tmp"); isInit = true; } + } + void chkTplStoreInit(); + + // debug aid for printing XIDs that may contain non-printable chars + /* Printable bytes are copied as-is; every other byte is written as "/" followed by its value as two zero-padded hex digits. */ static std::string xid2str(const std::string xid) { + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + for (unsigned i=0; i<xid.size(); i++) { + /* go through unsigned char: isprint() is undefined for negative arguments, and where char is signed a byte >= 0x80 would otherwise be printed sign-extended (e.g. ffffff80) */ if (isprint(static_cast<unsigned char>(xid[i]))) + oss << xid[i]; + else + oss << "/" << std::setw(2) << static_cast<unsigned>(static_cast<unsigned char>(xid[i])); + } + return oss.str(); + } + + public: + typedef boost::shared_ptr<MessageStoreImpl> shared_ptr; + + MessageStoreImpl(qpid::broker::Broker* broker, const char* envpath = 0); + + virtual ~MessageStoreImpl(); + + bool init(const qpid::Options* options); + + bool init(const std::string& dir, + u_int16_t jfiles = defNumJrnlFiles, + u_int32_t jfileSizePgs = defJrnlFileSizePgs, + const bool truncateFlag = false, + u_int32_t wCachePageSize = defWCachePageSize, + u_int16_t tplJfiles = defTplNumJrnlFiles, + u_int32_t tplJfileSizePgs = defTplJrnlFileSizePgs, + u_int32_t tplWCachePageSize = defTplWCachePageSize, + bool autoJExpand = defAutoJrnlExpand, + u_int16_t autoJExpandMaxFiles = defAutoJrnlExpandMaxFiles); + + void truncateInit(const bool saveStoreContent = false); + + void initManagement (); + + void finalize(); + + void create(qpid::broker::PersistableQueue& queue, + const qpid::framing::FieldTable& args); + + void destroy(qpid::broker::PersistableQueue& queue); + + void create(const qpid::broker::PersistableExchange& queue, + const qpid::framing::FieldTable& args); + + void destroy(const qpid::broker::PersistableExchange& queue); + + void bind(const qpid::broker::PersistableExchange& exchange, + const qpid::broker::PersistableQueue& queue, + const std::string& key, + const qpid::framing::FieldTable& args); + + void unbind(const qpid::broker::PersistableExchange& exchange, + const qpid::broker::PersistableQueue& queue, + const std::string& key, + const qpid::framing::FieldTable& args); + + void create(const 
qpid::broker::PersistableConfig& config); + + void destroy(const qpid::broker::PersistableConfig& config); + + void recover(qpid::broker::RecoveryManager& queues); + + /* stage(), destroy(PersistableMessage&) and appendContent() are not implemented by this store: each one throws mrg::journal::jexception with JERR__NOTIMPL. */ void stage(const boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg); + + void destroy(qpid::broker::PersistableMessage& msg); + + void appendContent(const boost::intrusive_ptr<const qpid::broker::PersistableMessage>& msg, + const std::string& data); + + void loadContent(const qpid::broker::PersistableQueue& queue, + const boost::intrusive_ptr<const qpid::broker::PersistableMessage>& msg, + std::string& data, + uint64_t offset, + uint32_t length); + + void enqueue(qpid::broker::TransactionContext* ctxt, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg, + const qpid::broker::PersistableQueue& queue); + + void dequeue(qpid::broker::TransactionContext* ctxt, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg, + const qpid::broker::PersistableQueue& queue); + + void flush(const qpid::broker::PersistableQueue& queue); + + /* Currently always returns 0 (after making sure the store is initialised). */ u_int32_t outstandingQueueAIO(const qpid::broker::PersistableQueue& queue); + + void collectPreparedXids(std::set<std::string>& xids); + + std::auto_ptr<qpid::broker::TransactionContext> begin(); + + std::auto_ptr<qpid::broker::TPCTransactionContext> begin(const std::string& xid); + + void prepare(qpid::broker::TPCTransactionContext& ctxt); + + void localPrepare(TxnCtxt* ctxt); + + void commit(qpid::broker::TransactionContext& ctxt); + + void abort(qpid::broker::TransactionContext& ctxt); + + qpid::management::ManagementObject::shared_ptr GetManagementObject (void) const + { return mgmtObject; } + + inline qpid::management::Manageable::status_t ManagementMethod (u_int32_t, qpid::management::Args&, std::string&) + { return qpid::management::Manageable::STATUS_OK; } + + std::string getStoreDir() const; + + private: + void journalDeleted(JournalImpl&); + +}; // class MessageStoreImpl + +} // namespace msgstore +} // namespace mrg + +#endif // 
ifndef QPID_LEGACYSTORE_MESSAGESTOREIMPL_H diff --git a/cpp/src/qpid/legacystore/PreparedTransaction.cpp b/cpp/src/qpid/legacystore/PreparedTransaction.cpp new file mode 100644 index 0000000000..50b81e2824 --- /dev/null +++ b/cpp/src/qpid/legacystore/PreparedTransaction.cpp @@ -0,0 +1,81 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/legacystore/PreparedTransaction.h" +#include <algorithm> + +using namespace mrg::msgstore; +using std::string; + +/* Appends the (queue, message) pair to this object's list of locked pairs. */ void LockedMappings::add(queue_id queue, message_id message) +{ + locked.push_back(std::make_pair(queue, message)); +} + +/* Returns true when the (queue, message) pair is present in the locked list (linear search). */ bool LockedMappings::isLocked(queue_id queue, message_id message) +{ + idpair op( std::make_pair(queue, message) ); + return find(locked.begin(), locked.end(), op) != locked.end(); +} + +/* Adds the (queue, message) pair to the LockedMappings stored under key in map, first inserting a new empty LockedMappings when key is not yet present. */ void LockedMappings::add(LockedMappings::map& map, std::string& key, queue_id queue, message_id message) +{ + LockedMappings::map::iterator i = map.find(key); + if (i == map.end()) { + LockedMappings::shared_ptr ptr(new LockedMappings()); + i = map.insert(std::make_pair(key, ptr)).first; + } + i->second->add(queue, message); +} + +/* Returns true when the pair is found in this transaction's enqueues or dequeues; a null enqueues/dequeues pointer is skipped. */ bool PreparedTransaction::isLocked(queue_id queue, message_id message) +{ + return (enqueues.get() && enqueues->isLocked(queue, message)) + || (dequeues.get() && dequeues->isLocked(queue, message)); +} + + +/* Returns true when any transaction in txns has the (queue, message) pair locked. */ bool PreparedTransaction::isLocked(PreparedTransaction::list& txns, queue_id queue, message_id message) +{ + for (PreparedTransaction::list::iterator i = txns.begin(); i != txns.end(); i++) { + if (i->isLocked(queue, message)) { + return true; + } + } + return false; +} + +/* Returns an iterator to the first transaction in txns that has the (queue, message) pair locked, or txns.end() when there is none. */ PreparedTransaction::list::iterator PreparedTransaction::getLockedPreparedTransaction(PreparedTransaction::list& txns, queue_id queue, message_id message) +{ + for (PreparedTransaction::list::iterator i = txns.begin(); i != txns.end(); i++) { + if (i->isLocked(queue, message)) { + return i; + } + } + return txns.end(); +} + +/* Stores the xid and the two (shared) mapping pointers; nothing else is done at construction. */ PreparedTransaction::PreparedTransaction(const std::string& _xid, + LockedMappings::shared_ptr _enqueues, + LockedMappings::shared_ptr _dequeues) + + : xid(_xid), enqueues(_enqueues), dequeues(_dequeues) {} + diff --git a/cpp/src/qpid/legacystore/PreparedTransaction.h b/cpp/src/qpid/legacystore/PreparedTransaction.h new file mode 100644 index 0000000000..c5f7b9458a --- /dev/null +++ 
b/cpp/src/qpid/legacystore/PreparedTransaction.h @@ -0,0 +1,74 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LEGACYSTORE_PREPAREDTRANSACTION_H +#define QPID_LEGACYSTORE_PREPAREDTRANSACTION_H + +#include <list> +#include <map> +#include <set> +#include <string> +#include <boost/shared_ptr.hpp> +#include <boost/ptr_container/ptr_list.hpp> + +namespace mrg{ +namespace msgstore{ + +/* Both identifiers are 64-bit unsigned integers. */ typedef u_int64_t queue_id; +typedef u_int64_t message_id; + +/* A list of (queue id, message id) pairs. NOTE(review): the string key of LockedMappings::map is presumably a transaction xid - confirm against callers. */ class LockedMappings +{ +public: + typedef boost::shared_ptr<LockedMappings> shared_ptr; + typedef std::map<std::string, shared_ptr> map; + typedef std::pair<queue_id, message_id> idpair; + typedef std::list<idpair>::iterator iterator; + + /* Appends the pair to the list. */ void add(queue_id queue, message_id message); + /* True when the pair is in the list. */ bool isLocked(queue_id queue, message_id message); + std::size_t size() { return locked.size(); } + iterator begin() { return locked.begin(); } + iterator end() { return locked.end(); } + + /* Adds the pair to the entry stored under key in map, creating that entry when it is missing. */ static void add(LockedMappings::map& map, std::string& key, queue_id queue, message_id message); + +private: + std::list<idpair> locked; +}; + +/* An xid together with two LockedMappings (enqueues and dequeues); all three members are const and set by the constructor. */ struct PreparedTransaction +{ + typedef boost::ptr_list<PreparedTransaction> list; + + const std::string xid; + const 
LockedMappings::shared_ptr enqueues; + const LockedMappings::shared_ptr dequeues; + + PreparedTransaction(const std::string& xid, LockedMappings::shared_ptr enqueues, LockedMappings::shared_ptr dequeues); + /* True when the pair is in enqueues or in dequeues. */ bool isLocked(queue_id queue, message_id message); + /* True when any transaction in txns has the pair locked. */ static bool isLocked(PreparedTransaction::list& txns, queue_id queue, message_id message); + /* First transaction in txns that has the pair locked, or txns.end(). */ static PreparedTransaction::list::iterator getLockedPreparedTransaction(PreparedTransaction::list& txns, queue_id queue, message_id message); +}; + +}} + +#endif // ifndef QPID_LEGACYSTORE_PREPAREDTRANSACTION_H diff --git a/cpp/src/qpid/legacystore/StoreException.h b/cpp/src/qpid/legacystore/StoreException.h new file mode 100644 index 0000000000..6624aafd5a --- /dev/null +++ b/cpp/src/qpid/legacystore/StoreException.h @@ -0,0 +1,56 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/**
 * Exception type thrown by the message store. Carries a single text message
 * which is returned by what().
 */
class StoreException : public std::exception
{
    std::string text;
public:
    /** Construct with the given message text. */
    StoreException(const std::string& _text) : text(_text) {}
    /** Construct with message "<_text>: <cause.what()>". */
    StoreException(const std::string& _text, const DbException& cause) : text(_text + ": " + cause.what()) {}
    virtual ~StoreException() throw() {}
    // The returned pointer refers to this object's own string and is valid
    // only for as long as the exception object lives.
    virtual const char* what() const throw() { return text.c_str(); }
};

/**
 * Specialisation of StoreException that lets callers catch this condition
 * separately; it adds no state or behaviour of its own.
 */
class StoreFullException : public StoreException
{
public:
    StoreFullException(const std::string& _text) : StoreException(_text) {}
    StoreFullException(const std::string& _text, const DbException& cause) : StoreException(_text, cause) {}
    virtual ~StoreFullException() throw() {}

};

// The macros below throw with the message formatted as "<MESSAGE> (<file>:<line>)".
// __FILE__ and __LINE__ are those of the place where the macro is expanded.

// Throw a StoreException carrying MESSAGE.
#define THROW_STORE_EXCEPTION(MESSAGE) throw StoreException(boost::str(boost::format("%s (%s:%d)") % (MESSAGE) % __FILE__ % __LINE__))
// Throw a StoreException carrying MESSAGE, with EXCEPTION (a DbException) as the cause.
#define THROW_STORE_EXCEPTION_2(MESSAGE, EXCEPTION) throw StoreException(boost::str(boost::format("%s (%s:%d)") % (MESSAGE) % __FILE__ % __LINE__), EXCEPTION)
// Throw a StoreFullException carrying MESSAGE.
#define THROW_STORE_FULL_EXCEPTION(MESSAGE) throw StoreFullException(boost::str(boost::format("%s (%s:%d)") % (MESSAGE) % __FILE__ % __LINE__))
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/broker/Broker.h" +#include "qpid/Plugin.h" +#include "qpid/Options.h" +#include "qpid/DataDir.h" +#include "qpid/log/Statement.h" +#include "qpid/legacystore/MessageStoreImpl.h" + +using mrg::msgstore::MessageStoreImpl; + +namespace qpid { +namespace broker { + +using namespace std; + +struct StorePlugin : public Plugin { + + MessageStoreImpl::StoreOptions options; + boost::shared_ptr<MessageStoreImpl> store; + + Options* getOptions() { return &options; } + + void earlyInitialize (Plugin::Target& target) + { + Broker* broker = dynamic_cast<Broker*>(&target); + if (!broker) return; + store.reset(new MessageStoreImpl(broker)); + DataDir& dataDir = broker->getDataDir (); + if (options.storeDir.empty ()) + { + if (!dataDir.isEnabled ()) + throw Exception ("msgstore: If --data-dir is blank or --no-data-dir is specified, --store-dir must be present."); + + options.storeDir = dataDir.getPath (); + } + store->init(&options); + boost::shared_ptr<qpid::broker::MessageStore> brokerStore(store); + broker->setStore(brokerStore); + target.addFinalizer(boost::bind(&StorePlugin::finalize, this)); + } + + void initialize(Plugin::Target& target) + { + Broker* broker = dynamic_cast<Broker*>(&target); + if (!broker) return; + if (!store) return; + QPID_LOG(info, "Enabling management instrumentation for the store."); + store->initManagement(); + } + + void finalize() + { + store.reset(); + } + + const char* id() {return "StorePlugin";} +}; + +static StorePlugin instance; // Static initialization. 
+ +}} // namespace qpid::broker diff --git a/cpp/src/qpid/legacystore/TxnCtxt.cpp b/cpp/src/qpid/legacystore/TxnCtxt.cpp new file mode 100644 index 0000000000..1db41f4c70 --- /dev/null +++ b/cpp/src/qpid/legacystore/TxnCtxt.cpp @@ -0,0 +1,184 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/legacystore/TxnCtxt.h" + +#include <sstream> + +#include "qpid/legacystore/jrnl/jexception.h" +#include "qpid/legacystore/StoreException.h" + +namespace mrg { +namespace msgstore { + +void TxnCtxt::completeTxn(bool commit) { + sync(); + for (ipqItr i = impactedQueues.begin(); i != impactedQueues.end(); i++) { + commitTxn(static_cast<JournalImpl*>(*i), commit); + } + impactedQueues.clear(); + if (preparedXidStorePtr) + commitTxn(preparedXidStorePtr, commit); +} + +void TxnCtxt::commitTxn(JournalImpl* jc, bool commit) { + if (jc && loggedtx) { /* if using journal */ + boost::intrusive_ptr<DataTokenImpl> dtokp(new DataTokenImpl); + dtokp->addRef(); + dtokp->set_external_rid(true); + dtokp->set_rid(loggedtx->next()); + try { + if (commit) { + jc->txn_commit(dtokp.get(), getXid()); + sync(); + } else { + jc->txn_abort(dtokp.get(), getXid()); + } + } catch (const journal::jexception& e) { + THROW_STORE_EXCEPTION(std::string("Error commit") + e.what()); + } + } +} + +// static +uuid_t TxnCtxt::uuid; + +// static +IdSequence TxnCtxt::uuidSeq; + +// static +bool TxnCtxt::staticInit = TxnCtxt::setUuid(); + +// static +bool TxnCtxt::setUuid() { + ::uuid_generate(uuid); + return true; +} + +TxnCtxt::TxnCtxt(IdSequence* _loggedtx) : loggedtx(_loggedtx), dtokp(new DataTokenImpl), preparedXidStorePtr(0), txn(0) { + if (loggedtx) { +// // Human-readable tid: 53 bytes +// // uuit_t is a char[16] +// tid.reserve(53); +// u_int64_t* u1 = (u_int64_t*)uuid; +// u_int64_t* u2 = (u_int64_t*)(uuid + sizeof(u_int64_t)); +// std::stringstream s; +// s << "tid:" << std::hex << std::setfill('0') << std::setw(16) << uuidSeq.next() << ":" << std::setw(16) << *u1 << std::setw(16) << *u2; +// tid.assign(s.str()); + + // Binary tid: 24 bytes + tid.reserve(24); + u_int64_t c = uuidSeq.next(); + tid.append((char*)&c, sizeof(c)); + tid.append((char*)&uuid, sizeof(uuid)); + } +} + +TxnCtxt::TxnCtxt(std::string _tid, IdSequence* _loggedtx) : loggedtx(_loggedtx), dtokp(new 
DataTokenImpl), preparedXidStorePtr(0), tid(_tid), txn(0) {} + +TxnCtxt::~TxnCtxt() { abort(); } + +void TxnCtxt::sync() { + if (loggedtx) { + try { + for (ipqItr i = impactedQueues.begin(); i != impactedQueues.end(); i++) + jrnl_flush(static_cast<JournalImpl*>(*i)); + if (preparedXidStorePtr) + jrnl_flush(preparedXidStorePtr); + for (ipqItr i = impactedQueues.begin(); i != impactedQueues.end(); i++) + jrnl_sync(static_cast<JournalImpl*>(*i), &journal::jcntl::_aio_cmpl_timeout); + if (preparedXidStorePtr) + jrnl_sync(preparedXidStorePtr, &journal::jcntl::_aio_cmpl_timeout); + } catch (const journal::jexception& e) { + THROW_STORE_EXCEPTION(std::string("Error during txn sync: ") + e.what()); + } + } +} + +void TxnCtxt::jrnl_flush(JournalImpl* jc) { + if (jc && !(jc->is_txn_synced(getXid()))) + jc->flush(); +} + +void TxnCtxt::jrnl_sync(JournalImpl* jc, timespec* timeout) { + if (!jc || jc->is_txn_synced(getXid())) + return; + while (jc->get_wr_aio_evt_rem()) { + if (jc->get_wr_events(timeout) == journal::jerrno::AIO_TIMEOUT && timeout) + THROW_STORE_EXCEPTION(std::string("Error: timeout waiting for TxnCtxt::jrnl_sync()")); + } +} + +void TxnCtxt::begin(DbEnv* env, bool sync) { + int err; + try { err = env->txn_begin(0, &txn, 0); } + catch (const DbException&) { txn = 0; throw; } + if (err != 0) { + std::ostringstream oss; + oss << "Error: Env::txn_begin() returned error code: " << err; + THROW_STORE_EXCEPTION(oss.str()); + } + if (sync) + globalHolder = AutoScopedLock(new qpid::sys::Mutex::ScopedLock(globalSerialiser)); +} + +void TxnCtxt::commit() { + if (txn) { + txn->commit(0); + txn = 0; + globalHolder.reset(); + } +} + +void TxnCtxt::abort(){ + if (txn) { + txn->abort(); + txn = 0; + globalHolder.reset(); + } +} + +DbTxn* TxnCtxt::get() { return txn; } + +bool TxnCtxt::isTPC() { return false; } + +const std::string& TxnCtxt::getXid() { return tid; } + +void TxnCtxt::addXidRecord(qpid::broker::ExternalQueueStore* queue) { impactedQueues.insert(queue); } + +void 
TxnCtxt::complete(bool commit) { completeTxn(commit); } + +bool TxnCtxt::impactedQueuesEmpty() { return impactedQueues.empty(); } + +DataTokenImpl* TxnCtxt::getDtok() { return dtokp.get(); } + +void TxnCtxt::incrDtokRef() { dtokp->addRef(); } + +void TxnCtxt::recoverDtok(const u_int64_t rid, const std::string xid) { + dtokp->set_rid(rid); + dtokp->set_wstate(DataTokenImpl::ENQ); + dtokp->set_xid(xid); + dtokp->set_external_rid(true); +} + +TPCTxnCtxt::TPCTxnCtxt(const std::string& _xid, IdSequence* _loggedtx) : TxnCtxt(_loggedtx), xid(_xid) {} + +}} diff --git a/cpp/src/qpid/legacystore/TxnCtxt.h b/cpp/src/qpid/legacystore/TxnCtxt.h new file mode 100644 index 0000000000..77eaa27cd7 --- /dev/null +++ b/cpp/src/qpid/legacystore/TxnCtxt.h @@ -0,0 +1,117 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/**
 * Transaction context for the legacy store. Tracks the Berkeley DB
 * transaction (if one has been begun) and the set of queue journals the
 * transaction has touched, and drives flush/sync/commit/abort across them.
 */
class TxnCtxt : public qpid::broker::TransactionContext
{
  protected:
    // Locked by begin(env, true) and held until commit() or abort().
    static qpid::sys::Mutex globalSerialiser;

    // uuid is generated once by setUuid(); together with uuidSeq it is used
    // to build the local tid in the single-argument constructor.
    static uuid_t uuid;
    static IdSequence uuidSeq;
    static bool staticInit;
    static bool setUuid();

    typedef std::set<qpid::broker::ExternalQueueStore*> ipqdef;
    typedef ipqdef::iterator ipqItr;
    typedef std::auto_ptr<qpid::sys::Mutex::ScopedLock> AutoScopedLock;

    ipqdef impactedQueues; // list of Queues used in the txn
    IdSequence* loggedtx;  // source of record ids for commit/abort records; may be null
    boost::intrusive_ptr<DataTokenImpl> dtokp;
    AutoScopedLock globalHolder; // owns the lock on globalSerialiser while held
    JournalImpl* preparedXidStorePtr; // set by prepare(); null otherwise

    /**
     * local txn id, if non XA.
     */
    std::string tid;
    DbTxn* txn; // Berkeley DB transaction handle; null when none is open

    virtual void completeTxn(bool commit);
    void commitTxn(JournalImpl* jc, bool commit);
    void jrnl_flush(JournalImpl* jc);
    void jrnl_sync(JournalImpl* jc, timespec* timeout);

  public:
    TxnCtxt(IdSequence* _loggedtx=NULL);
    TxnCtxt(std::string _tid, IdSequence* _loggedtx);
    /** Aborts any Berkeley DB transaction that is still open. */
    virtual ~TxnCtxt();

    /**
     * Call to make sure all the data for this txn is written to safe store.
     *
     * Returns nothing; a journal error or AIO timeout is reported by
     * throwing StoreException.
     */
    void sync();
    /** Begin a Berkeley DB transaction; with sync == true also take the global serialiser lock. */
    void begin(DbEnv* env, bool sync = false);
    /** Commit the open Berkeley DB transaction (if any) and release the global lock. */
    void commit();
    /** Abort the open Berkeley DB transaction (if any) and release the global lock. */
    void abort();
    DbTxn* get();
    virtual bool isTPC();
    virtual const std::string& getXid();

    /** Record that `queue` is touched by this transaction. */
    void addXidRecord(qpid::broker::ExternalQueueStore* queue);
    inline void prepare(JournalImpl* _preparedXidStorePtr) { preparedXidStorePtr = _preparedXidStorePtr; }
    /** Sync, then write commit (true) or abort (false) records to all journals involved. */
    void complete(bool commit);
    bool impactedQueuesEmpty();
    DataTokenImpl* getDtok();
    void incrDtokRef();
    void recoverDtok(const u_int64_t rid, const std::string xid);
};


/**
 * Two-phase-commit variant of TxnCtxt: reports isTPC() == true and returns
 * the xid supplied at construction from getXid().
 */
class TPCTxnCtxt : public TxnCtxt, public qpid::broker::TPCTransactionContext
{
  protected:
    const std::string xid;

  public:
    TPCTxnCtxt(const std::string& _xid, IdSequence* _loggedtx);
    inline virtual bool isTPC() { return true; }
    inline virtual const std::string& getXid() { return xid; }
};
/**
 * \brief This class is a C++ wrapper class for the libaio functions used by the journal. Note that only those
 * functions used by the journal are included here. This is not a complete implementation of all libaio functions.
 */
class aio
{
public:
    /** \brief Wrapper for libaio's ::io_queue_init(); returns its result unchanged. */
    static inline int queue_init(int maxevents, io_context_t* ctxp)
    {
        return ::io_queue_init(maxevents, ctxp);
    }

    /** \brief Wrapper for libaio's ::io_queue_release(); returns its result unchanged. */
    static inline int queue_release(io_context_t ctx)
    {
        return ::io_queue_release(ctx);
    }

    /** \brief Wrapper for libaio's ::io_submit(); returns its result unchanged. */
    static inline int submit(io_context_t ctx, long nr, aio_cb* aios[])
    {
        return ::io_submit(ctx, nr, aios);
    }

    /** \brief Wrapper for libaio's ::io_getevents(); returns its result unchanged. */
    static inline int getevents(io_context_t ctx, long min_nr, long nr, aio_event* events, timespec* const timeout)
    {
        return ::io_getevents(ctx, min_nr, nr, events, timeout);
    }

    /**
     * \brief Prepares an aio_cb struct for read use. This is a plain wrapper for libaio's ::io_prep_pread()
     * function. Use prep_pread_2() instead when the aio_cb's data pointer must be preserved across calls.
     *
     * \param aiocbp Pointer to the aio_cb struct to be prepared.
     * \param fd File descriptor to be used for read.
     * \param buf Pointer to buffer in which read data is to be placed.
     * \param count Number of bytes to read - buffer must be large enough.
     * \param offset Offset within file from which data will be read.
     */
    static inline void prep_pread(aio_cb* aiocbp, int fd, void* buf, std::size_t count, int64_t offset)
    {
        ::io_prep_pread(aiocbp, fd, buf, count, offset);
    }

    /**
     * \brief Special version of libaio's io_prep_pread() which preserves the value of the data pointer. This allows
     * iocbs to be initialized with a pointer that can be re-used. This prepares a aio_cb struct for read use.
     *
     * \param aiocbp Pointer to the aio_cb struct to be prepared.
     * \param fd File descriptor to be used for read.
     * \param buf Pointer to buffer in which read data is to be placed.
     * \param count Number of bytes to read - buffer must be large enough.
     * \param offset Offset within file from which data will be read.
     */
    static inline void prep_pread_2(aio_cb* aiocbp, int fd, void* buf, std::size_t count, int64_t offset)
    {
        // Zero everything except the first sizeof(void*) bytes of the struct.
        // NOTE(review): this relies on the data pointer being the first member of libaio's iocb - confirm.
        std::memset((void*) ((char*) aiocbp + sizeof(void*)), 0, sizeof(aio_cb) - sizeof(void*));
        aiocbp->aio_fildes = fd;
        aiocbp->aio_lio_opcode = IO_CMD_PREAD;
        aiocbp->aio_reqprio = 0;
        aiocbp->u.c.buf = buf;
        aiocbp->u.c.nbytes = count;
        aiocbp->u.c.offset = offset;
    }

    /**
     * \brief Prepares an aio_cb struct for write use. This is a plain wrapper for libaio's ::io_prep_pwrite()
     * function. Use prep_pwrite_2() instead when the aio_cb's data pointer must be preserved across calls.
     *
     * \param aiocbp Pointer to the aio_cb struct to be prepared.
     * \param fd File descriptor to be used for write.
     * \param buf Pointer to buffer in which data to be written is located.
     * \param count Number of bytes to write.
     * \param offset Offset within file to which data will be written.
     */
    static inline void prep_pwrite(aio_cb* aiocbp, int fd, void* buf, std::size_t count, int64_t offset)
    {
        ::io_prep_pwrite(aiocbp, fd, buf, count, offset);
    }

    /**
     * \brief Special version of libaio's io_prep_pwrite() which preserves the value of the data pointer. This allows
     * iocbs to be initialized with a pointer that can be re-used. This function prepares an aio_cb struct for write
     * use.
     *
     * \param aiocbp Pointer to the aio_cb struct to be prepared.
     * \param fd File descriptor to be used for write.
     * \param buf Pointer to buffer in which data to be written is located.
     * \param count Number of bytes to write.
     * \param offset Offset within file to which data will be written.
     */
    static inline void prep_pwrite_2(aio_cb* aiocbp, int fd, void* buf, std::size_t count, int64_t offset)
    {
        // Zero everything except the first sizeof(void*) bytes of the struct.
        // NOTE(review): this relies on the data pointer being the first member of libaio's iocb - confirm.
        std::memset((void*) ((char*) aiocbp + sizeof(void*)), 0, sizeof(aio_cb) - sizeof(void*));
        aiocbp->aio_fildes = fd;
        aiocbp->aio_lio_opcode = IO_CMD_PWRITE;
        aiocbp->aio_reqprio = 0;
        aiocbp->u.c.buf = buf;
        aiocbp->u.c.nbytes = count;
        aiocbp->u.c.offset = offset;
    }
};
    class data_tok;

    /**
     * Abstract callback interface for AIO notifications. Implementers
     * receive one call per batch, separately for writes and reads.
     */
    class aio_callback
    {
    public:
        virtual ~aio_callback() {}
        /// Write-side callback; receives a list of data tokens.
        virtual void wr_aio_cb(std::vector<data_tok*>& dtokl) = 0;
        /// Read-side callback; receives a list of 16-bit values.
        /// NOTE(review): `pil` presumably holds page indices - confirm against the caller.
        virtual void rd_aio_cb(std::vector<u_int16_t>& pil) = 0;
    };
+ * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/cvar.h" diff --git a/cpp/src/qpid/legacystore/jrnl/cvar.h b/cpp/src/qpid/legacystore/jrnl/cvar.h new file mode 100644 index 0000000000..0498e743a2 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/cvar.h @@ -0,0 +1,87 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file cvar.h + * + * Qpid asynchronous store plugin library + * + * This file contains a posix condition variable class. 
+ * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_CVAR_H +#define QPID_LEGACYSTORE_JRNL_CVAR_H + +#include <cstring> +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jexception.h" +#include "qpid/legacystore/jrnl/smutex.h" +#include "qpid/legacystore/jrnl/time_ns.h" +#include <pthread.h> +#include <sstream> + +namespace mrg +{ +namespace journal +{ + + // Ultra-simple thread condition variable class + class cvar + { + private: + const smutex& _sm; + pthread_cond_t _c; + public: + inline cvar(const smutex& sm) : _sm(sm) { ::pthread_cond_init(&_c, 0); } + inline ~cvar() { ::pthread_cond_destroy(&_c); } + inline void wait() + { + PTHREAD_CHK(::pthread_cond_wait(&_c, _sm.get()), "::pthread_cond_wait", "cvar", "wait"); + } + inline void timedwait(timespec& ts) + { + PTHREAD_CHK(::pthread_cond_timedwait(&_c, _sm.get(), &ts), "::pthread_cond_timedwait", "cvar", "timedwait"); + } + inline bool waitintvl(const long intvl_ns) + { + time_ns t; t.now(); t+=intvl_ns; + int ret = ::pthread_cond_timedwait(&_c, _sm.get(), &t); + if (ret == ETIMEDOUT) + return true; + PTHREAD_CHK(ret, "::pthread_cond_timedwait", "cvar", "waitintvl"); + return false; + } + inline void signal() + { + PTHREAD_CHK(::pthread_cond_signal(&_c), "::pthread_cond_signal", "cvar", "notify"); + } + inline void broadcast() + { + PTHREAD_CHK(::pthread_cond_broadcast(&_c), "::pthread_cond_broadcast", "cvar", "broadcast"); + } + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_CVAR_H diff --git a/cpp/src/qpid/legacystore/jrnl/data_tok.cpp b/cpp/src/qpid/legacystore/jrnl/data_tok.cpp new file mode 100644 index 0000000000..ce7206d80d --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/data_tok.cpp @@ -0,0 +1,194 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file data_tok.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::data_tok (data block token). + * See comments in file data_tok.h for details. + * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/data_tok.h" + +#include <iomanip> +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jexception.h" +#include "qpid/legacystore/jrnl/slock.h" +#include <sstream> + +namespace mrg +{ +namespace journal +{ + +// Static members + +u_int64_t data_tok::_cnt = 0; +smutex data_tok::_mutex; + +data_tok::data_tok(): + _wstate(NONE), + _rstate(UNREAD), + _dsize(0), + _dblks_written(0), + _dblks_read(0), + _pg_cnt(0), + _fid(0), + _rid(0), + _xid(), + _dequeue_rid(0), + _external_rid(false) +{ + slock s(_mutex); + _icnt = _cnt++; +} + +data_tok::~data_tok() {} + +const char* +data_tok::wstate_str() const +{ + return wstate_str(_wstate); +} + +const char* +data_tok::wstate_str(write_state wstate) +{ + switch (wstate) + { + case NONE: + return "NONE"; + case ENQ_CACHED: + return "ENQ_CACHED"; + case ENQ_PART: + return "ENQ_PART"; + case ENQ_SUBM: + return "ENQ_SUBM"; + case ENQ: + return "ENQ"; + case DEQ_CACHED: + return "DEQ_CACHED"; + case DEQ_PART: + return "DEQ_PART"; + case 
DEQ_SUBM: + return "DEQ_SUBM"; + case DEQ: + return "DEQ"; + case ABORT_CACHED: + return "ABORT_CACHED"; + case ABORT_PART: + return "ABORT_PART"; + case ABORT_SUBM: + return "ABORT_SUBM"; + case ABORTED: + return "ABORTED"; + case COMMIT_CACHED: + return "COMMIT_CACHED"; + case COMMIT_PART: + return "COMMIT_PART"; + case COMMIT_SUBM: + return "COMMIT_SUBM"; + case COMMITTED: + return "COMMITTED"; + } + // Not using default: forces compiler to ensure all cases are covered. + return "<wstate unknown>"; +} + +const char* +data_tok::rstate_str() const +{ + return rstate_str(_rstate); +} + +const char* +data_tok::rstate_str(read_state rstate) +{ + switch (rstate) + { + case NONE: + return "NONE"; + case READ_PART: + return "READ_PART"; + case SKIP_PART: + return "SKIP_PART"; + case READ: + return "READ"; + // Not using default: forces compiler to ensure all cases are covered. + } + return "<rstate unknown>"; +} + +void +data_tok::set_rstate(const read_state rstate) +{ + if (_wstate != ENQ && rstate != UNREAD) + { + std::ostringstream oss; + oss << "Attempted to change read state to " << rstate_str(rstate); + oss << " while write state is not enqueued (wstate ENQ); wstate=" << wstate_str() << "."; + throw jexception(jerrno::JERR_DTOK_ILLEGALSTATE, oss.str(), "data_tok", + "set_rstate"); + } + _rstate = rstate; +} + +void +data_tok::reset() +{ + _wstate = NONE; + _rstate = UNREAD; + _dsize = 0; + _dblks_written = 0; + _dblks_read = 0; + _pg_cnt = 0; + _fid = 0; + _rid = 0; + _xid.clear(); +} + +// debug aid +std::string +data_tok::status_str() const +{ + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + oss << "dtok id=0x" << _icnt << "; ws=" << wstate_str() << "; rs=" << rstate_str(); + oss << "; fid=0x" << _fid << "; rid=0x" << _rid << "; xid="; + for (unsigned i=0; i<_xid.size(); i++) + { + if (isprint(_xid[i])) + oss << _xid[i]; + else + oss << "/" << std::setw(2) << (int)((char)_xid[i]); + } + oss << "; drid=0x" << _dequeue_rid << " extrid=" << 
(_external_rid?"T":"F"); + oss << "; ds=0x" << _dsize << "; dw=0x" << _dblks_written << "; dr=0x" << _dblks_read; + oss << " pc=0x" << _pg_cnt; + return oss.str(); +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/data_tok.h b/cpp/src/qpid/legacystore/jrnl/data_tok.h new file mode 100644 index 0000000000..e35f069399 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/data_tok.h @@ -0,0 +1,172 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file data_tok.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::data_tok (data block token). + * See class documentation for details. 
+ * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_DATA_TOK_H +#define QPID_LEGACYSTORE_JRNL_DATA_TOK_H + +namespace mrg +{ +namespace journal +{ +class data_tok; +} +} + +#include <cassert> +#include <cstddef> +#include "qpid/legacystore/jrnl/smutex.h" +#include <pthread.h> +#include <string> +#include <sys/types.h> + +namespace mrg +{ + +namespace journal +{ + + /** + * \class data_tok + * \brief Data block token (data_tok) used to track wstate of a data block through asynchronous + * I/O process + */ + class data_tok + { + public: + // TODO: Fix this, separate write state from operation + // ie: wstate = NONE, CACHED, PART, SUBM, COMPL + // op = ENQUEUE, DEQUEUE, ABORT, COMMIT + enum write_state + { + NONE, ///< Data block not sent to journal + ENQ_CACHED, ///< Data block enqueue written to page cache + ENQ_PART, ///< Data block part-submitted to AIO, waiting for page buffer to free up + ENQ_SUBM, ///< Data block enqueue submitted to AIO + ENQ, ///< Data block enqueue AIO write complete (enqueue complete) + DEQ_CACHED, ///< Data block dequeue written to page cache + DEQ_PART, ///< Data block part-submitted to AIO, waiting for page buffer to free up + DEQ_SUBM, ///< Data block dequeue submitted to AIO + DEQ, ///< Data block dequeue AIO write complete (dequeue complete) + ABORT_CACHED, + ABORT_PART, + ABORT_SUBM, + ABORTED, + COMMIT_CACHED, + COMMIT_PART, + COMMIT_SUBM, + COMMITTED + }; + + enum read_state + { + UNREAD, ///< Data block not read + READ_PART, ///< Data block is part-read; waiting for page buffer to fill + SKIP_PART, ///< Prev. 
dequeued dblock is part-skipped; waiting for page buffer to fill + READ ///< Data block is fully read + }; + + protected: + static smutex _mutex; + static u_int64_t _cnt; + u_int64_t _icnt; + write_state _wstate; ///< Enqueued / dequeued state of data + read_state _rstate; ///< Read state of data + std::size_t _dsize; ///< Data size in bytes + u_int32_t _dblks_written; ///< Data blocks read/written + u_int32_t _dblks_read; ///< Data blocks read/written + u_int32_t _pg_cnt; ///< Page counter - incr for each page containing part of data + u_int16_t _fid; ///< FID containing header of enqueue record + u_int64_t _rid; ///< RID of data set by enqueue operation + std::string _xid; ///< XID set by enqueue operation + u_int64_t _dequeue_rid; ///< RID of data set by dequeue operation + bool _external_rid; ///< Flag to indicate external setting of rid + + public: + data_tok(); + virtual ~data_tok(); + + inline u_int64_t id() const { return _icnt; } + inline write_state wstate() const { return _wstate; } + const char* wstate_str() const; + static const char* wstate_str(write_state wstate); + inline read_state rstate() const { return _rstate; } + const char* rstate_str() const; + static const char* rstate_str(read_state rstate); + inline bool is_writable() const { return _wstate == NONE || _wstate == ENQ_PART; } + inline bool is_enqueued() const { return _wstate == ENQ; } + inline bool is_readable() const { return _wstate == ENQ; } + inline bool is_read() const { return _rstate == READ; } + inline bool is_dequeueable() const { return _wstate == ENQ || _wstate == DEQ_PART; } + inline void set_wstate(const write_state wstate) { _wstate = wstate; } + void set_rstate(const read_state rstate); + inline std::size_t dsize() const { return _dsize; } + inline void set_dsize(std::size_t dsize) { _dsize = dsize; } + + inline u_int32_t dblocks_written() const { return _dblks_written; } + inline void incr_dblocks_written(u_int32_t dblks_written) + { _dblks_written += dblks_written; } + 
inline void set_dblocks_written(u_int32_t dblks_written) { _dblks_written = dblks_written; } + + inline u_int32_t dblocks_read() const { return _dblks_read; } + inline void incr_dblocks_read(u_int32_t dblks_read) { _dblks_read += dblks_read; } + inline void set_dblocks_read(u_int32_t dblks_read) { _dblks_read = dblks_read; } + + inline u_int32_t pg_cnt() const { return _pg_cnt; } + inline u_int32_t incr_pg_cnt() { return ++_pg_cnt; } + inline u_int32_t decr_pg_cnt() { assert(_pg_cnt != 0); return --_pg_cnt; } + + inline u_int16_t fid() const { return _fid; } + inline void set_fid(const u_int16_t fid) { _fid = fid; } + inline u_int64_t rid() const { return _rid; } + inline void set_rid(const u_int64_t rid) { _rid = rid; } + inline u_int64_t dequeue_rid() const {return _dequeue_rid; } + inline void set_dequeue_rid(const u_int64_t rid) { _dequeue_rid = rid; } + inline bool external_rid() const { return _external_rid; } + inline void set_external_rid(const bool external_rid) { _external_rid = external_rid; } + + inline bool has_xid() const { return !_xid.empty(); } + inline const std::string& xid() const { return _xid; } + inline void clear_xid() { _xid.clear(); } + inline void set_xid(const std::string& xid) { _xid.assign(xid); } + inline void set_xid(const void* xidp, const std::size_t xid_len) + { _xid.assign((const char*)xidp, xid_len); } + + void reset(); + + // debug aid + std::string status_str() const; + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_DATA_TOK_H diff --git a/cpp/src/qpid/legacystore/jrnl/deq_hdr.h b/cpp/src/qpid/legacystore/jrnl/deq_hdr.h new file mode 100644 index 0000000000..ae7081eac1 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/deq_hdr.h @@ -0,0 +1,141 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file deq_hdr.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::deq_hdr (dequeue record), + * used to dequeue a previously enqueued record. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_DEQ_HDR_H +#define QPID_LEGACYSTORE_JRNL_DEQ_HDR_H + +#include <cstddef> +#include "qpid/legacystore/jrnl/rec_hdr.h" + +namespace mrg +{ +namespace journal +{ + +#pragma pack(1) + + /** + * \brief Struct for dequeue record. + * + * Struct for dequeue record. If this record has a non-zero xidsize field (i.e., there is a + * valid XID), then this header is followed by the XID of xidsize bytes and a rec_tail. If, + * on the other hand, this record has a zero xidsize (i.e., there is no XID), then the rec_tail + * is absent. + * + * Note that this record had its own rid distinct from the rid of the record it is dequeueing. + * The rid field below is the rid of the dequeue record itself; the deq-rid field is the rid of a + * previous enqueue record being dequeued by this record. 
+ * + * Record header info in binary format (32 bytes): + * <pre> + * 0 7 + * +---+---+---+---+---+---+---+---+ -+ + * | magic | v | e | flags | | + * +---+---+---+---+---+---+---+---+ | struct hdr + * | rid | | + * +---+---+---+---+---+---+---+---+ -+ + * | deq-rid | + * +---+---+---+---+---+---+---+---+ + * | xidsize | + * +---+---+---+---+---+---+---+---+ + * v = file version (If the format or encoding of this file changes, then this + * number should be incremented) + * e = endian flag, false (0x00) for little endian, true (0x01) for big endian + * </pre> + * + * Note that journal files should be transferable between 32- and 64-bit + * hardware of the same endianness, but not between hardware of opposite + * entianness without some sort of binary conversion utility. Thus buffering + * will be needed for types that change size between 32- and 64-bit compiles. + */ + struct deq_hdr : rec_hdr + { + u_int64_t _deq_rid; ///< Record ID of dequeued record +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + u_int32_t _filler0; ///< Big-endian filler for 32-bit size_t +#endif + std::size_t _xidsize; ///< XID size +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + u_int32_t _filler0; ///< Little-endian filler for 32-bit size_t +#endif + static const u_int16_t DEQ_HDR_TXNCMPLCOMMIT_MASK = 0x10; + + /** + * \brief Default constructor, which sets all values to 0. + */ + inline deq_hdr(): rec_hdr(), _deq_rid(0), +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + _filler0(0), +#endif + _xidsize(0) +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + , _filler0(0) +#endif + {} + + /** + * \brief Convenience constructor which initializes values during construction. 
+ */ + inline deq_hdr(const u_int32_t magic, const u_int8_t version, const u_int64_t rid, + const u_int64_t deq_rid, const std::size_t xidsize, const bool owi, + const bool txn_coml_commit = false): + rec_hdr(magic, version, rid, owi), _deq_rid(deq_rid), +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + _filler0(0), +#endif + _xidsize(xidsize) +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + , _filler0(0) +#endif + { set_txn_coml_commit(txn_coml_commit); } + + + inline bool is_txn_coml_commit() const { return _uflag & DEQ_HDR_TXNCMPLCOMMIT_MASK; } + + inline void set_txn_coml_commit(const bool commit) + { + _uflag = commit ? _uflag | DEQ_HDR_TXNCMPLCOMMIT_MASK : + _uflag & (~DEQ_HDR_TXNCMPLCOMMIT_MASK); + } + + /** + * \brief Returns the size of the header in bytes. + */ + inline static std::size_t size() { return sizeof(deq_hdr); } + }; + +#pragma pack() + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_DEQ_HDR_H diff --git a/cpp/src/qpid/legacystore/jrnl/deq_rec.cpp b/cpp/src/qpid/legacystore/jrnl/deq_rec.cpp new file mode 100644 index 0000000000..4de412c201 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/deq_rec.cpp @@ -0,0 +1,459 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file deq_rec.cpp + * + * Qpid asynchronous store plugin library + * + * This file contains the code for the mrg::journal::deq_rec (journal dequeue + * record) class. See comments in file deq_rec.h for details. + * + * \author Kim van der Riet + */ + +#include "jrnl/deq_rec.h" + +#include <cassert> +#include <cerrno> +#include <cstdlib> +#include <cstring> +#include <iomanip> +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jexception.h" +#include <sstream> + +namespace mrg +{ +namespace journal +{ + +deq_rec::deq_rec(): + _deq_hdr(RHM_JDAT_DEQ_MAGIC, RHM_JDAT_VERSION, 0, 0, 0, false), + _xidp(0), + _buff(0), + _deq_tail(_deq_hdr) +{} + +deq_rec::deq_rec(const u_int64_t rid, const u_int64_t drid, const void* const xidp, + const std::size_t xidlen, const bool owi, const bool txn_coml_commit): + _deq_hdr(RHM_JDAT_DEQ_MAGIC, RHM_JDAT_VERSION, rid, drid, xidlen, owi, txn_coml_commit), + _xidp(xidp), + _buff(0), + _deq_tail(_deq_hdr) +{} + +deq_rec::~deq_rec() +{ + clean(); +} + +void +deq_rec::reset() +{ + _deq_hdr._rid = 0; + _deq_hdr.set_owi(false); + _deq_hdr.set_txn_coml_commit(false); + _deq_hdr._deq_rid = 0; + _deq_hdr._xidsize = 0; + _deq_tail._rid = 0; + _xidp = 0; + _buff = 0; +} + +void +deq_rec::reset(const u_int64_t rid, const u_int64_t drid, const void* const xidp, + const std::size_t xidlen, const bool owi, const bool txn_coml_commit) +{ + _deq_hdr._rid = rid; + _deq_hdr.set_owi(owi); + _deq_hdr.set_txn_coml_commit(txn_coml_commit); + _deq_hdr._deq_rid = drid; + _deq_hdr._xidsize = xidlen; + _deq_tail._rid = rid; + _xidp = xidp; + _buff = 0; +} + +u_int32_t +deq_rec::encode(void* wptr, u_int32_t rec_offs_dblks, u_int32_t max_size_dblks) +{ + assert(wptr != 0); + assert(max_size_dblks > 0); + if (_xidp == 0) + assert(_deq_hdr._xidsize == 0); + + std::size_t rec_offs = rec_offs_dblks * JRNL_DBLK_SIZE; + 
std::size_t rem = max_size_dblks * JRNL_DBLK_SIZE; + std::size_t wr_cnt = 0; + if (rec_offs_dblks) // Continuation of split dequeue record (over 2 or more pages) + { + if (size_dblks(rec_size()) - rec_offs_dblks > max_size_dblks) // Further split required + { + rec_offs -= sizeof(_deq_hdr); + std::size_t wsize = _deq_hdr._xidsize > rec_offs ? _deq_hdr._xidsize - rec_offs : 0; + std::size_t wsize2 = wsize; + if (wsize) + { + if (wsize > rem) + wsize = rem; + std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize); + wr_cnt += wsize; + rem -= wsize; + } + rec_offs -= _deq_hdr._xidsize - wsize2; + if (rem) + { + wsize = sizeof(_deq_tail) > rec_offs ? sizeof(_deq_tail) - rec_offs : 0; + wsize2 = wsize; + if (wsize) + { + if (wsize > rem) + wsize = rem; + std::memcpy((char*)wptr + wr_cnt, (char*)&_deq_tail + rec_offs, wsize); + wr_cnt += wsize; + rem -= wsize; + } + rec_offs -= sizeof(_deq_tail) - wsize2; + } + assert(rem == 0); + assert(rec_offs == 0); + } + else // No further split required + { + rec_offs -= sizeof(_deq_hdr); + std::size_t wsize = _deq_hdr._xidsize > rec_offs ? _deq_hdr._xidsize - rec_offs : 0; + if (wsize) + { + std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize); + wr_cnt += wsize; + } + rec_offs -= _deq_hdr._xidsize - wsize; + wsize = sizeof(_deq_tail) > rec_offs ? 
sizeof(_deq_tail) - rec_offs : 0; + if (wsize) + { + std::memcpy((char*)wptr + wr_cnt, (char*)&_deq_tail + rec_offs, wsize); + wr_cnt += wsize; +#ifdef RHM_CLEAN + std::size_t rec_offs = rec_offs_dblks * JRNL_DBLK_SIZE; + std::size_t dblk_rec_size = size_dblks(rec_size() - rec_offs) * JRNL_DBLK_SIZE; + std::memset((char*)wptr + wr_cnt, RHM_CLEAN_CHAR, dblk_rec_size - wr_cnt); +#endif + } + rec_offs -= sizeof(_deq_tail) - wsize; + assert(rec_offs == 0); + } + } + else // Start at beginning of data record + { + // Assumption: the header will always fit into the first dblk + std::memcpy(wptr, (void*)&_deq_hdr, sizeof(_deq_hdr)); + wr_cnt = sizeof(_deq_hdr); + if (size_dblks(rec_size()) > max_size_dblks) // Split required - can only occur with xid + { + std::size_t wsize; + rem -= sizeof(_deq_hdr); + if (rem) + { + wsize = rem >= _deq_hdr._xidsize ? _deq_hdr._xidsize : rem; + std::memcpy((char*)wptr + wr_cnt, _xidp, wsize); + wr_cnt += wsize; + rem -= wsize; + } + if (rem) + { + wsize = rem >= sizeof(_deq_tail) ? 
sizeof(_deq_tail) : rem; + std::memcpy((char*)wptr + wr_cnt, (void*)&_deq_tail, wsize); + wr_cnt += wsize; + rem -= wsize; + } + assert(rem == 0); + } + else // No split required + { + if (_deq_hdr._xidsize) + { + std::memcpy((char*)wptr + wr_cnt, _xidp, _deq_hdr._xidsize); + wr_cnt += _deq_hdr._xidsize; + std::memcpy((char*)wptr + wr_cnt, (void*)&_deq_tail, sizeof(_deq_tail)); + wr_cnt += sizeof(_deq_tail); + } +#ifdef RHM_CLEAN + std::size_t dblk_rec_size = size_dblks(rec_size()) * JRNL_DBLK_SIZE; + std::memset((char*)wptr + wr_cnt, RHM_CLEAN_CHAR, dblk_rec_size - wr_cnt); +#endif + } + } + return size_dblks(wr_cnt); +} + +u_int32_t +deq_rec::decode(rec_hdr& h, void* rptr, u_int32_t rec_offs_dblks, u_int32_t max_size_dblks) +{ + assert(rptr != 0); + assert(max_size_dblks > 0); + + std::size_t rd_cnt = 0; + if (rec_offs_dblks) // Continuation of record on new page + { + const u_int32_t hdr_xid_dblks = size_dblks(deq_hdr::size() + _deq_hdr._xidsize); + const u_int32_t hdr_xid_tail_dblks = size_dblks(deq_hdr::size() + _deq_hdr._xidsize + + rec_tail::size()); + const std::size_t rec_offs = rec_offs_dblks * JRNL_DBLK_SIZE; + + if (hdr_xid_tail_dblks - rec_offs_dblks <= max_size_dblks) + { + // Remainder of xid fits within this page + if (rec_offs - deq_hdr::size() < _deq_hdr._xidsize) + { + // Part of xid still outstanding, copy remainder of xid and tail + const std::size_t xid_offs = rec_offs - deq_hdr::size(); + const std::size_t xid_rem = _deq_hdr._xidsize - xid_offs; + std::memcpy((char*)_buff + xid_offs, rptr, xid_rem); + rd_cnt = xid_rem; + std::memcpy((void*)&_deq_tail, ((char*)rptr + rd_cnt), sizeof(_deq_tail)); + chk_tail(); + rd_cnt += sizeof(_deq_tail); + } + else + { + // Tail or part of tail only outstanding, complete tail + const std::size_t tail_offs = rec_offs - deq_hdr::size() - _deq_hdr._xidsize; + const std::size_t tail_rem = rec_tail::size() - tail_offs; + std::memcpy((char*)&_deq_tail + tail_offs, rptr, tail_rem); + chk_tail(); + rd_cnt = 
tail_rem; + } + } + else if (hdr_xid_dblks - rec_offs_dblks <= max_size_dblks) + { + // Remainder of xid fits within this page, tail split + const std::size_t xid_offs = rec_offs - deq_hdr::size(); + const std::size_t xid_rem = _deq_hdr._xidsize - xid_offs; + std::memcpy((char*)_buff + xid_offs, rptr, xid_rem); + rd_cnt += xid_rem; + const std::size_t tail_rem = (max_size_dblks * JRNL_DBLK_SIZE) - rd_cnt; + if (tail_rem) + { + std::memcpy((void*)&_deq_tail, ((char*)rptr + xid_rem), tail_rem); + rd_cnt += tail_rem; + } + } + else + { + // Remainder of xid split + const std::size_t xid_cp_size = (max_size_dblks * JRNL_DBLK_SIZE); + std::memcpy((char*)_buff + rec_offs - deq_hdr::size(), rptr, xid_cp_size); + rd_cnt += xid_cp_size; + } + } + else // Start of record + { + // Get and check header + _deq_hdr.hdr_copy(h); + rd_cnt = sizeof(rec_hdr); + _deq_hdr._deq_rid = *(u_int64_t*)((char*)rptr + rd_cnt); + rd_cnt += sizeof(u_int64_t); +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + rd_cnt += sizeof(u_int32_t); // Filler 0 +#endif + _deq_hdr._xidsize = *(std::size_t*)((char*)rptr + rd_cnt); + rd_cnt = _deq_hdr.size(); + chk_hdr(); + if (_deq_hdr._xidsize) + { + _buff = std::malloc(_deq_hdr._xidsize); + MALLOC_CHK(_buff, "_buff", "deq_rec", "decode"); + const u_int32_t hdr_xid_dblks = size_dblks(deq_hdr::size() + _deq_hdr._xidsize); + const u_int32_t hdr_xid_tail_dblks = size_dblks(deq_hdr::size() + _deq_hdr._xidsize + + rec_tail::size()); + + // Check if record (header + xid + tail) fits within this page, we can check the + // tail before the expense of copying data to memory + if (hdr_xid_tail_dblks <= max_size_dblks) + { + // Entire header, xid and tail fits within this page + std::memcpy(_buff, (char*)rptr + rd_cnt, _deq_hdr._xidsize); + rd_cnt += _deq_hdr._xidsize; + std::memcpy((void*)&_deq_tail, (char*)rptr + rd_cnt, sizeof(_deq_tail)); + rd_cnt += sizeof(_deq_tail); + chk_tail(); + } + else if (hdr_xid_dblks <= max_size_dblks) + { + // Entire header and 
xid fit within this page, tail split + std::memcpy(_buff, (char*)rptr + rd_cnt, _deq_hdr._xidsize); + rd_cnt += _deq_hdr._xidsize; + const std::size_t tail_rem = (max_size_dblks * JRNL_DBLK_SIZE) - rd_cnt; + if (tail_rem) + { + std::memcpy((void*)&_deq_tail, (char*)rptr + rd_cnt, tail_rem); + rd_cnt += tail_rem; + } + } + else + { + // Header fits within this page, xid split + const std::size_t xid_cp_size = (max_size_dblks * JRNL_DBLK_SIZE) - rd_cnt; + std::memcpy(_buff, (char*)rptr + rd_cnt, xid_cp_size); + rd_cnt += xid_cp_size; + } + } + } + return size_dblks(rd_cnt); +} + +bool +deq_rec::rcv_decode(rec_hdr h, std::ifstream* ifsp, std::size_t& rec_offs) +{ + if (rec_offs == 0) + { + _deq_hdr.hdr_copy(h); + ifsp->read((char*)&_deq_hdr._deq_rid, sizeof(u_int64_t)); +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + ifsp->ignore(sizeof(u_int32_t)); // _filler0 +#endif + ifsp->read((char*)&_deq_hdr._xidsize, sizeof(std::size_t)); +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + ifsp->ignore(sizeof(u_int32_t)); // _filler0 +#endif + rec_offs = sizeof(_deq_hdr); + // Read header, allocate (if req'd) for xid + if (_deq_hdr._xidsize) + { + _buff = std::malloc(_deq_hdr._xidsize); + MALLOC_CHK(_buff, "_buff", "enq_rec", "rcv_decode"); + } + } + if (rec_offs < sizeof(_deq_hdr) + _deq_hdr._xidsize) + { + // Read xid (or continue reading xid) + std::size_t offs = rec_offs - sizeof(_deq_hdr); + ifsp->read((char*)_buff + offs, _deq_hdr._xidsize - offs); + std::size_t size_read = ifsp->gcount(); + rec_offs += size_read; + if (size_read < _deq_hdr._xidsize - offs) + { + assert(ifsp->eof()); + // As we may have read past eof, turn off fail bit + ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit)); + assert(!ifsp->fail() && !ifsp->bad()); + return false; + } + } + if (rec_offs < sizeof(_deq_hdr) + + (_deq_hdr._xidsize ? 
_deq_hdr._xidsize + sizeof(rec_tail) : 0)) + { + // Read tail (or continue reading tail) + std::size_t offs = rec_offs - sizeof(_deq_hdr) - _deq_hdr._xidsize; + ifsp->read((char*)&_deq_tail + offs, sizeof(rec_tail) - offs); + std::size_t size_read = ifsp->gcount(); + rec_offs += size_read; + if (size_read < sizeof(rec_tail) - offs) + { + assert(ifsp->eof()); + // As we may have read past eof, turn off fail bit + ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit)); + assert(!ifsp->fail() && !ifsp->bad()); + return false; + } + } + ifsp->ignore(rec_size_dblks() * JRNL_DBLK_SIZE - rec_size()); + if (_deq_hdr._xidsize) + chk_tail(); // Throws if tail invalid or record incomplete + assert(!ifsp->fail() && !ifsp->bad()); + return true; +} + +std::size_t +deq_rec::get_xid(void** const xidpp) +{ + if (!_buff) + { + *xidpp = 0; + return 0; + } + *xidpp = _buff; + return _deq_hdr._xidsize; +} + +std::string& +deq_rec::str(std::string& str) const +{ + std::ostringstream oss; + oss << "deq_rec: m=" << _deq_hdr._magic; + oss << " v=" << (int)_deq_hdr._version; + oss << " rid=" << _deq_hdr._rid; + oss << " drid=" << _deq_hdr._deq_rid; + if (_xidp) + oss << " xid=\"" << _xidp << "\""; + str.append(oss.str()); + return str; +} + +std::size_t +deq_rec::xid_size() const +{ + return _deq_hdr._xidsize; +} + +std::size_t +deq_rec::rec_size() const +{ + return deq_hdr::size() + (_deq_hdr._xidsize ? 
_deq_hdr._xidsize + rec_tail::size() : 0); +} + +void +deq_rec::chk_hdr() const +{ + jrec::chk_hdr(_deq_hdr); + if (_deq_hdr._magic != RHM_JDAT_DEQ_MAGIC) + { + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + oss << "deq magic: rid=0x" << std::setw(16) << _deq_hdr._rid; + oss << ": expected=0x" << std::setw(8) << RHM_JDAT_DEQ_MAGIC; + oss << " read=0x" << std::setw(2) << (int)_deq_hdr._magic; + throw jexception(jerrno::JERR_JREC_BADRECHDR, oss.str(), "deq_rec", "chk_hdr"); + } +} + +void +deq_rec::chk_hdr(u_int64_t rid) const +{ + chk_hdr(); + jrec::chk_rid(_deq_hdr, rid); +} + +void +deq_rec::chk_tail() const +{ + jrec::chk_tail(_deq_tail, _deq_hdr); +} + +void +deq_rec::clean() +{ + // clean up allocated memory here +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/deq_rec.h b/cpp/src/qpid/legacystore/jrnl/deq_rec.h new file mode 100644 index 0000000000..d870b658da --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/deq_rec.h @@ -0,0 +1,103 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +/** + * \file deq_rec.h + * + * Qpid asynchronous store plugin library + * + * This file contains the code for the mrg::journal::deq_rec (journal dequeue + * record) class. See class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_DEQ_REQ_H +#define QPID_LEGACYSTORE_JRNL_DEQ_REQ_H + +namespace mrg +{ +namespace journal +{ +class deq_rec; +} +} + +#include <cstddef> +#include "qpid/legacystore/jrnl/deq_hdr.h" +#include "qpid/legacystore/jrnl/jrec.h" + +namespace mrg +{ +namespace journal +{ + + /** + * \class deq_rec + * \brief Class to handle a single journal dequeue record. + */ + class deq_rec : public jrec + { + private: + deq_hdr _deq_hdr; ///< Dequeue header + const void* _xidp; ///< xid pointer for encoding (writing to disk) + void* _buff; ///< Pointer to buffer to receive data read from disk + rec_tail _deq_tail; ///< Record tail, only encoded if XID is present + + public: + // constructor used for read operations and xid will have memory allocated + deq_rec(); + // constructor used for write operations, where xid already exists + deq_rec(const u_int64_t rid, const u_int64_t drid, const void* const xidp, + const std::size_t xidlen, const bool owi, const bool txn_coml_commit); + virtual ~deq_rec(); + + // Prepare instance for use in reading data from journal + void reset(); + // Prepare instance for use in writing data to journal + void reset(const u_int64_t rid, const u_int64_t drid, const void* const xidp, + const std::size_t xidlen, const bool owi, const bool txn_coml_commit); + u_int32_t encode(void* wptr, u_int32_t rec_offs_dblks, u_int32_t max_size_dblks); + u_int32_t decode(rec_hdr& h, void* rptr, u_int32_t rec_offs_dblks, + u_int32_t max_size_dblks); + // Decode used for recover + bool rcv_decode(rec_hdr h, std::ifstream* ifsp, std::size_t& rec_offs); + + inline bool is_txn_coml_commit() const { return _deq_hdr.is_txn_coml_commit(); } + inline u_int64_t rid() const { return _deq_hdr._rid; } 
+ inline u_int64_t deq_rid() const { return _deq_hdr._deq_rid; } + std::size_t get_xid(void** const xidpp); + std::string& str(std::string& str) const; + inline std::size_t data_size() const { return 0; } // This record never carries data + std::size_t xid_size() const; + std::size_t rec_size() const; + + private: + virtual void chk_hdr() const; + virtual void chk_hdr(u_int64_t rid) const; + virtual void chk_tail() const; + virtual void clean(); + }; // class deq_rec + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_DEQ_REQ_H diff --git a/cpp/src/qpid/legacystore/jrnl/enq_hdr.h b/cpp/src/qpid/legacystore/jrnl/enq_hdr.h new file mode 100644 index 0000000000..0d1e6116be --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/enq_hdr.h @@ -0,0 +1,165 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file enq_hdr.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::enq_hdr (enueue header), + * used to start an enqueue record in the journal. 
+ * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_ENQ_HDR_H +#define QPID_LEGACYSTORE_JRNL_ENQ_HDR_H + +#include <cstddef> +#include "qpid/legacystore/jrnl/rec_hdr.h" + +namespace mrg +{ +namespace journal +{ + +#pragma pack(1) + + /** + * \brief Struct for enqueue record. + * + * Struct for enqueue record. In addition to the common data, this header includes both the + * xid and data blob sizes. + * + * This header precedes all enqueue data in journal files. + * + * Record header info in binary format (32 bytes): + * <pre> + * 0 7 + * +---+---+---+---+---+---+---+---+ -+ + * | magic | v | e | flags | | + * +---+---+---+---+---+---+---+---+ | struct hdr + * | rid | | + * +---+---+---+---+---+---+---+---+ -+ + * | xidsize | + * +---+---+---+---+---+---+---+---+ + * | dsize | + * +---+---+---+---+---+---+---+---+ + * v = file version (If the format or encoding of this file changes, then this + * number should be incremented) + * e = endian flag, false (0x00) for little endian, true (0x01) for big endian + * </pre> + * + * Note that journal files should be transferable between 32- and 64-bit + * hardware of the same endianness, but not between hardware of opposite + * entianness without some sort of binary conversion utility. Thus buffering + * will be needed for types that change size between 32- and 64-bit compiles. 
+ */ + struct enq_hdr : rec_hdr + { +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + u_int32_t _filler0; ///< Big-endian filler for 32-bit size_t +#endif + std::size_t _xidsize; ///< XID size +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + u_int32_t _filler0; ///< Little-endian filler for 32-bit size_t +#endif +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + u_int32_t _filler1; ///< Big-endian filler for 32-bit size_t +#endif + std::size_t _dsize; ///< Record data size +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + u_int32_t _filler1; ///< Little-endian filler for 32-bit size_t +#endif + static const u_int16_t ENQ_HDR_TRANSIENT_MASK = 0x10; + static const u_int16_t ENQ_HDR_EXTERNAL_MASK = 0x20; + + /** + * \brief Default constructor, which sets all values to 0. + */ + inline enq_hdr(): rec_hdr(), +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + _filler0(0), +#endif + _xidsize(0), +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + _filler0(0), +#endif +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + _filler1(0), +#endif + _dsize(0) +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + , _filler1(0) +#endif + {} + + /** + * \brief Convenience constructor which initializes values during construction. 
+ */ + inline enq_hdr(const u_int32_t magic, const u_int8_t version, const u_int64_t rid, + const std::size_t xidsize, const std::size_t dsize, const bool owi, + const bool transient = false): rec_hdr(magic, version, rid, owi), +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + _filler0(0), +#endif + _xidsize(xidsize), +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + _filler0(0), +#endif +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + _filler1(0), +#endif + _dsize(dsize) +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + , _filler1(0) +#endif + { set_transient(transient); } + + + inline bool is_transient() const { return _uflag & ENQ_HDR_TRANSIENT_MASK; } + + inline void set_transient(const bool transient) + { + _uflag = transient ? _uflag | ENQ_HDR_TRANSIENT_MASK : + _uflag & (~ENQ_HDR_TRANSIENT_MASK); + } + + inline bool is_external() const { return _uflag & ENQ_HDR_EXTERNAL_MASK; } + + inline void set_external(const bool external) + { + _uflag = external ? _uflag | ENQ_HDR_EXTERNAL_MASK : + _uflag & (~ENQ_HDR_EXTERNAL_MASK); + } + + /** + * \brief Returns the size of the header in bytes. + */ + inline static std::size_t size() { return sizeof(enq_hdr); } + }; + +#pragma pack() + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_ENQ_HDR_H diff --git a/cpp/src/qpid/legacystore/jrnl/enq_map.cpp b/cpp/src/qpid/legacystore/jrnl/enq_map.cpp new file mode 100644 index 0000000000..d024b704a7 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/enq_map.cpp @@ -0,0 +1,183 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file enq_map.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::enq_map (enqueue map). See + * comments in file enq_map.h for details. + * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/enq_map.h" + +#include <iomanip> +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/slock.h" +#include <sstream> + + +namespace mrg +{ +namespace journal +{ + +// static return/error codes +int16_t enq_map::EMAP_DUP_RID = -3; +int16_t enq_map::EMAP_LOCKED = -2; +int16_t enq_map::EMAP_RID_NOT_FOUND = -1; +int16_t enq_map::EMAP_OK = 0; +int16_t enq_map::EMAP_FALSE = 0; +int16_t enq_map::EMAP_TRUE = 1; + +enq_map::enq_map(): + _map(), + _pfid_enq_cnt() +{} + +enq_map::~enq_map() {} + +void +enq_map::set_num_jfiles(const u_int16_t num_jfiles) +{ + _pfid_enq_cnt.resize(num_jfiles, 0); +} + + +int16_t +enq_map::insert_pfid(const u_int64_t rid, const u_int16_t pfid) +{ + return insert_pfid(rid, pfid, false); +} + +int16_t +enq_map::insert_pfid(const u_int64_t rid, const u_int16_t pfid, const bool locked) +{ + std::pair<emap_itr, bool> ret; + emap_data_struct rec(pfid, locked); + { + slock s(_mutex); + ret = _map.insert(emap_param(rid, rec)); + } + if (ret.second == false) + return EMAP_DUP_RID; + _pfid_enq_cnt.at(pfid)++; + return EMAP_OK; +} + +int16_t +enq_map::get_pfid(const u_int64_t rid) +{ + slock s(_mutex); + emap_itr itr = _map.find(rid); + if (itr == _map.end()) // not found in map + return EMAP_RID_NOT_FOUND; + if (itr->second._lock) + return 
EMAP_LOCKED; + return itr->second._pfid; +} + +int16_t +enq_map::get_remove_pfid(const u_int64_t rid, const bool txn_flag) +{ + slock s(_mutex); + emap_itr itr = _map.find(rid); + if (itr == _map.end()) // not found in map + return EMAP_RID_NOT_FOUND; + if (itr->second._lock && !txn_flag) // locked, but not a commit/abort + return EMAP_LOCKED; + u_int16_t pfid = itr->second._pfid; + _map.erase(itr); + _pfid_enq_cnt.at(pfid)--; + return pfid; +} + +bool +enq_map::is_enqueued(const u_int64_t rid, bool ignore_lock) +{ + slock s(_mutex); + emap_itr itr = _map.find(rid); + if (itr == _map.end()) // not found in map + return false; + if (!ignore_lock && itr->second._lock) // locked + return false; + return true; +} + +int16_t +enq_map::lock(const u_int64_t rid) +{ + slock s(_mutex); + emap_itr itr = _map.find(rid); + if (itr == _map.end()) // not found in map + return EMAP_RID_NOT_FOUND; + itr->second._lock = true; + return EMAP_OK; +} + +int16_t +enq_map::unlock(const u_int64_t rid) +{ + slock s(_mutex); + emap_itr itr = _map.find(rid); + if (itr == _map.end()) // not found in map + return EMAP_RID_NOT_FOUND; + itr->second._lock = false; + return EMAP_OK; +} + +int16_t +enq_map::is_locked(const u_int64_t rid) +{ + slock s(_mutex); + emap_itr itr = _map.find(rid); + if (itr == _map.end()) // not found in map + return EMAP_RID_NOT_FOUND; + return itr->second._lock ? 
EMAP_TRUE : EMAP_FALSE; +} + +void +enq_map::rid_list(std::vector<u_int64_t>& rv) +{ + rv.clear(); + { + slock s(_mutex); + for (emap_itr itr = _map.begin(); itr != _map.end(); itr++) + rv.push_back(itr->first); + } +} + +void +enq_map::pfid_list(std::vector<u_int16_t>& fv) +{ + fv.clear(); + { + slock s(_mutex); + for (emap_itr itr = _map.begin(); itr != _map.end(); itr++) + fv.push_back(itr->second._pfid); + } +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/enq_map.h b/cpp/src/qpid/legacystore/jrnl/enq_map.h new file mode 100644 index 0000000000..75404afebe --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/enq_map.h @@ -0,0 +1,127 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file enq_map.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::enq_map (enqueue map). + * See class documentation for details. 
+ * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_ENQ_MAP_H +#define QPID_LEGACYSTORE_JRNL_ENQ_MAP_H + +namespace mrg +{ +namespace journal +{ +class enq_map; +} +} + +#include "qpid/legacystore/jrnl/jexception.h" +#include "qpid/legacystore/jrnl/smutex.h" +#include <map> +#include <pthread.h> +#include <vector> + +namespace mrg +{ +namespace journal +{ + + /** + * \class enq_map + * \brief Class for storing the physical file id (pfid) and a transaction locked flag for each enqueued + * data block using the record id (rid) as a key. This is the primary mechanism for + * deterimining the enqueue low water mark: if a pfid exists in this map, then there is + * at least one still-enqueued record in that file. (The transaction map must also be + * clear, however.) + * + * Map rids against pfid and lock status. As records are enqueued, they are added to this + * map, and as they are dequeued, they are removed. An enqueue is locked when a transactional + * dequeue is pending that has been neither committed nor aborted. + * <pre> + * key data + * + * rid1 --- [ pfid, txn_lock ] + * rid2 --- [ pfid, txn_lock ] + * rid3 --- [ pfid, txn_lock ] + * ... 
+ * </pre> + */ + class enq_map + { + public: + // return/error codes + static int16_t EMAP_DUP_RID; + static int16_t EMAP_LOCKED; + static int16_t EMAP_RID_NOT_FOUND; + static int16_t EMAP_OK; + static int16_t EMAP_FALSE; + static int16_t EMAP_TRUE; + + private: + + struct emap_data_struct + { + u_int16_t _pfid; + bool _lock; + emap_data_struct(const u_int16_t pfid, const bool lock) : _pfid(pfid), _lock(lock) {} + }; + typedef std::pair<u_int64_t, emap_data_struct> emap_param; + typedef std::map<u_int64_t, emap_data_struct> emap; + typedef emap::iterator emap_itr; + + emap _map; + smutex _mutex; + std::vector<u_int32_t> _pfid_enq_cnt; + + public: + enq_map(); + virtual ~enq_map(); + + void set_num_jfiles(const u_int16_t num_jfiles); + inline u_int32_t get_enq_cnt(const u_int16_t pfid) const { return _pfid_enq_cnt.at(pfid); }; + + int16_t insert_pfid(const u_int64_t rid, const u_int16_t pfid); // 0=ok; -3=duplicate rid; + int16_t insert_pfid(const u_int64_t rid, const u_int16_t pfid, const bool locked); // 0=ok; -3=duplicate rid; + int16_t get_pfid(const u_int64_t rid); // >=0=pfid; -1=rid not found; -2=locked + int16_t get_remove_pfid(const u_int64_t rid, const bool txn_flag = false); // >=0=pfid; -1=rid not found; -2=locked + bool is_enqueued(const u_int64_t rid, bool ignore_lock = false); + int16_t lock(const u_int64_t rid); // 0=ok; -1=rid not found + int16_t unlock(const u_int64_t rid); // 0=ok; -1=rid not found + int16_t is_locked(const u_int64_t rid); // 1=true; 0=false; -1=rid not found + inline void clear() { _map.clear(); } + inline bool empty() const { return _map.empty(); } + inline u_int32_t size() const { return u_int32_t(_map.size()); } + void rid_list(std::vector<u_int64_t>& rv); + void pfid_list(std::vector<u_int16_t>& fv); + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_ENQ_MAP_H diff --git a/cpp/src/qpid/legacystore/jrnl/enq_rec.cpp b/cpp/src/qpid/legacystore/jrnl/enq_rec.cpp new file mode 100644 index 
0000000000..468599836b --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/enq_rec.cpp @@ -0,0 +1,638 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file enq_rec.cpp + * + * Qpid asynchronous store plugin library + * + * This file contains the code for the mrg::journal::enq_rec (journal enqueue + * record) class. See comments in file enq_rec.h for details. + * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/enq_rec.h" + +#include <cassert> +#include <cerrno> +#include <cstdlib> +#include <cstring> +#include <iomanip> +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jexception.h" +#include <sstream> + +namespace mrg +{ +namespace journal +{ + +// Constructor used for read operations, where buf contains preallocated space to receive data. +enq_rec::enq_rec(): + jrec(), // superclass + _enq_hdr(RHM_JDAT_ENQ_MAGIC, RHM_JDAT_VERSION, 0, 0, 0, false, false), + _xidp(0), + _data(0), + _buff(0), + _enq_tail(_enq_hdr) +{} + +// Constructor used for transactional write operations, where dbuf contains data to be written. 
+enq_rec::enq_rec(const u_int64_t rid, const void* const dbuf, const std::size_t dlen, + const void* const xidp, const std::size_t xidlen, const bool owi, const bool transient): + jrec(), // superclass + _enq_hdr(RHM_JDAT_ENQ_MAGIC, RHM_JDAT_VERSION, rid, xidlen, dlen, owi, transient), + _xidp(xidp), + _data(dbuf), + _buff(0), + _enq_tail(_enq_hdr) +{} + +enq_rec::~enq_rec() +{ + clean(); +} + +// Prepare instance for use in reading data from journal, where buf contains preallocated space +// to receive data. +void +enq_rec::reset() +{ + _enq_hdr._rid = 0; + _enq_hdr.set_owi(false); + _enq_hdr.set_transient(false); + _enq_hdr._xidsize = 0; + _enq_hdr._dsize = 0; + _xidp = 0; + _data = 0; + _buff = 0; + _enq_tail._rid = 0; +} + +// Prepare instance for use in writing transactional data to journal, where dbuf contains data to +// be written. +void +enq_rec::reset(const u_int64_t rid, const void* const dbuf, const std::size_t dlen, + const void* const xidp, const std::size_t xidlen, const bool owi, const bool transient, + const bool external) +{ + _enq_hdr._rid = rid; + _enq_hdr.set_owi(owi); + _enq_hdr.set_transient(transient); + _enq_hdr.set_external(external); + _enq_hdr._xidsize = xidlen; + _enq_hdr._dsize = dlen; + _xidp = xidp; + _data = dbuf; + _buff = 0; + _enq_tail._rid = rid; +} + +u_int32_t +enq_rec::encode(void* wptr, u_int32_t rec_offs_dblks, u_int32_t max_size_dblks) +{ + assert(wptr != 0); + assert(max_size_dblks > 0); + if (_xidp == 0) + assert(_enq_hdr._xidsize == 0); + + std::size_t rec_offs = rec_offs_dblks * JRNL_DBLK_SIZE; + std::size_t rem = max_size_dblks * JRNL_DBLK_SIZE; + std::size_t wr_cnt = 0; + if (rec_offs_dblks) // Continuation of split data record (over 2 or more pages) + { + if (size_dblks(rec_size()) - rec_offs_dblks > max_size_dblks) // Further split required + { + rec_offs -= sizeof(_enq_hdr); + std::size_t wsize = _enq_hdr._xidsize > rec_offs ? 
_enq_hdr._xidsize - rec_offs : 0; + std::size_t wsize2 = wsize; + if (wsize) + { + if (wsize > rem) + wsize = rem; + std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize); + wr_cnt = wsize; + rem -= wsize; + } + rec_offs -= _enq_hdr._xidsize - wsize2; + if (rem && !_enq_hdr.is_external()) + { + wsize = _enq_hdr._dsize > rec_offs ? _enq_hdr._dsize - rec_offs : 0; + wsize2 = wsize; + if (wsize) + { + if (wsize > rem) + wsize = rem; + std::memcpy((char*)wptr + wr_cnt, (const char*)_data + rec_offs, wsize); + wr_cnt += wsize; + rem -= wsize; + } + rec_offs -= _enq_hdr._dsize - wsize2; + } + if (rem) + { + wsize = sizeof(_enq_tail) > rec_offs ? sizeof(_enq_tail) - rec_offs : 0; + wsize2 = wsize; + if (wsize) + { + if (wsize > rem) + wsize = rem; + std::memcpy((char*)wptr + wr_cnt, (char*)&_enq_tail + rec_offs, wsize); + wr_cnt += wsize; + rem -= wsize; + } + rec_offs -= sizeof(_enq_tail) - wsize2; + } + assert(rem == 0); + assert(rec_offs == 0); + } + else // No further split required + { + rec_offs -= sizeof(_enq_hdr); + std::size_t wsize = _enq_hdr._xidsize > rec_offs ? _enq_hdr._xidsize - rec_offs : 0; + if (wsize) + { + std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize); + wr_cnt += wsize; + } + rec_offs -= _enq_hdr._xidsize - wsize; + wsize = _enq_hdr._dsize > rec_offs ? _enq_hdr._dsize - rec_offs : 0; + if (wsize && !_enq_hdr.is_external()) + { + std::memcpy((char*)wptr + wr_cnt, (const char*)_data + rec_offs, wsize); + wr_cnt += wsize; + } + rec_offs -= _enq_hdr._dsize - wsize; + wsize = sizeof(_enq_tail) > rec_offs ? 
sizeof(_enq_tail) - rec_offs : 0; + if (wsize) + { + std::memcpy((char*)wptr + wr_cnt, (char*)&_enq_tail + rec_offs, wsize); + wr_cnt += wsize; +#ifdef RHM_CLEAN + std::size_t rec_offs = rec_offs_dblks * JRNL_DBLK_SIZE; + std::size_t dblk_rec_size = size_dblks(rec_size() - rec_offs) * JRNL_DBLK_SIZE; + std::memset((char*)wptr + wr_cnt, RHM_CLEAN_CHAR, dblk_rec_size - wr_cnt); +#endif + } + rec_offs -= sizeof(_enq_tail) - wsize; + assert(rec_offs == 0); + } + } + else // Start at beginning of data record + { + // Assumption: the header will always fit into the first dblk + std::memcpy(wptr, (void*)&_enq_hdr, sizeof(_enq_hdr)); + wr_cnt = sizeof(_enq_hdr); + if (size_dblks(rec_size()) > max_size_dblks) // Split required + { + std::size_t wsize; + rem -= sizeof(_enq_hdr); + if (rem) + { + wsize = rem >= _enq_hdr._xidsize ? _enq_hdr._xidsize : rem; + std::memcpy((char*)wptr + wr_cnt, _xidp, wsize); + wr_cnt += wsize; + rem -= wsize; + } + if (rem && !_enq_hdr.is_external()) + { + wsize = rem >= _enq_hdr._dsize ? _enq_hdr._dsize : rem; + std::memcpy((char*)wptr + wr_cnt, _data, wsize); + wr_cnt += wsize; + rem -= wsize; + } + if (rem) + { + wsize = rem >= sizeof(_enq_tail) ? 
sizeof(_enq_tail) : rem; + std::memcpy((char*)wptr + wr_cnt, (void*)&_enq_tail, wsize); + wr_cnt += wsize; + rem -= wsize; + } + assert(rem == 0); + } + else // No split required + { + if (_enq_hdr._xidsize) + { + std::memcpy((char*)wptr + wr_cnt, _xidp, _enq_hdr._xidsize); + wr_cnt += _enq_hdr._xidsize; + } + if (!_enq_hdr.is_external()) + { + std::memcpy((char*)wptr + wr_cnt, _data, _enq_hdr._dsize); + wr_cnt += _enq_hdr._dsize; + } + std::memcpy((char*)wptr + wr_cnt, (void*)&_enq_tail, sizeof(_enq_tail)); + wr_cnt += sizeof(_enq_tail); +#ifdef RHM_CLEAN + std::size_t dblk_rec_size = size_dblks(rec_size()) * JRNL_DBLK_SIZE; + std::memset((char*)wptr + wr_cnt, RHM_CLEAN_CHAR, dblk_rec_size - wr_cnt); +#endif + } + } + return size_dblks(wr_cnt); +} + +u_int32_t +enq_rec::decode(rec_hdr& h, void* rptr, u_int32_t rec_offs_dblks, u_int32_t max_size_dblks) +{ + assert(rptr != 0); + assert(max_size_dblks > 0); + + std::size_t rd_cnt = 0; + if (rec_offs_dblks) // Continuation of record on new page + { + const u_int32_t hdr_xid_data_size = enq_hdr::size() + _enq_hdr._xidsize + + (_enq_hdr.is_external() ? 
0 : _enq_hdr._dsize); + const u_int32_t hdr_xid_data_tail_size = hdr_xid_data_size + rec_tail::size(); + const u_int32_t hdr_data_dblks = size_dblks(hdr_xid_data_size); + const u_int32_t hdr_tail_dblks = size_dblks(hdr_xid_data_tail_size); + const std::size_t rec_offs = rec_offs_dblks * JRNL_DBLK_SIZE; + const std::size_t offs = rec_offs - enq_hdr::size(); + + if (hdr_tail_dblks - rec_offs_dblks <= max_size_dblks) + { + // Remainder of record fits within this page + if (offs < _enq_hdr._xidsize) + { + // some XID still outstanding, copy remainder of XID, data and tail + const std::size_t rem = _enq_hdr._xidsize + _enq_hdr._dsize - offs; + std::memcpy((char*)_buff + offs, rptr, rem); + rd_cnt += rem; + std::memcpy((void*)&_enq_tail, ((char*)rptr + rd_cnt), sizeof(_enq_tail)); + chk_tail(); + rd_cnt += sizeof(_enq_tail); + } + else if (offs < _enq_hdr._xidsize + _enq_hdr._dsize && !_enq_hdr.is_external()) + { + // some data still outstanding, copy remainder of data and tail + const std::size_t data_offs = offs - _enq_hdr._xidsize; + const std::size_t data_rem = _enq_hdr._dsize - data_offs; + std::memcpy((char*)_buff + offs, rptr, data_rem); + rd_cnt += data_rem; + std::memcpy((void*)&_enq_tail, ((char*)rptr + rd_cnt), sizeof(_enq_tail)); + chk_tail(); + rd_cnt += sizeof(_enq_tail); + } + else + { + // Tail or part of tail only outstanding, complete tail + const std::size_t tail_offs = rec_offs - enq_hdr::size() - _enq_hdr._xidsize - + _enq_hdr._dsize; + const std::size_t tail_rem = rec_tail::size() - tail_offs; + std::memcpy((char*)&_enq_tail + tail_offs, rptr, tail_rem); + chk_tail(); + rd_cnt = tail_rem; + } + } + else if (hdr_data_dblks - rec_offs_dblks <= max_size_dblks) + { + // Remainder of xid & data fits within this page; tail split + + /* + * TODO: This section needs revision. Since it is known that the end of the page falls within the + * tail record, it is only necessary to write from the current offset to the end of the page under + * all circumstances. 
The multiple if/else combinations may be eliminated, as well as one memcpy() + * operation. + * + * Also note that Coverity has detected a possible memory overwrite in this block. It occurs if + * both the following two if() stmsts (numbered) are false. With rd_cnt = 0, this would result in + * the value of tail_rem > sizeof(tail_rec). Practically, this could only happen if the start and + * end of a page both fall within the same tail record, in which case the tail would have to be + * (much!) larger. However, the logic here does not account for this possibility. + * + * If the optimization above is undertaken, this code would probably be removed. + */ + if (offs < _enq_hdr._xidsize) // 1 + { + // some XID still outstanding, copy remainder of XID and data + const std::size_t rem = _enq_hdr._xidsize + _enq_hdr._dsize - offs; + std::memcpy((char*)_buff + offs, rptr, rem); + rd_cnt += rem; + } + else if (offs < _enq_hdr._xidsize + _enq_hdr._dsize && !_enq_hdr.is_external()) // 2 + { + // some data still outstanding, copy remainder of data + const std::size_t data_offs = offs - _enq_hdr._xidsize; + const std::size_t data_rem = _enq_hdr._dsize - data_offs; + std::memcpy((char*)_buff + offs, rptr, data_rem); + rd_cnt += data_rem; + } + const std::size_t tail_rem = (max_size_dblks * JRNL_DBLK_SIZE) - rd_cnt; + if (tail_rem) + { + std::memcpy((void*)&_enq_tail, ((char*)rptr + rd_cnt), tail_rem); + rd_cnt += tail_rem; + } + } + else + { + // Since xid and data are contiguous, both fit within current page - copy whole page + const std::size_t data_cp_size = (max_size_dblks * JRNL_DBLK_SIZE); + std::memcpy((char*)_buff + offs, rptr, data_cp_size); + rd_cnt += data_cp_size; + } + } + else // Start of record + { + // Get and check header + _enq_hdr.hdr_copy(h); + rd_cnt = sizeof(rec_hdr); +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + rd_cnt += sizeof(u_int32_t); // Filler 0 +#endif + _enq_hdr._xidsize = *(std::size_t*)((char*)rptr + rd_cnt); + rd_cnt += 
sizeof(std::size_t); +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + rd_cnt += sizeof(u_int32_t); // Filler 0 +#endif +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + rd_cnt += sizeof(u_int32_t); // Filler 1 +#endif + _enq_hdr._dsize = *(std::size_t*)((char*)rptr + rd_cnt); + rd_cnt = _enq_hdr.size(); + chk_hdr(); + if (_enq_hdr._xidsize + (_enq_hdr.is_external() ? 0 : _enq_hdr._dsize)) + { + _buff = std::malloc(_enq_hdr._xidsize + (_enq_hdr.is_external() ? 0 : _enq_hdr._dsize)); + MALLOC_CHK(_buff, "_buff", "enq_rec", "decode"); + + const u_int32_t hdr_xid_size = enq_hdr::size() + _enq_hdr._xidsize; + const u_int32_t hdr_xid_data_size = hdr_xid_size + (_enq_hdr.is_external() ? 0 : _enq_hdr._dsize); + const u_int32_t hdr_xid_data_tail_size = hdr_xid_data_size + rec_tail::size(); + const u_int32_t hdr_xid_dblks = size_dblks(hdr_xid_size); + const u_int32_t hdr_data_dblks = size_dblks(hdr_xid_data_size); + const u_int32_t hdr_tail_dblks = size_dblks(hdr_xid_data_tail_size); + // Check if record (header + data + tail) fits within this page, we can check the + // tail before the expense of copying data to memory + if (hdr_tail_dblks <= max_size_dblks) + { + // Header, xid, data and tail fits within this page + if (_enq_hdr._xidsize) + { + std::memcpy(_buff, (char*)rptr + rd_cnt, _enq_hdr._xidsize); + rd_cnt += _enq_hdr._xidsize; + } + if (_enq_hdr._dsize && !_enq_hdr.is_external()) + { + std::memcpy((char*)_buff + _enq_hdr._xidsize, (char*)rptr + rd_cnt, + _enq_hdr._dsize); + rd_cnt += _enq_hdr._dsize; + } + std::memcpy((void*)&_enq_tail, (char*)rptr + rd_cnt, sizeof(_enq_tail)); + chk_tail(); + rd_cnt += sizeof(_enq_tail); + } + else if (hdr_data_dblks <= max_size_dblks) + { + // Header, xid and data fit within this page, tail split or separated + if (_enq_hdr._xidsize) + { + std::memcpy(_buff, (char*)rptr + rd_cnt, _enq_hdr._xidsize); + rd_cnt += _enq_hdr._xidsize; + } + if (_enq_hdr._dsize && !_enq_hdr.is_external()) + { + std::memcpy((char*)_buff 
+ _enq_hdr._xidsize, (char*)rptr + rd_cnt, + _enq_hdr._dsize); + rd_cnt += _enq_hdr._dsize; + } + const std::size_t tail_rem = (max_size_dblks * JRNL_DBLK_SIZE) - rd_cnt; + if (tail_rem) + { + std::memcpy((void*)&_enq_tail, (char*)rptr + rd_cnt, tail_rem); + rd_cnt += tail_rem; + } + } + else if (hdr_xid_dblks <= max_size_dblks) + { + // Header and xid fits within this page, data split or separated + if (_enq_hdr._xidsize) + { + std::memcpy(_buff, (char*)rptr + rd_cnt, _enq_hdr._xidsize); + rd_cnt += _enq_hdr._xidsize; + } + if (_enq_hdr._dsize && !_enq_hdr.is_external()) + { + const std::size_t data_cp_size = (max_size_dblks * JRNL_DBLK_SIZE) - rd_cnt; + std::memcpy((char*)_buff + _enq_hdr._xidsize, (char*)rptr + rd_cnt, data_cp_size); + rd_cnt += data_cp_size; + } + } + else + { + // Header fits within this page, xid split or separated + const std::size_t data_cp_size = (max_size_dblks * JRNL_DBLK_SIZE) - rd_cnt; + std::memcpy(_buff, (char*)rptr + rd_cnt, data_cp_size); + rd_cnt += data_cp_size; + } + } + } + return size_dblks(rd_cnt); +} + +bool +enq_rec::rcv_decode(rec_hdr h, std::ifstream* ifsp, std::size_t& rec_offs) +{ + if (rec_offs == 0) + { + // Read header, allocate (if req'd) for xid + _enq_hdr.hdr_copy(h); +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + ifsp->ignore(sizeof(u_int32_t)); // _filler0 +#endif + ifsp->read((char*)&_enq_hdr._xidsize, sizeof(std::size_t)); +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + ifsp->ignore(sizeof(u_int32_t)); // _filler0 +#endif +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + ifsp->ignore(sizeof(u_int32_t)); // _filler1 +#endif + ifsp->read((char*)&_enq_hdr._dsize, sizeof(std::size_t)); +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + ifsp->ignore(sizeof(u_int32_t)); // _filler1 +#endif + rec_offs = sizeof(_enq_hdr); + if (_enq_hdr._xidsize) + { + _buff = std::malloc(_enq_hdr._xidsize); + MALLOC_CHK(_buff, "_buff", "enq_rec", "rcv_decode"); + } + } + if (rec_offs < 
sizeof(_enq_hdr) + _enq_hdr._xidsize) + { + // Read xid (or continue reading xid) + std::size_t offs = rec_offs - sizeof(_enq_hdr); + ifsp->read((char*)_buff + offs, _enq_hdr._xidsize - offs); + std::size_t size_read = ifsp->gcount(); + rec_offs += size_read; + if (size_read < _enq_hdr._xidsize - offs) + { + assert(ifsp->eof()); + // As we may have read past eof, turn off fail bit + ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit)); + assert(!ifsp->fail() && !ifsp->bad()); + return false; + } + } + if (!_enq_hdr.is_external()) + { + if (rec_offs < sizeof(_enq_hdr) + _enq_hdr._xidsize + _enq_hdr._dsize) + { + // Ignore data (or continue ignoring data) + std::size_t offs = rec_offs - sizeof(_enq_hdr) - _enq_hdr._xidsize; + ifsp->ignore(_enq_hdr._dsize - offs); + std::size_t size_read = ifsp->gcount(); + rec_offs += size_read; + if (size_read < _enq_hdr._dsize - offs) + { + assert(ifsp->eof()); + // As we may have read past eof, turn off fail bit + ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit)); + assert(!ifsp->fail() && !ifsp->bad()); + return false; + } + } + } + if (rec_offs < sizeof(_enq_hdr) + _enq_hdr._xidsize + + (_enq_hdr.is_external() ? 
0 : _enq_hdr._dsize) + sizeof(rec_tail)) + { + // Read tail (or continue reading tail) + std::size_t offs = rec_offs - sizeof(_enq_hdr) - _enq_hdr._xidsize; + if (!_enq_hdr.is_external()) + offs -= _enq_hdr._dsize; + ifsp->read((char*)&_enq_tail + offs, sizeof(rec_tail) - offs); + std::size_t size_read = ifsp->gcount(); + rec_offs += size_read; + if (size_read < sizeof(rec_tail) - offs) + { + assert(ifsp->eof()); + // As we may have read past eof, turn off fail bit + ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit)); + assert(!ifsp->fail() && !ifsp->bad()); + return false; + } + } + ifsp->ignore(rec_size_dblks() * JRNL_DBLK_SIZE - rec_size()); + chk_tail(); // Throws if tail invalid or record incomplete + assert(!ifsp->fail() && !ifsp->bad()); + return true; +} + +std::size_t +enq_rec::get_xid(void** const xidpp) +{ + if (!_buff || !_enq_hdr._xidsize) + { + *xidpp = 0; + return 0; + } + *xidpp = _buff; + return _enq_hdr._xidsize; +} + +std::size_t +enq_rec::get_data(void** const datapp) +{ + if (!_buff) + { + *datapp = 0; + return 0; + } + if (_enq_hdr.is_external()) + *datapp = 0; + else + *datapp = (void*)((char*)_buff + _enq_hdr._xidsize); + return _enq_hdr._dsize; +} + +std::string& +enq_rec::str(std::string& str) const +{ + std::ostringstream oss; + oss << "enq_rec: m=" << _enq_hdr._magic; + oss << " v=" << (int)_enq_hdr._version; + oss << " rid=" << _enq_hdr._rid; + if (_xidp) + oss << " xid=\"" << _xidp << "\""; + oss << " len=" << _enq_hdr._dsize; + str.append(oss.str()); + return str; +} + +std::size_t +enq_rec::rec_size() const +{ + return rec_size(_enq_hdr._xidsize, _enq_hdr._dsize, _enq_hdr.is_external()); +} + +std::size_t +enq_rec::rec_size(const std::size_t xidsize, const std::size_t dsize, const bool external) +{ + if (external) + return enq_hdr::size() + xidsize + rec_tail::size(); + return enq_hdr::size() + xidsize + dsize + rec_tail::size(); +} + +void +enq_rec::set_rid(const u_int64_t rid) +{ + _enq_hdr._rid = rid; + _enq_tail._rid = rid; 
+} + +void +enq_rec::chk_hdr() const +{ + jrec::chk_hdr(_enq_hdr); + if (_enq_hdr._magic != RHM_JDAT_ENQ_MAGIC) + { + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + oss << "enq magic: rid=0x" << std::setw(16) << _enq_hdr._rid; + oss << ": expected=0x" << std::setw(8) << RHM_JDAT_ENQ_MAGIC; + oss << " read=0x" << std::setw(2) << (int)_enq_hdr._magic; + throw jexception(jerrno::JERR_JREC_BADRECHDR, oss.str(), "enq_rec", "chk_hdr"); + } +} + +void +enq_rec::chk_hdr(u_int64_t rid) const +{ + chk_hdr(); + jrec::chk_rid(_enq_hdr, rid); +} + +void +enq_rec::chk_tail() const +{ + jrec::chk_tail(_enq_tail, _enq_hdr); +} + +void +enq_rec::clean() +{ + // clean up allocated memory here +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/enq_rec.h b/cpp/src/qpid/legacystore/jrnl/enq_rec.h new file mode 100644 index 0000000000..805a96a1aa --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/enq_rec.h @@ -0,0 +1,116 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file enq_rec.h + * + * Qpid asynchronous store plugin library + * + * This file contains the code for the mrg::journal::enq_rec (journal enqueue + * record) class. 
See class documentation for details. + */

#ifndef QPID_LEGACYSTORE_JRNL_ENQ_REC_H
#define QPID_LEGACYSTORE_JRNL_ENQ_REC_H

namespace mrg
{
namespace journal
{
class enq_rec;
}
}

#include <cstddef>
#include "qpid/legacystore/jrnl/enq_hdr.h"
#include "qpid/legacystore/jrnl/jrec.h"

namespace mrg
{
namespace journal
{

    /**
    * \class enq_rec
    * \brief Class to handle a single journal enqueue record.
    *
    * A record consists of an enq_hdr, an optional xid, the data (omitted when the record is
    * flagged external) and a rec_tail. For writing, the object refers to caller-supplied xid
    * and data buffers; for reading, it holds a pointer to a buffer that receives them.
    */
    class enq_rec : public jrec
    {
    private:
        enq_hdr _enq_hdr;       ///< Record header (rid, flags, xid size, data size)
        const void* _xidp;      ///< xid pointer for encoding (for writing to disk)
        const void* _data;      ///< Pointer to data to be written to disk
        void* _buff;            ///< Pointer to buffer to receive data read from disk
        rec_tail _enq_tail;     ///< Record tail

    public:
        /**
        * \brief Constructor used for read operations.
        */
        enq_rec();

        /**
        * \brief Constructor used for write operations, where dbuf contains data to be written.
        *
        * \param rid Record id.
        * \param dbuf Pointer to the data to be written; \a dlen is its size in bytes.
        * \param xidp Pointer to the xid; \a xidlen is its size in bytes.
        * \param owi Passed to the header's owi flag.
        * \param transient Initial value of the header's transient flag.
        */
        enq_rec(const u_int64_t rid, const void* const dbuf, const std::size_t dlen,
                const void* const xidp, const std::size_t xidlen, const bool owi, const bool transient);

        /**
        * \brief Destructor
        */
        virtual ~enq_rec();

        // Prepare instance for use in reading data from journal, xid and data will be allocated
        void reset();
        // Prepare instance for use in writing data to journal
        void reset(const u_int64_t rid, const void* const dbuf, const std::size_t dlen,
                const void* const xidp, const std::size_t xidlen, const bool owi, const bool transient,
                const bool external);

        // Write the record to wptr, starting rec_offs_dblks dblks into the record and using at
        // most max_size_dblks dblks; returns the number of dblks written
        u_int32_t encode(void* wptr, u_int32_t rec_offs_dblks, u_int32_t max_size_dblks);
        // Read the record from rptr (header fields from h), starting rec_offs_dblks dblks into
        // the record and using at most max_size_dblks dblks; returns the number of dblks read
        u_int32_t decode(rec_hdr& h, void* rptr, u_int32_t rec_offs_dblks,
                u_int32_t max_size_dblks);
        // Decode used for recover
        bool rcv_decode(rec_hdr h, std::ifstream* ifsp, std::size_t& rec_offs);

        // Set *xidpp to the xid read from disk (0 if none) and return its size
        std::size_t get_xid(void** const xidpp);
        // Set *datapp to the data read from disk (0 if none or external) and return the data size
        std::size_t get_data(void** const datapp);
        inline bool is_transient() const { return _enq_hdr.is_transient(); }
        inline bool is_external() const { return _enq_hdr.is_external(); }
        // Append a textual description of the record to str and return str
        std::string& str(std::string& str) const;
        inline std::size_t data_size() const { return _enq_hdr._dsize; }
        inline std::size_t xid_size() const { return _enq_hdr._xidsize; }
        // Size in bytes of the record as stored in the journal (data excluded when external)
        std::size_t rec_size() const;
        static std::size_t rec_size(const std::size_t xidsize, const std::size_t dsize, const bool external);
        inline u_int64_t rid() const { return _enq_hdr._rid; }
        // Set the record id in both the header and the tail
        void set_rid(const u_int64_t rid);

    private:
        void chk_hdr() const;
        void chk_hdr(u_int64_t rid) const;
        void chk_tail() const;
        virtual void clean();
    }; // class enq_rec

} // namespace journal
} // namespace mrg

#endif // ifndef QPID_LEGACYSTORE_JRNL_ENQ_REC_H
diff --git a/cpp/src/qpid/legacystore/jrnl/enums.h b/cpp/src/qpid/legacystore/jrnl/enums.h new file mode 100644 index 0000000000..169a13fa4d --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/enums.h @@ -0,0 +1,108 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file enums.h + * + * Qpid asynchronous store plugin library + * + * File containing definitions for namespace mrg::journal enums.
+ * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_ENUMS_H +#define QPID_LEGACYSTORE_JRNL_ENUMS_H + +namespace mrg +{ +namespace journal +{ + + // TODO: Change this to flags, as multiple of these conditions may exist simultaneously + /** + * \brief Enumeration of possible return states from journal read and write operations. + */ + enum _iores + { + RHM_IORES_SUCCESS = 0, ///< Success: IO operation completed normally. + RHM_IORES_PAGE_AIOWAIT, ///< IO operation suspended - next page is waiting for AIO. + RHM_IORES_FILE_AIOWAIT, ///< IO operation suspended - next file is waiting for AIO. + RHM_IORES_EMPTY, ///< During read operations, nothing further is available to read. + RHM_IORES_RCINVALID, ///< Read page cache is invalid (ie obsolete or uninitialized) + RHM_IORES_ENQCAPTHRESH, ///< Enqueue capacity threshold (limit) reached. + RHM_IORES_FULL, ///< During write operations, the journal files are full. + RHM_IORES_BUSY, ///< Another blocking operation is in progress. + RHM_IORES_TXPENDING, ///< Operation blocked by pending transaction. + RHM_IORES_NOTIMPL ///< Function is not yet implemented.
+ }; + typedef _iores iores; + + static inline const char* iores_str(iores res) + { + switch (res) + { + case RHM_IORES_SUCCESS: return "RHM_IORES_SUCCESS"; + case RHM_IORES_PAGE_AIOWAIT: return "RHM_IORES_PAGE_AIOWAIT"; + case RHM_IORES_FILE_AIOWAIT: return "RHM_IORES_FILE_AIOWAIT"; + case RHM_IORES_EMPTY: return "RHM_IORES_EMPTY"; + case RHM_IORES_RCINVALID: return "RHM_IORES_RCINVALID"; + case RHM_IORES_ENQCAPTHRESH: return "RHM_IORES_ENQCAPTHRESH"; + case RHM_IORES_FULL: return "RHM_IORES_FULL"; + case RHM_IORES_BUSY: return "RHM_IORES_BUSY"; + case RHM_IORES_TXPENDING: return "RHM_IORES_TXPENDING"; + case RHM_IORES_NOTIMPL: return "RHM_IORES_NOTIMPL"; + } + return "<iores unknown>"; + } + + enum _log_level + { + LOG_TRACE = 0, + LOG_DEBUG, + LOG_INFO, + LOG_NOTICE, + LOG_WARN, + LOG_ERROR, + LOG_CRITICAL + }; + typedef _log_level log_level; + + static inline const char* log_level_str(log_level ll) + { + switch (ll) + { + case LOG_TRACE: return "TRACE"; + case LOG_DEBUG: return "DEBUG"; + case LOG_INFO: return "INFO"; + case LOG_NOTICE: return "NOTICE"; + case LOG_WARN: return "WARN"; + case LOG_ERROR: return "ERROR"; + case LOG_CRITICAL: return "CRITICAL"; + } + return "<log level unknown>"; + } + + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_ENUMS_H diff --git a/cpp/src/qpid/legacystore/jrnl/fcntl.cpp b/cpp/src/qpid/legacystore/jrnl/fcntl.cpp new file mode 100644 index 0000000000..fbb176667e --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/fcntl.cpp @@ -0,0 +1,375 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file fcntl.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::fcntl (non-logging file + * handle), used for controlling journal log files. See comments in file + * fcntl.h for details. + */ + +#include "qpid/legacystore/jrnl/fcntl.h" + +#include <cerrno> +#include <cstdlib> +#include <cstring> +#include <fcntl.h> +#include <iomanip> +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jexception.h" +#include <sstream> +#include <unistd.h> + +namespace mrg +{ +namespace journal +{ + +fcntl::fcntl(const std::string& fbasename, const u_int16_t pfid, const u_int16_t lfid, const u_int32_t jfsize_sblks, + const rcvdat* const ro): + _fname(), + _pfid(pfid), + _lfid(lfid), + _ffull_dblks(JRNL_SBLK_SIZE * (jfsize_sblks + 1)), + _wr_fh(-1), + _rec_enqcnt(0), + _rd_subm_cnt_dblks(0), + _rd_cmpl_cnt_dblks(0), + _wr_subm_cnt_dblks(0), + _wr_cmpl_cnt_dblks(0), + _aio_cnt(0), + _fhdr_wr_aio_outstanding(false) +{ + initialize(fbasename, pfid, lfid, jfsize_sblks, ro); + open_wr_fh(); +} + +fcntl::~fcntl() +{ + close_wr_fh(); +} + +bool +fcntl::reset(const rcvdat* const ro) +{ + rd_reset(); + return wr_reset(ro); +} + +void +fcntl::rd_reset() +{ + _rd_subm_cnt_dblks = 0; + _rd_cmpl_cnt_dblks = 0; +} + +bool +fcntl::wr_reset(const rcvdat* const ro) +{ + if (ro) + { + if (!ro->_jempty) + { + if (ro->_lfid == _pfid) + { + _wr_subm_cnt_dblks = ro->_eo/JRNL_DBLK_SIZE; + _wr_cmpl_cnt_dblks = ro->_eo/JRNL_DBLK_SIZE; + } + else + { + _wr_subm_cnt_dblks = _ffull_dblks; + 
_wr_cmpl_cnt_dblks = _ffull_dblks; + } + _rec_enqcnt = ro->_enq_cnt_list[_pfid]; + return true; + } + } + // Journal overflow test - checks if the file to be reset still contains enqueued records + // or outstanding aios + if (_rec_enqcnt || _aio_cnt) + return false; + _wr_subm_cnt_dblks = 0; + _wr_cmpl_cnt_dblks = 0; + return true; +} + +int +fcntl::open_wr_fh() +{ + if (_wr_fh < 0) + { + _wr_fh = ::open(_fname.c_str(), O_WRONLY | O_DIRECT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); // 0644 -rw-r--r-- + if (_wr_fh < 0) + { + std::ostringstream oss; + oss << "pfid=" << _pfid << " lfid=" << _lfid << " file=\"" << _fname << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_FCNTL_OPENWR, oss.str(), "fcntl", "open_fh"); + } + } + return _wr_fh; +} + +void +fcntl::close_wr_fh() +{ + if (_wr_fh >= 0) + { + ::close(_wr_fh); + _wr_fh = -1; + } +} + +u_int32_t +fcntl::add_enqcnt(u_int32_t a) +{ + _rec_enqcnt += a; + return _rec_enqcnt; +} + +u_int32_t +fcntl::decr_enqcnt() +{ + if (_rec_enqcnt == 0) + { + std::ostringstream oss; + oss << "pfid=" << _pfid << " lfid=" << _lfid; + throw jexception(jerrno::JERR__UNDERFLOW, oss.str(), "fcntl", "decr_enqcnt"); + } + return --_rec_enqcnt; +} + +u_int32_t +fcntl::subtr_enqcnt(u_int32_t s) +{ + if (_rec_enqcnt < s) + { + std::ostringstream oss; + oss << "pfid=" << _pfid << " lfid=" << _lfid << " rec_enqcnt=" << _rec_enqcnt << " decr=" << s; + throw jexception(jerrno::JERR__UNDERFLOW, oss.str(), "fcntl", "subtr_enqcnt"); + } + _rec_enqcnt -= s; + return _rec_enqcnt; +} + +u_int32_t +fcntl::add_rd_subm_cnt_dblks(u_int32_t a) +{ + if (_rd_subm_cnt_dblks + a > _wr_subm_cnt_dblks) + { + std::ostringstream oss; + oss << "pfid=" << _pfid << " lfid=" << _lfid << " rd_subm_cnt_dblks=" << _rd_subm_cnt_dblks << " incr=" << a; + oss << " wr_subm_cnt_dblks=" << _wr_subm_cnt_dblks; + throw jexception(jerrno::JERR_FCNTL_RDOFFSOVFL, oss.str(), "fcntl", "add_rd_subm_cnt_dblks"); + } + _rd_subm_cnt_dblks += a; + return _rd_subm_cnt_dblks; +} 
+ +u_int32_t +fcntl::add_rd_cmpl_cnt_dblks(u_int32_t a) +{ + if (_rd_cmpl_cnt_dblks + a > _rd_subm_cnt_dblks) + { + std::ostringstream oss; + oss << "pfid=" << _pfid << " lfid=" << _lfid << " rd_cmpl_cnt_dblks=" << _rd_cmpl_cnt_dblks << " incr=" << a; + oss << " rd_subm_cnt_dblks=" << _rd_subm_cnt_dblks; + throw jexception(jerrno::JERR_FCNTL_CMPLOFFSOVFL, oss.str(), "fcntl", "add_rd_cmpl_cnt_dblks"); + } + _rd_cmpl_cnt_dblks += a; + return _rd_cmpl_cnt_dblks; +} + +u_int32_t +fcntl::add_wr_subm_cnt_dblks(u_int32_t a) +{ + if (_wr_subm_cnt_dblks + a > _ffull_dblks) // Allow for file header + { + std::ostringstream oss; + oss << "pfid=" << _pfid << " lfid=" << _lfid << " wr_subm_cnt_dblks=" << _wr_subm_cnt_dblks << " incr=" << a; + oss << " fsize=" << _ffull_dblks << " dblks"; + throw jexception(jerrno::JERR_FCNTL_FILEOFFSOVFL, oss.str(), "fcntl", "add_wr_subm_cnt_dblks"); + } + _wr_subm_cnt_dblks += a; + return _wr_subm_cnt_dblks; +} + +u_int32_t +fcntl::add_wr_cmpl_cnt_dblks(u_int32_t a) +{ + if (_wr_cmpl_cnt_dblks + a > _wr_subm_cnt_dblks) + { + std::ostringstream oss; + oss << "pfid=" << _pfid << " lfid=" << _lfid << " wr_cmpl_cnt_dblks=" << _wr_cmpl_cnt_dblks << " incr=" << a; + oss << " wr_subm_cnt_dblks=" << _wr_subm_cnt_dblks; + throw jexception(jerrno::JERR_FCNTL_CMPLOFFSOVFL, oss.str(), "fcntl", "add_wr_cmpl_cnt_dblks"); + } + _wr_cmpl_cnt_dblks += a; + return _wr_cmpl_cnt_dblks; +} + +u_int16_t +fcntl::decr_aio_cnt() +{ + if(_aio_cnt == 0) + { + std::ostringstream oss; + oss << "pfid=" << _pfid << " lfid=" << _lfid << " Decremented aio_cnt to below zero"; + throw jexception(jerrno::JERR__UNDERFLOW, oss.str(), "fcntl", "decr_aio_cnt"); + } + return --_aio_cnt; +} + +// Debug function +const std::string +fcntl::status_str() const +{ + std::ostringstream oss; + oss << "pfid=" << _pfid << " ws=" << _wr_subm_cnt_dblks << " wc=" << _wr_cmpl_cnt_dblks; + oss << " rs=" << _rd_subm_cnt_dblks << " rc=" << _rd_cmpl_cnt_dblks; + oss << " ec=" << _rec_enqcnt << " ac=" 
<< _aio_cnt; + return oss.str(); +} + +// Protected functions + +void +fcntl::initialize(const std::string& fbasename, const u_int16_t pfid, const u_int16_t lfid, const u_int32_t jfsize_sblks, + const rcvdat* const ro) +{ + _pfid = pfid; + _lfid = lfid; + _fname = filename(fbasename, pfid); + +#ifdef RHM_JOWRITE + // In test mode, only create file if it does not exist + struct stat s; + if (::stat(_fname.c_str(), &s)) + { +#endif + if (ro) // Recovery initialization: set counters only + { + if (!ro->_jempty) + { + // For last file only, set write counters to end of last record (the + // continuation point); for all others, set to eof. + if (ro->_lfid == _pfid) + { + _wr_subm_cnt_dblks = ro->_eo/JRNL_DBLK_SIZE; + _wr_cmpl_cnt_dblks = ro->_eo/JRNL_DBLK_SIZE; + } + else + { + _wr_subm_cnt_dblks = _ffull_dblks; + _wr_cmpl_cnt_dblks = _ffull_dblks; + } + // Set the number of enqueued records for this file. + _rec_enqcnt = ro->_enq_cnt_list[_pfid]; + } + } + else // Normal initialization: create empty journal files + create_jfile(jfsize_sblks); +#ifdef RHM_JOWRITE + } +#endif +} + +std::string +fcntl::filename(const std::string& fbasename, const u_int16_t pfid) +{ + std::ostringstream oss; + oss << fbasename << "."; + oss << std::setw(4) << std::setfill('0') << std::hex << pfid; + oss << "." << JRNL_DATA_EXTENSION; + return oss.str(); +} + +/** + * \brief Create (if it does not exist) and zero-fill the journal file _fname. + * + * Writes (jfsize_sblks + 1) softblocks of zeros to the file through an O_DIRECT handle, + * using a temporary aligned null buffer of at most 4096 softblocks. + * + * \param jfsize_sblks Journal file data size in softblocks, excluding the one-softblock file header. + * \throws jexception JERR__MALLOC if the aligned buffer cannot be allocated, JERR_FCNTL_OPENWR if + * the file cannot be opened, JERR_FCNTL_WRITE if a write fails, JERR_FCNTL_CLOSE if close fails. + */ +void +fcntl::clean_file(const u_int32_t jfsize_sblks) +{ + // NOTE: The journal file size is always one sblock bigger than the specified journal + // file size, which is the data content size. The extra block is for the journal file + // header which precedes all data on each file and is exactly one sblock in size. + u_int32_t nsblks = jfsize_sblks + 1; + + // TODO - look at more efficient alternatives to allocating a null block: + // 1. mmap() against /dev/zero, but can alignment for O_DIRECT be assured? + // 2. ftruncate(), but does this result in a sparse file? If so, then this is no good.
+ + // Create temp null block for writing + const std::size_t sblksize = JRNL_DBLK_SIZE * JRNL_SBLK_SIZE; + void* nullbuf = 0; + // Allocate no more than 2MB (4096 sblks) as a null buffer + const u_int32_t nullbuffsize_sblks = nsblks > 4096 ? 4096 : nsblks; + const std::size_t nullbuffsize = nullbuffsize_sblks * sblksize; + // posix_memalign() returns the error number directly and does not set errno, so report + // its return value rather than errno. + // NOTE(review): assumes FORMAT_SYSERR formats the error number passed to it - confirm. + const int memalign_err = ::posix_memalign(&nullbuf, sblksize, nullbuffsize); + if (memalign_err) + { + std::ostringstream oss; + oss << "posix_memalign() failed: size=" << nullbuffsize << " blk_size=" << sblksize; + oss << FORMAT_SYSERR(memalign_err); + throw jexception(jerrno::JERR__MALLOC, oss.str(), "fcntl", "clean_file"); + } + std::memset(nullbuf, 0, nullbuffsize); + + int fh = ::open(_fname.c_str(), O_WRONLY | O_CREAT | O_DIRECT, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); // 0644 -rw-r--r-- + if (fh < 0) + { + const int open_err = errno; // Save before cleanup, which may overwrite errno + std::free(nullbuf); + std::ostringstream oss; + oss << "open() failed:" << FORMAT_SYSERR(open_err); + throw jexception(jerrno::JERR_FCNTL_OPENWR, oss.str(), "fcntl", "clean_file"); + } + + while (nsblks > 0) + { + u_int32_t this_write_sblks = nsblks >= nullbuffsize_sblks ?
nullbuffsize_sblks : nsblks; + // NOTE(review): only a return of -1 is treated as failure; a short write is not detected here. + if (::write(fh, nullbuf, this_write_sblks * sblksize) == -1) + { + const int write_err = errno; // Save before close()/free(), which may overwrite errno + ::close(fh); + std::free(nullbuf); + std::ostringstream oss; + oss << "wr_size=" << (this_write_sblks * sblksize) << FORMAT_SYSERR(write_err); + throw jexception(jerrno::JERR_FCNTL_WRITE, oss.str(), "fcntl", "clean_file"); + } + nsblks -= this_write_sblks; + } + + // Clean up + std::free(nullbuf); + if (::close(fh)) + { + std::ostringstream oss; + oss << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_FCNTL_CLOSE, oss.str(), "fcntl", "clean_file"); + } +} + +void +fcntl::create_jfile(const u_int32_t jfsize_sblks) +{ + clean_file(jfsize_sblks); +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/fcntl.h b/cpp/src/qpid/legacystore/jrnl/fcntl.h new file mode 100644 index 0000000000..a75e3bc84d --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/fcntl.h @@ -0,0 +1,156 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file fcntl.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::fcntl (non-logging file + * handle), used for controlling journal log files. See class documentation for + * details.
+ * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_FCNTL_H +#define QPID_LEGACYSTORE_JRNL_FCNTL_H + +namespace mrg +{ +namespace journal +{ +class fcntl; +} +} + +#include <cstddef> +#include <string> +#include "qpid/legacystore/jrnl/rcvdat.h" +#include <sys/types.h> + +namespace mrg +{ +namespace journal +{ + + /** + * \class fcntl + * \brief Journal file controller. There is one instance per journal file. + */ + class fcntl + { + protected: + std::string _fname; ///< File name + u_int16_t _pfid; ///< Physical file ID (file number in order of creation) + u_int16_t _lfid; ///< Logical file ID (ordinal number in ring store) + const u_int32_t _ffull_dblks; ///< File size in dblks (incl. file header) + int _wr_fh; ///< Write file handle + u_int32_t _rec_enqcnt; ///< Count of enqueued records + u_int32_t _rd_subm_cnt_dblks; ///< Read file count (data blocks) for submitted AIO + u_int32_t _rd_cmpl_cnt_dblks; ///< Read file count (data blocks) for completed AIO + u_int32_t _wr_subm_cnt_dblks; ///< Write file count (data blocks) for submitted AIO + u_int32_t _wr_cmpl_cnt_dblks; ///< Write file count (data blocks) for completed AIO + u_int16_t _aio_cnt; ///< Outstanding AIO operations on this file + bool _fhdr_wr_aio_outstanding; ///< Outstanding file header write on this file + + public: + // Constructors with implicit initialize() and open() + fcntl(const std::string& fbasename, const u_int16_t pfid, const u_int16_t lfid, const u_int32_t jfsize_sblks, + const rcvdat* const ro); + virtual ~fcntl(); + + virtual bool reset(const rcvdat* const ro = 0); + virtual void rd_reset(); + virtual bool wr_reset(const rcvdat* const ro = 0); + + virtual int open_wr_fh(); + virtual void close_wr_fh(); + inline bool is_wr_fh_open() const { return _wr_fh >= 0; } + + inline const std::string& fname() const { return _fname; } + inline u_int16_t pfid() const { return _pfid; } + inline u_int16_t lfid() const { return _lfid; } + inline void set_lfid(const u_int16_t lfid) { 
_lfid = lfid; } + inline int wr_fh() const { return _wr_fh; } + inline u_int32_t enqcnt() const { return _rec_enqcnt; } + inline u_int32_t incr_enqcnt() { return ++_rec_enqcnt; } + u_int32_t add_enqcnt(u_int32_t a); + u_int32_t decr_enqcnt(); + u_int32_t subtr_enqcnt(u_int32_t s); + + inline u_int32_t rd_subm_cnt_dblks() const { return _rd_subm_cnt_dblks; } + inline std::size_t rd_subm_offs() const { return _rd_subm_cnt_dblks * JRNL_DBLK_SIZE; } + u_int32_t add_rd_subm_cnt_dblks(u_int32_t a); + + inline u_int32_t rd_cmpl_cnt_dblks() const { return _rd_cmpl_cnt_dblks; } + inline std::size_t rd_cmpl_offs() const { return _rd_cmpl_cnt_dblks * JRNL_DBLK_SIZE; } + u_int32_t add_rd_cmpl_cnt_dblks(u_int32_t a); + + inline u_int32_t wr_subm_cnt_dblks() const { return _wr_subm_cnt_dblks; } + inline std::size_t wr_subm_offs() const { return _wr_subm_cnt_dblks * JRNL_DBLK_SIZE; } + u_int32_t add_wr_subm_cnt_dblks(u_int32_t a); + + inline u_int32_t wr_cmpl_cnt_dblks() const { return _wr_cmpl_cnt_dblks; } + inline std::size_t wr_cmpl_offs() const { return _wr_cmpl_cnt_dblks * JRNL_DBLK_SIZE; } + u_int32_t add_wr_cmpl_cnt_dblks(u_int32_t a); + + inline u_int16_t aio_cnt() const { return _aio_cnt; } + inline u_int16_t incr_aio_cnt() { return ++_aio_cnt; } + u_int16_t decr_aio_cnt(); + + inline bool wr_fhdr_aio_outstanding() { return _fhdr_wr_aio_outstanding; } + inline void set_wr_fhdr_aio_outstanding(const bool wfao) { _fhdr_wr_aio_outstanding = wfao; } + + // Derived helper functions + + inline bool rd_void() const { return _wr_cmpl_cnt_dblks == 0; } + inline bool rd_empty() const { return _wr_cmpl_cnt_dblks <= JRNL_SBLK_SIZE; } + inline u_int32_t rd_remaining_dblks() const { return _wr_cmpl_cnt_dblks - _rd_subm_cnt_dblks; } + inline bool is_rd_full() const { return _wr_cmpl_cnt_dblks == _rd_subm_cnt_dblks; } + inline bool is_rd_compl() const { return _wr_cmpl_cnt_dblks == _rd_cmpl_cnt_dblks; } + inline u_int32_t rd_aio_outstanding_dblks() const { return _rd_subm_cnt_dblks - 
_rd_cmpl_cnt_dblks; } + inline bool rd_file_rotate() const { return is_rd_full() && is_wr_compl(); } + + inline bool wr_void() const { return _wr_subm_cnt_dblks == 0; } + inline bool wr_empty() const { return _wr_subm_cnt_dblks <= JRNL_SBLK_SIZE; } + inline u_int32_t wr_remaining_dblks() const { return _ffull_dblks - _wr_subm_cnt_dblks; } + inline bool is_wr_full() const { return _ffull_dblks == _wr_subm_cnt_dblks; } + inline bool is_wr_compl() const { return _ffull_dblks == _wr_cmpl_cnt_dblks; } + inline u_int32_t wr_aio_outstanding_dblks() const { return _wr_subm_cnt_dblks - _wr_cmpl_cnt_dblks; } + inline bool wr_file_rotate() const { return is_wr_full(); } + + // Debug aid + const std::string status_str() const; + + protected: + virtual void initialize(const std::string& fbasename, const u_int16_t pfid, const u_int16_t lfid, + const u_int32_t jfsize_sblks, const rcvdat* const ro); + + static std::string filename(const std::string& fbasename, const u_int16_t pfid); + void clean_file(const u_int32_t jfsize_sblks); + void create_jfile(const u_int32_t jfsize_sblks); + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_FCNTL_H diff --git a/cpp/src/qpid/legacystore/jrnl/file_hdr.h b/cpp/src/qpid/legacystore/jrnl/file_hdr.h new file mode 100644 index 0000000000..db20834cbb --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/file_hdr.h @@ -0,0 +1,211 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file file_hdr.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::file_hdr (file + * record header), used to start a journal file. It contains some + * file metadata and information to aid journal recovery. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_FILE_HDR_H +#define QPID_LEGACYSTORE_JRNL_FILE_HDR_H + +#include <cerrno> +#include <ctime> +#include "qpid/legacystore/jrnl/rec_hdr.h" +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jexception.h" +#include <sstream> + +namespace mrg +{ +namespace journal +{ + +#pragma pack(1) + + /** + * \brief Struct for data common to the head of all journal files. In addition to + * the common data, this includes the record ID and offset of the first record in + * the file. + * + * This header precedes all data in journal files and occupies the first complete + * block in the file. The record ID and offset are updated on each overwrite of the + * file. 
+ * + * File header info in binary format (48 bytes): + * <pre> + * 0 7 + * +---+---+---+---+---+---+---+---+ -+ + * | magic | v | e | flags | | + * +---+---+---+---+---+---+---+---+ | struct hdr + * | first rid in file | | + * +---+---+---+---+---+---+---+---+ -+ + * | pfid | lfid | reserved (0) | + * +---+---+---+---+---+---+---+---+ + * | fro | + * +---+---+---+---+---+---+---+---+ + * | timestamp (sec) | + * +---+---+---+---+---+---+---+---+ + * | timestamp (ns) | + * +---+---+---+---+---+---+---+---+ + * v = file version (If the format or encoding of this file changes, then this + * number should be incremented) + * e = endian flag, false (0x00) for little endian, true (0x01) for big endian + * pfid = File ID (number used in naming file) + * lfid = Logical ID (order used in circular buffer) + * fro = First record offset, offset from start of file to first record header + * </pre> + * + * Note that journal files should be transferable between 32- and 64-bit + * hardware of the same endianness, but not between hardware of opposite + * endianness without some sort of binary conversion utility. Thus buffering + * will be needed for types that change size between 32- and 64-bit compiles.
+ */ + struct file_hdr : rec_hdr + { + u_int16_t _pfid; ///< Physical file ID (pfid) + u_int16_t _lfid; ///< Logical file ID (lfid) + u_int32_t _res; ///< Reserved (for alignment/flags) +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + u_int32_t _filler0; ///< Big-endian filler for 32-bit size_t +#endif + std::size_t _fro; ///< First record offset +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + u_int32_t _filler0; ///< Little-endian filler for 32-bit size_t +#endif +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + u_int32_t _filler1; ///< Big-endian filler for 32-bit time_t +#endif + std::time_t _ts_sec; ///< Timestamp of journal initialization (seconds part) +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + u_int32_t _filler1; ///< Little-endian filler for 32-bit time_t +#endif +#if defined(JRNL_BIG_ENDIAN) + u_int32_t _filler2; ///< Big endian filler for u_int32_t +#endif + u_int32_t _ts_nsec; ///< Timestamp of journal initialization (nanoseconds part) +#if defined(JRNL_LITTLE_ENDIAN) + u_int32_t _filler2; ///< Little-endian filler for u_int32_t +#endif + + /** + * \brief Default constructor, which sets all values to 0. + */ + inline file_hdr(): rec_hdr(), _pfid(0), _lfid(0), _res(0), +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + _filler0(0), +#endif + _fro(0), +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + _filler0(0), +#endif +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + _filler1(0), +#endif + _ts_sec(0), +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + _filler1(0), +#endif +#if defined(JRNL_BIG_ENDIAN) + _filler2(0), +#endif + _ts_nsec(0) +#if defined(JRNL_LITTLE_ENDIAN) + , _filler2(0) +#endif + {} + + /** + * \brief Convenience constructor which initializes values during construction.
+ * The timestamp is left at 0 unless settime is true, in which case set_time() is called. + */ + inline file_hdr(const u_int32_t magic, const u_int8_t version, const u_int64_t rid, + const u_int16_t pfid, const u_int16_t lfid, const std::size_t fro, + const bool owi, const bool settime = false): + rec_hdr(magic, version, rid, owi), _pfid(pfid), _lfid(lfid), _res(0), +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + _filler0(0), +#endif + _fro(fro), +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + _filler0(0), +#endif +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + _filler1(0), +#endif + _ts_sec(0), +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + _filler1(0), +#endif +#if defined(JRNL_BIG_ENDIAN) + _filler2(0), +#endif + _ts_nsec(0) +#if defined(JRNL_LITTLE_ENDIAN) + , _filler2(0) +#endif + { if (settime) set_time(); } + + /** + * \brief Gets the current time from the system clock and sets the timestamp in the struct. + * Throws jexception (JERR__RTCLOCK) if ::clock_gettime() fails. + */ + inline void set_time() + { + // TODO: Standardize on method for getting time that does not require a context switch. + timespec ts; + if (::clock_gettime(CLOCK_REALTIME, &ts)) + { + std::ostringstream oss; + oss << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR__RTCLOCK, oss.str(), "file_hdr", "set_time"); + } + _ts_sec = ts.tv_sec; + _ts_nsec = ts.tv_nsec; + } + + /** + * \brief Sets the timestamp in the struct to the provided value (in seconds and + * nanoseconds). The argument is only read, so it is taken by const reference. + */ + inline void set_time(const timespec& ts) + { + _ts_sec = ts.tv_sec; + _ts_nsec = ts.tv_nsec; + } + + /** + * \brief Returns the size of the header in bytes.
+ */ + inline static std::size_t size() { return sizeof(file_hdr); } + }; // struct file_hdr + +#pragma pack() + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_FILE_HDR_H diff --git a/cpp/src/qpid/legacystore/jrnl/jcfg.h b/cpp/src/qpid/legacystore/jrnl/jcfg.h new file mode 100644 index 0000000000..0a0d0df28d --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/jcfg.h @@ -0,0 +1,91 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file jcfg.h + * + * Qpid asynchronous store plugin library + * + * This file contains \#defines that control the implementation details of + * the journal.
+ * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_JCFG_H +#define QPID_LEGACYSTORE_JRNL_JCFG_H + +#if defined(__i386__) /* little endian, 32 bits */ +#define JRNL_LITTLE_ENDIAN +#define JRNL_32_BIT +#elif defined(__PPC__) || defined(__s390__) /* big endian, 32 bits */ +#define JRNL_BIG_ENDIAN +#define JRNL_32_BIT +#elif defined(__ia64__) || defined(__x86_64__) || defined(__alpha__) /* little endian, 64 bits */ +#define JRNL_LITTLE_ENDIAN +#define JRNL_64_BIT +#elif defined(__powerpc64__) || defined(__s390x__) /* big endian, 64 bits */ +#define JRNL_BIG_ENDIAN +#define JRNL_64_BIT +#else +#error endian? +#endif + + +/** +* <b>Rule:</b> Data block size (JRNL_DBLK_SIZE) MUST be a power of 2 such that +* <pre> +* JRNL_DBLK_SIZE * JRNL_SBLK_SIZE == n * 512 (n = 1,2,3...) +* </pre> +* (The disk softblock size is 512 for Linux kernels >= 2.6) +*/ +#define JRNL_DBLK_SIZE 128 ///< Data block size in bytes (CANNOT BE LESS THAN 32!) +#define JRNL_SBLK_SIZE 4 ///< Disk softblock size in multiples of JRNL_DBLK_SIZE +#define JRNL_MIN_FILE_SIZE 128 ///< Min. jrnl file size in sblks (excl. file_hdr) +#define JRNL_MAX_FILE_SIZE 4194176 ///< Max. jrnl file size in sblks (excl. file_hdr) +#define JRNL_MIN_NUM_FILES 4 ///< Min. number of journal files +#define JRNL_MAX_NUM_FILES 64 ///< Max. number of journal files +#define JRNL_ENQ_THRESHOLD 80 ///< Percent full when enqueue connection will be closed + +#define JRNL_RMGR_PAGE_SIZE 128 ///< Journal page size in softblocks +#define JRNL_RMGR_PAGES 16 ///< Number of pages to use in wmgr + +#define JRNL_WMGR_DEF_PAGE_SIZE 64 ///< Journal write page size in softblocks (default) +#define JRNL_WMGR_DEF_PAGES 32 ///< Number of pages to use in wmgr (default) + +#define JRNL_WMGR_MAXDTOKPP 1024 ///< Max. dtoks (data blocks) per page in wmgr +#define JRNL_WMGR_MAXWAITUS 100 ///< Max. 
wait time (us) before submitting AIO + +#define JRNL_INFO_EXTENSION "jinf" ///< Extension for journal info files +#define JRNL_DATA_EXTENSION "jdat" ///< Extension for journal data files +#define RHM_JDAT_TXA_MAGIC 0x614d4852 ///< ("RHMa" in little endian) Magic for dtx abort hdrs +#define RHM_JDAT_TXC_MAGIC 0x634d4852 ///< ("RHMc" in little endian) Magic for dtx commit hdrs +#define RHM_JDAT_DEQ_MAGIC 0x644d4852 ///< ("RHMd" in little endian) Magic for deq rec hdrs +#define RHM_JDAT_ENQ_MAGIC 0x654d4852 ///< ("RHMe" in little endian) Magic for enq rec hdrs +#define RHM_JDAT_FILE_MAGIC 0x664d4852 ///< ("RHMf" in little endian) Magic for file hdrs +#define RHM_JDAT_EMPTY_MAGIC 0x784d4852 ///< ("RHMx" in little endian) Magic for empty dblk +#define RHM_JDAT_VERSION 0x01 ///< Version (of file layout) +#define RHM_CLEAN_CHAR 0xff ///< Char used to clear empty space on disk + +#define RHM_LENDIAN_FLAG 0 ///< Value of little endian flag on disk +#define RHM_BENDIAN_FLAG 1 ///< Value of big endian flag on disk + +#endif // ifndef QPID_LEGACYSTORE_JRNL_JCFG_H diff --git a/cpp/src/qpid/legacystore/jrnl/jcntl.cpp b/cpp/src/qpid/legacystore/jrnl/jcntl.cpp new file mode 100644 index 0000000000..a03076dca5 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/jcntl.cpp @@ -0,0 +1,984 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file jcntl.cpp + * + * Qpid asynchronous store plugin library + * + * Messaging journal top-level control and interface class + * mrg::journal::jcntl. See comments in file jcntl.h for details. + * + * \author Kim van der Riet + */ + + +#include "qpid/legacystore/jrnl/jcntl.h" + +#include <algorithm> +#include <cassert> +#include <cerrno> +#include <cstdlib> +#include <cstring> +#include <fstream> +#include <iomanip> +#include <iostream> +#include "qpid/legacystore/jrnl/file_hdr.h" +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jinf.h" +#include <limits> +#include <sstream> +#include <unistd.h> + +namespace mrg +{ +namespace journal +{ + +#define AIO_CMPL_TIMEOUT_SEC 5 +#define AIO_CMPL_TIMEOUT_NSEC 0 +#define FINAL_AIO_CMPL_TIMEOUT_SEC 15 +#define FINAL_AIO_CMPL_TIMEOUT_NSEC 0 + +// Static +timespec jcntl::_aio_cmpl_timeout; ///< Timeout for blocking libaio returns +timespec jcntl::_final_aio_cmpl_timeout; ///< Timeout for blocking libaio returns when stopping or finalizing +bool jcntl::_init = init_statics(); +bool jcntl::init_statics() +{ + _aio_cmpl_timeout.tv_sec = AIO_CMPL_TIMEOUT_SEC; + _aio_cmpl_timeout.tv_nsec = AIO_CMPL_TIMEOUT_NSEC; + _final_aio_cmpl_timeout.tv_sec = FINAL_AIO_CMPL_TIMEOUT_SEC; + _final_aio_cmpl_timeout.tv_nsec = FINAL_AIO_CMPL_TIMEOUT_NSEC; + return true; +} + + +// Functions + +jcntl::jcntl(const std::string& jid, const std::string& jdir, const std::string& base_filename): + _jid(jid), + _jdir(jdir, base_filename), + _base_filename(base_filename), + _init_flag(false), + 
_stop_flag(false), + _readonly_flag(false), + _autostop(true), + _jfsize_sblks(0), + _lpmgr(), + _emap(), + _tmap(), + _rrfc(&_lpmgr), + _wrfc(&_lpmgr), + _rmgr(this, _emap, _tmap, _rrfc), + _wmgr(this, _emap, _tmap, _wrfc), + _rcvdat() +{} + +jcntl::~jcntl() +{ + if (_init_flag && !_stop_flag) + try { stop(true); } + catch (const jexception& e) { std::cerr << e << std::endl; } + _lpmgr.finalize(); +} + +void +jcntl::initialize(const u_int16_t num_jfiles, const bool ae, const u_int16_t ae_max_jfiles, + const u_int32_t jfsize_sblks, const u_int16_t wcache_num_pages, const u_int32_t wcache_pgsize_sblks, + aio_callback* const cbp) +{ + _init_flag = false; + _stop_flag = false; + _readonly_flag = false; + + _emap.clear(); + _tmap.clear(); + + _lpmgr.finalize(); + + // Set new file geometry parameters + assert(num_jfiles >= JRNL_MIN_NUM_FILES); + assert(num_jfiles <= JRNL_MAX_NUM_FILES); + _emap.set_num_jfiles(num_jfiles); + _tmap.set_num_jfiles(num_jfiles); + + assert(jfsize_sblks >= JRNL_MIN_FILE_SIZE); + assert(jfsize_sblks <= JRNL_MAX_FILE_SIZE); + _jfsize_sblks = jfsize_sblks; + + // Clear any existing journal files + _jdir.clear_dir(); + _lpmgr.initialize(num_jfiles, ae, ae_max_jfiles, this, &new_fcntl); + + _wrfc.initialize(_jfsize_sblks); + _rrfc.initialize(); + _rrfc.set_findex(0); + _rmgr.initialize(cbp); + _wmgr.initialize(cbp, wcache_pgsize_sblks, wcache_num_pages, JRNL_WMGR_MAXDTOKPP, JRNL_WMGR_MAXWAITUS); + + // Write info file (<basename>.jinf) to disk + write_infofile(); + + _init_flag = true; +} + +void +jcntl::recover(const u_int16_t num_jfiles, const bool ae, const u_int16_t ae_max_jfiles, + const u_int32_t jfsize_sblks, const u_int16_t wcache_num_pages, const u_int32_t wcache_pgsize_sblks, +// const rd_aio_cb rd_cb, const wr_aio_cb wr_cb, const std::vector<std::string>* prep_txn_list_ptr, + aio_callback* const cbp, const std::vector<std::string>* prep_txn_list_ptr, + u_int64_t& highest_rid) +{ + _init_flag = false; + _stop_flag = false; + 
_readonly_flag = false; + + _emap.clear(); + _tmap.clear(); + + _lpmgr.finalize(); + + assert(num_jfiles >= JRNL_MIN_NUM_FILES); + assert(num_jfiles <= JRNL_MAX_NUM_FILES); + assert(jfsize_sblks >= JRNL_MIN_FILE_SIZE); + assert(jfsize_sblks <= JRNL_MAX_FILE_SIZE); + _jfsize_sblks = jfsize_sblks; + + // Verify journal dir and journal files + _jdir.verify_dir(); + _rcvdat.reset(num_jfiles, ae, ae_max_jfiles); + + rcvr_janalyze(_rcvdat, prep_txn_list_ptr); + highest_rid = _rcvdat._h_rid; + if (_rcvdat._jfull) + throw jexception(jerrno::JERR_JCNTL_RECOVERJFULL, "jcntl", "recover"); + this->log(LOG_DEBUG, _rcvdat.to_log(_jid)); + + _lpmgr.recover(_rcvdat, this, &new_fcntl); + + _wrfc.initialize(_jfsize_sblks, &_rcvdat); + _rrfc.initialize(); + _rrfc.set_findex(_rcvdat.ffid()); + _rmgr.initialize(cbp); + _wmgr.initialize(cbp, wcache_pgsize_sblks, wcache_num_pages, JRNL_WMGR_MAXDTOKPP, JRNL_WMGR_MAXWAITUS, + (_rcvdat._lffull ? 0 : _rcvdat._eo)); + + _readonly_flag = true; + _init_flag = true; +} + +void +jcntl::recover_complete() +{ + if (!_readonly_flag) + throw jexception(jerrno::JERR_JCNTL_NOTRECOVERED, "jcntl", "recover_complete"); + for (u_int16_t i=0; i<_lpmgr.num_jfiles(); i++) + _lpmgr.get_fcntlp(i)->reset(&_rcvdat); + _wrfc.initialize(_jfsize_sblks, &_rcvdat); + _rrfc.initialize(); + _rrfc.set_findex(_rcvdat.ffid()); + _rmgr.recover_complete(); + _readonly_flag = false; +} + +void +jcntl::delete_jrnl_files() +{ + stop(true); // wait for AIO to complete + _jdir.delete_dir(); +} + + +iores +jcntl::enqueue_data_record(const void* const data_buff, const std::size_t tot_data_len, + const std::size_t this_data_len, data_tok* dtokp, const bool transient) +{ + iores r; + check_wstatus("enqueue_data_record"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.enqueue(data_buff, tot_data_len, this_data_len, dtokp, 0, 0, transient, false), r, + dtokp)) ; + } + return r; +} + +iores +jcntl::enqueue_extern_data_record(const std::size_t tot_data_len, data_tok* dtokp, 
const bool transient) +{ + iores r; + check_wstatus("enqueue_extern_data_record"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.enqueue(0, tot_data_len, 0, dtokp, 0, 0, transient, true), r, dtokp)) ; + } + return r; +} + +iores +jcntl::enqueue_txn_data_record(const void* const data_buff, const std::size_t tot_data_len, + const std::size_t this_data_len, data_tok* dtokp, const std::string& xid, + const bool transient) +{ + iores r; + check_wstatus("enqueue_tx_data_record"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.enqueue(data_buff, tot_data_len, this_data_len, dtokp, xid.data(), xid.size(), + transient, false), r, dtokp)) ; + } + return r; +} + +iores +jcntl::enqueue_extern_txn_data_record(const std::size_t tot_data_len, data_tok* dtokp, + const std::string& xid, const bool transient) +{ + iores r; + check_wstatus("enqueue_extern_txn_data_record"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.enqueue(0, tot_data_len, 0, dtokp, xid.data(), xid.size(), transient, true), r, + dtokp)) ; + } + return r; +} + +/* TODO +iores +jcntl::get_data_record(const u_int64_t& rid, const std::size_t& dsize, const std::size_t& dsize_avail, + const void** const data, bool auto_discard) +{ + check_rstatus("get_data_record"); + return _rmgr.get(rid, dsize, dsize_avail, data, auto_discard); +} */ + +/* TODO +iores +jcntl::discard_data_record(data_tok* const dtokp) +{ + check_rstatus("discard_data_record"); + return _rmgr.discard(dtokp); +} */ + +iores +jcntl::read_data_record(void** const datapp, std::size_t& dsize, void** const xidpp, std::size_t& xidsize, + bool& transient, bool& external, data_tok* const dtokp, bool ignore_pending_txns) +{ + check_rstatus("read_data"); + iores res = _rmgr.read(datapp, dsize, xidpp, xidsize, transient, external, dtokp, ignore_pending_txns); + if (res == RHM_IORES_RCINVALID) + { + get_wr_events(0); // check for outstanding write events + iores sres = _rmgr.synchronize(); // flushes all outstanding read events + if 
(sres != RHM_IORES_SUCCESS) + return sres; + _rmgr.wait_for_validity(&_aio_cmpl_timeout, true); // throw if timeout occurs + res = _rmgr.read(datapp, dsize, xidpp, xidsize, transient, external, dtokp, ignore_pending_txns); + } + return res; +} + +iores +jcntl::dequeue_data_record(data_tok* const dtokp, const bool txn_coml_commit) +{ + iores r; + check_wstatus("dequeue_data"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.dequeue(dtokp, 0, 0, txn_coml_commit), r, dtokp)) ; + } + return r; +} + +iores +jcntl::dequeue_txn_data_record(data_tok* const dtokp, const std::string& xid, const bool txn_coml_commit) +{ + iores r; + check_wstatus("dequeue_data"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.dequeue(dtokp, xid.data(), xid.size(), txn_coml_commit), r, dtokp)) ; + } + return r; +} + +iores +jcntl::txn_abort(data_tok* const dtokp, const std::string& xid) +{ + iores r; + check_wstatus("txn_abort"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.abort(dtokp, xid.data(), xid.size()), r, dtokp)) ; + } + return r; +} + +iores +jcntl::txn_commit(data_tok* const dtokp, const std::string& xid) +{ + iores r; + check_wstatus("txn_commit"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.commit(dtokp, xid.data(), xid.size()), r, dtokp)) ; + } + return r; +} + +bool +jcntl::is_txn_synced(const std::string& xid) +{ + slock s(_wr_mutex); + bool res = _wmgr.is_txn_synced(xid); + return res; +} + +int32_t +jcntl::get_wr_events(timespec* const timeout) +{ + stlock t(_wr_mutex); + if (!t.locked()) + return jerrno::LOCK_TAKEN; + int32_t res = _wmgr.get_events(pmgr::UNUSED, timeout); + return res; +} + +int32_t +jcntl::get_rd_events(timespec* const timeout) +{ + return _rmgr.get_events(pmgr::AIO_COMPLETE, timeout); +} + +void +jcntl::stop(const bool block_till_aio_cmpl) +{ + if (_readonly_flag) + check_rstatus("stop"); + else + check_wstatus("stop"); + _stop_flag = true; + if (!_readonly_flag) + flush(block_till_aio_cmpl); + _rrfc.finalize(); 
+ _lpmgr.finalize(); +} + +u_int16_t +jcntl::get_earliest_fid() +{ + u_int16_t ffid = _wrfc.earliest_index(); + u_int16_t fid = _wrfc.index(); + while ( _emap.get_enq_cnt(ffid) == 0 && _tmap.get_txn_pfid_cnt(ffid) == 0 && ffid != fid) + { + if (++ffid >= _lpmgr.num_jfiles()) + ffid = 0; + } + if (!_rrfc.is_active()) + _rrfc.set_findex(ffid); + return ffid; +} + +iores +jcntl::flush(const bool block_till_aio_cmpl) +{ + if (!_init_flag) + return RHM_IORES_SUCCESS; + if (_readonly_flag) + throw jexception(jerrno::JERR_JCNTL_READONLY, "jcntl", "flush"); + iores res; + { + slock s(_wr_mutex); + res = _wmgr.flush(); + } + if (block_till_aio_cmpl) + aio_cmpl_wait(); + return res; +} + +void +jcntl::log(log_level ll, const std::string& log_stmt) const +{ + log(ll, log_stmt.c_str()); +} + +void +jcntl::log(log_level ll, const char* const log_stmt) const +{ + if (ll > LOG_INFO) + { + std::cout << log_level_str(ll) << ": Journal \"" << _jid << "\": " << log_stmt << std::endl; + } +} + +void +jcntl::chk_wr_frot() +{ + if (_wrfc.index() == _rrfc.index()) + _rmgr.invalidate(); +} + +void +jcntl::fhdr_wr_sync(const u_int16_t lid) +{ + fcntl* fcntlp = _lpmgr.get_fcntlp(lid); + while (fcntlp->wr_fhdr_aio_outstanding()) + { + if (get_wr_events(&_aio_cmpl_timeout) == jerrno::AIO_TIMEOUT) + throw jexception(jerrno::JERR_JCNTL_AIOCMPLWAIT, "jcntl", "fhdr_wr_sync"); + } +} + +fcntl* +jcntl::new_fcntl(jcntl* const jcp, const u_int16_t lid, const u_int16_t fid, const rcvdat* const rdp) +{ + if (!jcp) return 0; + std::ostringstream oss; + oss << jcp->jrnl_dir() << "/" << jcp->base_filename(); + return new fcntl(oss.str(), fid, lid, jcp->jfsize_sblks(), rdp); +} + +// Protected/Private functions + +void +jcntl::check_wstatus(const char* fn_name) const +{ + if (!_init_flag) + throw jexception(jerrno::JERR__NINIT, "jcntl", fn_name); + if (_readonly_flag) + throw jexception(jerrno::JERR_JCNTL_READONLY, "jcntl", fn_name); + if (_stop_flag) + throw jexception(jerrno::JERR_JCNTL_STOPPED, "jcntl", 
fn_name); +} + +void +jcntl::check_rstatus(const char* fn_name) const +{ + if (!_init_flag) + throw jexception(jerrno::JERR__NINIT, "jcntl", fn_name); + if (_stop_flag) + throw jexception(jerrno::JERR_JCNTL_STOPPED, "jcntl", fn_name); +} + +void +jcntl::write_infofile() const +{ + timespec ts; + if (::clock_gettime(CLOCK_REALTIME, &ts)) + { + std::ostringstream oss; + oss << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR__RTCLOCK, oss.str(), "jcntl", "write_infofile"); + } + jinf ji(_jid, _jdir.dirname(), _base_filename, _lpmgr.num_jfiles(), _lpmgr.is_ae(), _lpmgr.ae_max_jfiles(), + _jfsize_sblks, _wmgr.cache_pgsize_sblks(), _wmgr.cache_num_pages(), ts); + ji.write(); +} + +void +jcntl::aio_cmpl_wait() +{ + //while (_wmgr.get_aio_evt_rem()) + while (true) + { + u_int32_t aer; + { + slock s(_wr_mutex); + aer = _wmgr.get_aio_evt_rem(); + } + if (aer == 0) break; // no events left + if (get_wr_events(&_aio_cmpl_timeout) == jerrno::AIO_TIMEOUT) + throw jexception(jerrno::JERR_JCNTL_AIOCMPLWAIT, "jcntl", "aio_cmpl_wait"); + } +} + +bool +jcntl::handle_aio_wait(const iores res, iores& resout, const data_tok* dtp) +{ + resout = res; + if (res == RHM_IORES_PAGE_AIOWAIT) + { + while (_wmgr.curr_pg_blocked()) + { + if (_wmgr.get_events(pmgr::UNUSED, &_aio_cmpl_timeout) == jerrno::AIO_TIMEOUT) + { + std::ostringstream oss; + oss << "get_events() returned JERR_JCNTL_AIOCMPLWAIT; wmgr_status: " << _wmgr.status_str(); + this->log(LOG_CRITICAL, oss.str()); + throw jexception(jerrno::JERR_JCNTL_AIOCMPLWAIT, "jcntl", "handle_aio_wait"); + } + } + return true; + } + else if (res == RHM_IORES_FILE_AIOWAIT) + { + while (_wmgr.curr_file_blocked()) + { + if (_wmgr.get_events(pmgr::UNUSED, &_aio_cmpl_timeout) == jerrno::AIO_TIMEOUT) + { + std::ostringstream oss; + oss << "get_events() returned JERR_JCNTL_AIOCMPLWAIT; wmgr_status: " << _wmgr.status_str(); + this->log(LOG_CRITICAL, oss.str()); + throw jexception(jerrno::JERR_JCNTL_AIOCMPLWAIT, "jcntl", "handle_aio_wait"); + } + } + 
_wrfc.wr_reset(); + resout = RHM_IORES_SUCCESS; + data_tok::write_state ws = dtp->wstate(); + return ws == data_tok::ENQ_PART || ws == data_tok::DEQ_PART || ws == data_tok::ABORT_PART || + ws == data_tok::COMMIT_PART; + } + return false; +} + +void +jcntl::rcvr_janalyze(rcvdat& rd, const std::vector<std::string>* prep_txn_list_ptr) +{ + jinf ji(_jdir.dirname() + "/" + _base_filename + "." + JRNL_INFO_EXTENSION, true); + + // If the number of files does not tie up with the jinf file from the journal being recovered, + // use the jinf data. + if (rd._njf != ji.num_jfiles()) + { + std::ostringstream oss; + oss << "Recovery found " << ji.num_jfiles() << + " files (different from --num-jfiles value of " << rd._njf << ")."; + this->log(LOG_WARN, oss.str()); + rd._njf = ji.num_jfiles(); + _rcvdat._enq_cnt_list.resize(rd._njf); + } + _emap.set_num_jfiles(rd._njf); + _tmap.set_num_jfiles(rd._njf); + if (_jfsize_sblks != ji.jfsize_sblks()) + { + std::ostringstream oss; + oss << "Recovery found file size = " << (ji.jfsize_sblks() / JRNL_RMGR_PAGE_SIZE) << + " (different from --jfile-size-pgs value of " << + (_jfsize_sblks / JRNL_RMGR_PAGE_SIZE) << ")."; + this->log(LOG_WARN, oss.str()); + _jfsize_sblks = ji.jfsize_sblks(); + } + if (_jdir.dirname().compare(ji.jdir())) + { + std::ostringstream oss; + oss << "Journal file location change: original = \"" << ji.jdir() << + "\"; current = \"" << _jdir.dirname() << "\""; + this->log(LOG_WARN, oss.str()); + ji.set_jdir(_jdir.dirname()); + } + + try + { + rd._ffid = ji.get_first_pfid(); + rd._lfid = ji.get_last_pfid(); + rd._owi = ji.get_initial_owi(); + rd._frot = ji.get_frot(); + rd._jempty = false; + ji.get_normalized_pfid_list(rd._fid_list); // _pfid_list + } + catch (const jexception& e) + { + if (e.err_code() != jerrno::JERR_JINF_JDATEMPTY) throw; + } + + // Restore all read and write pointers and transactions + if (!rd._jempty) + { + u_int16_t fid = rd._ffid; + std::ifstream ifs; + bool lowi = rd._owi; // local copy of owi to 
be used during analysis + while (rcvr_get_next_record(fid, &ifs, lowi, rd)) ; + if (ifs.is_open()) ifs.close(); + + // Remove all txns from tmap that are not in the prepared list + if (prep_txn_list_ptr) + { + std::vector<std::string> xid_list; + _tmap.xid_list(xid_list); + for (std::vector<std::string>::iterator itr = xid_list.begin(); itr != xid_list.end(); itr++) + { + std::vector<std::string>::const_iterator pitr = + std::find(prep_txn_list_ptr->begin(), prep_txn_list_ptr->end(), *itr); + if (pitr == prep_txn_list_ptr->end()) // not found in prepared list + { + txn_data_list tdl = _tmap.get_remove_tdata_list(*itr); // tdl will be empty if xid not found + // Unlock any affected enqueues in emap + for (tdl_itr i=tdl.begin(); i<tdl.end(); i++) + { + if (i->_enq_flag) // enq op - decrement enqueue count + rd._enq_cnt_list[i->_pfid]--; + else if (_emap.is_enqueued(i->_drid, true)) // deq op - unlock enq record + { + int16_t ret = _emap.unlock(i->_drid); + if (ret < enq_map::EMAP_OK) // fail + { + // enq_map::unlock()'s only error is enq_map::EMAP_RID_NOT_FOUND + std::ostringstream oss; + oss << std::hex << "_emap.unlock(): drid=0x\"" << i->_drid; + throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "jcntl", "rcvr_janalyze"); + } + } + } + } + } + } + + // Check for file full condition - add one to _jfsize_sblks to account for file header + rd._lffull = rd._eo == (1 + _jfsize_sblks) * JRNL_SBLK_SIZE * JRNL_DBLK_SIZE; + + // Check for journal full condition + u_int16_t next_wr_fid = (rd._lfid + 1) % rd._njf; + rd._jfull = rd._ffid == next_wr_fid && rd._enq_cnt_list[next_wr_fid] && rd._lffull; + } +} + +bool +jcntl::rcvr_get_next_record(u_int16_t& fid, std::ifstream* ifsp, bool& lowi, rcvdat& rd) +{ + std::size_t cum_size_read = 0; + void* xidp = 0; + rec_hdr h; + + bool hdr_ok = false; + std::streampos file_pos; + while (!hdr_ok) + { + if (!ifsp->is_open()) + { + if (!jfile_cycle(fid, ifsp, lowi, rd, true)) + return false; + } + file_pos = ifsp->tellg(); + 
ifsp->read((char*)&h, sizeof(rec_hdr)); + if (ifsp->gcount() == sizeof(rec_hdr)) + hdr_ok = true; + else + { + if (!jfile_cycle(fid, ifsp, lowi, rd, true)) + return false; + } + } + + switch(h._magic) + { + case RHM_JDAT_ENQ_MAGIC: + { + enq_rec er; + u_int16_t start_fid = fid; // fid may increment in decode() if record folds over file boundary + if (!decode(er, fid, ifsp, cum_size_read, h, lowi, rd, file_pos)) + return false; + if (!er.is_transient()) // Ignore transient msgs + { + rd._enq_cnt_list[start_fid]++; + if (er.xid_size()) + { + er.get_xid(&xidp); + assert(xidp != 0); + std::string xid((char*)xidp, er.xid_size()); + _tmap.insert_txn_data(xid, txn_data(h._rid, 0, start_fid, true)); + if (_tmap.set_aio_compl(xid, h._rid) < txn_map::TMAP_OK) // fail - xid or rid not found + { + std::ostringstream oss; + oss << std::hex << "_tmap.set_aio_compl: txn_enq xid=\"" << xid << "\" rid=0x" << h._rid; + throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "jcntl", "rcvr_get_next_record"); + } + std::free(xidp); + } + else + { + if (_emap.insert_pfid(h._rid, start_fid) < enq_map::EMAP_OK) // fail + { + // The only error code emap::insert_pfid() returns is enq_map::EMAP_DUP_RID. 
+ std::ostringstream oss; + oss << std::hex << "rid=0x" << h._rid << " _pfid=0x" << start_fid; + throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "jcntl", "rcvr_get_next_record"); + } + } + } + } + break; + case RHM_JDAT_DEQ_MAGIC: + { + deq_rec dr; + u_int16_t start_fid = fid; // fid may increment in decode() if record folds over file boundary + if (!decode(dr, fid, ifsp, cum_size_read, h, lowi, rd, file_pos)) + return false; + if (dr.xid_size()) + { + // If the enqueue is part of a pending txn, it will not yet be in emap + _emap.lock(dr.deq_rid()); // ignore not found error + dr.get_xid(&xidp); + assert(xidp != 0); + std::string xid((char*)xidp, dr.xid_size()); + _tmap.insert_txn_data(xid, txn_data(dr.rid(), dr.deq_rid(), start_fid, false, + dr.is_txn_coml_commit())); + if (_tmap.set_aio_compl(xid, dr.rid()) < txn_map::TMAP_OK) // fail - xid or rid not found + { + std::ostringstream oss; + oss << std::hex << "_tmap.set_aio_compl: txn_deq xid=\"" << xid << "\" rid=0x" << dr.rid(); + throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "jcntl", "rcvr_get_next_record"); + } + std::free(xidp); + } + else + { + int16_t enq_fid = _emap.get_remove_pfid(dr.deq_rid(), true); + if (enq_fid >= enq_map::EMAP_OK) // ignore not found error + rd._enq_cnt_list[enq_fid]--; + } + } + break; + case RHM_JDAT_TXA_MAGIC: + { + txn_rec ar; + if (!decode(ar, fid, ifsp, cum_size_read, h, lowi, rd, file_pos)) + return false; + // Delete this txn from tmap, unlock any locked records in emap + ar.get_xid(&xidp); + assert(xidp != 0); + std::string xid((char*)xidp, ar.xid_size()); + txn_data_list tdl = _tmap.get_remove_tdata_list(xid); // tdl will be empty if xid not found + for (tdl_itr itr = tdl.begin(); itr != tdl.end(); itr++) + { + if (itr->_enq_flag) + rd._enq_cnt_list[itr->_pfid]--; + else + _emap.unlock(itr->_drid); // ignore not found error + } + std::free(xidp); + } + break; + case RHM_JDAT_TXC_MAGIC: + { + txn_rec cr; + if (!decode(cr, fid, ifsp, cum_size_read, h, lowi, 
rd, file_pos)) + return false; + // Delete this txn from tmap, process records into emap + cr.get_xid(&xidp); + assert(xidp != 0); + std::string xid((char*)xidp, cr.xid_size()); + txn_data_list tdl = _tmap.get_remove_tdata_list(xid); // tdl will be empty if xid not found + for (tdl_itr itr = tdl.begin(); itr != tdl.end(); itr++) + { + if (itr->_enq_flag) // txn enqueue + { + if (_emap.insert_pfid(itr->_rid, itr->_pfid) < enq_map::EMAP_OK) // fail + { + // The only error code emap::insert_pfid() returns is enq_map::EMAP_DUP_RID. + std::ostringstream oss; + oss << std::hex << "rid=0x" << itr->_rid << " _pfid=0x" << itr->_pfid; + throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "jcntl", "rcvr_get_next_record"); + } + } + else // txn dequeue + { + int16_t enq_fid = _emap.get_remove_pfid(itr->_drid, true); + if (enq_fid >= enq_map::EMAP_OK) + rd._enq_cnt_list[enq_fid]--; + } + } + std::free(xidp); + } + break; + case RHM_JDAT_EMPTY_MAGIC: + { + u_int32_t rec_dblks = jrec::size_dblks(sizeof(rec_hdr)); + ifsp->ignore(rec_dblks * JRNL_DBLK_SIZE - sizeof(rec_hdr)); + assert(!ifsp->fail() && !ifsp->bad()); + if (!jfile_cycle(fid, ifsp, lowi, rd, false)) + return false; + } + break; + case 0: + check_journal_alignment(fid, file_pos, rd); + return false; + default: + // Stop as this is the overwrite boundary. + check_journal_alignment(fid, file_pos, rd); + return false; + } + return true; +} + +bool +jcntl::decode(jrec& rec, u_int16_t& fid, std::ifstream* ifsp, std::size_t& cum_size_read, + rec_hdr& h, bool& lowi, rcvdat& rd, std::streampos& file_offs) +{ + u_int16_t start_fid = fid; + std::streampos start_file_offs = file_offs; + if (!check_owi(fid, h, lowi, rd, file_offs)) + return false; + bool done = false; + while (!done) + { + try { done = rec.rcv_decode(h, ifsp, cum_size_read); } + catch (const jexception& e) + { +// TODO - review this logic and tidy up how rd._lfid is assigned. See new jinf.get_end_file() fn. 
+// Original +// if (e.err_code() != jerrno::JERR_JREC_BADRECTAIL || +// fid != (rd._ffid ? rd._ffid - 1 : _num_jfiles - 1)) throw; +// Tried this, but did not work +// if (e.err_code() != jerrno::JERR_JREC_BADRECTAIL || h._magic != 0) throw; + check_journal_alignment(start_fid, start_file_offs, rd); +// rd._lfid = start_fid; + return false; + } + if (!done && !jfile_cycle(fid, ifsp, lowi, rd, false)) + { + check_journal_alignment(start_fid, start_file_offs, rd); + return false; + } + } + return true; +} + +bool +jcntl::jfile_cycle(u_int16_t& fid, std::ifstream* ifsp, bool& lowi, rcvdat& rd, const bool jump_fro) +{ + if (ifsp->is_open()) + { + if (ifsp->eof() || !ifsp->good()) + { + ifsp->clear(); + rd._eo = ifsp->tellg(); // remember file offset before closing + assert(rd._eo != std::numeric_limits<std::size_t>::max()); // Check for error code -1 + ifsp->close(); + if (++fid >= rd._njf) + { + fid = 0; + lowi = !lowi; // Flip local owi + } + if (fid == rd._ffid) // used up all journal files + return false; + } + } + if (!ifsp->is_open()) + { + std::ostringstream oss; + oss << _jdir.dirname() << "/" << _base_filename << "."; + oss << std::hex << std::setfill('0') << std::setw(4) << fid << "." << JRNL_DATA_EXTENSION; + ifsp->clear(); // clear eof flag, req'd for older versions of c++ + ifsp->open(oss.str().c_str(), std::ios_base::in | std::ios_base::binary); + if (!ifsp->good()) + throw jexception(jerrno::JERR__FILEIO, oss.str(), "jcntl", "jfile_cycle"); + + // Read file header + file_hdr fhdr; + ifsp->read((char*)&fhdr, sizeof(fhdr)); + assert(ifsp->good()); + if (fhdr._magic == RHM_JDAT_FILE_MAGIC) + { + assert(fhdr._lfid == fid); + if (!rd._fro) + rd._fro = fhdr._fro; + std::streamoff foffs = jump_fro ? 
fhdr._fro : JRNL_DBLK_SIZE * JRNL_SBLK_SIZE; + ifsp->seekg(foffs); + } + else + { + ifsp->close(); + return false; + } + } + return true; +} + +bool +jcntl::check_owi(const u_int16_t fid, rec_hdr& h, bool& lowi, rcvdat& rd, std::streampos& file_pos) +{ + if (rd._ffid ? h.get_owi() == lowi : h.get_owi() != lowi) // Overwrite indicator changed + { + u_int16_t expected_fid = rd._ffid ? rd._ffid - 1 : rd._njf - 1; + if (fid == expected_fid) + { + check_journal_alignment(fid, file_pos, rd); + return false; + } + std::ostringstream oss; + oss << std::hex << std::setfill('0') << "Magic=0x" << std::setw(8) << h._magic; + oss << " fid=0x" << std::setw(4) << fid << " rid=0x" << std::setw(8) << h._rid; + oss << " foffs=0x" << std::setw(8) << file_pos; + oss << " expected_fid=0x" << std::setw(4) << expected_fid; + throw jexception(jerrno::JERR_JCNTL_OWIMISMATCH, oss.str(), "jcntl", + "check_owi"); + } + if (rd._h_rid == 0) + rd._h_rid = h._rid; + else if (h._rid - rd._h_rid < 0x8000000000000000ULL) // RFC 1982 comparison for unsigned 64-bit + rd._h_rid = h._rid; + return true; +} + + +void +jcntl::check_journal_alignment(const u_int16_t fid, std::streampos& file_pos, rcvdat& rd) +{ + unsigned sblk_offs = file_pos % (JRNL_DBLK_SIZE * JRNL_SBLK_SIZE); + if (sblk_offs) + { + { + std::ostringstream oss; + oss << std::hex << "Bad record alignment found at fid=0x" << fid; + oss << " offs=0x" << file_pos << " (likely journal overwrite boundary); " << std::dec; + oss << (JRNL_SBLK_SIZE - (sblk_offs/JRNL_DBLK_SIZE)) << " filler record(s) required."; + this->log(LOG_WARN, oss.str()); + } + const u_int32_t xmagic = RHM_JDAT_EMPTY_MAGIC; + std::ostringstream oss; + oss << _jdir.dirname() << "/" << _base_filename << "."; + oss << std::hex << std::setfill('0') << std::setw(4) << fid << "." 
<< JRNL_DATA_EXTENSION; + std::ofstream ofsp(oss.str().c_str(), + std::ios_base::in | std::ios_base::out | std::ios_base::binary); + if (!ofsp.good()) + throw jexception(jerrno::JERR__FILEIO, oss.str(), "jcntl", "check_journal_alignment"); + ofsp.seekp(file_pos); + void* buff = std::malloc(JRNL_DBLK_SIZE); + assert(buff != 0); + std::memcpy(buff, (const void*)&xmagic, sizeof(xmagic)); + // Normally, RHM_CLEAN must be set before these fills are done, but this is a recover + // situation (i.e. performance is not an issue), and it makes the location of the write + // clear should inspection of the file be required. + std::memset((char*)buff + sizeof(xmagic), RHM_CLEAN_CHAR, JRNL_DBLK_SIZE - sizeof(xmagic)); + + while (file_pos % (JRNL_DBLK_SIZE * JRNL_SBLK_SIZE)) + { + ofsp.write((const char*)buff, JRNL_DBLK_SIZE); + assert(!ofsp.fail()); + std::ostringstream oss; + oss << std::hex << "Recover phase write: Wrote filler record: fid=0x" << fid << " offs=0x" << file_pos; + this->log(LOG_NOTICE, oss.str()); + file_pos = ofsp.tellp(); + } + ofsp.close(); + std::free(buff); + rd._lfid = fid; + if (!rd._frot) + rd._ffid = (fid + 1) % rd._njf; + this->log(LOG_INFO, "Bad record alignment fixed."); + } + rd._eo = file_pos; +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/jcntl.h b/cpp/src/qpid/legacystore/jrnl/jcntl.h new file mode 100644 index 0000000000..294e9ced05 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/jcntl.h @@ -0,0 +1,722 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file jcntl.h + * + * Qpid asynchronous store plugin library + * + * Messaging journal top-level control and interface class + * mrg::journal::jcntl. See class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_JCNTL_H +#define QPID_LEGACYSTORE_JRNL_JCNTL_H + +namespace mrg +{ +namespace journal +{ + class jcntl; +} +} + +#include <cstddef> +#include <deque> +#include "qpid/legacystore/jrnl/jdir.h" +#include "qpid/legacystore/jrnl/fcntl.h" +#include "qpid/legacystore/jrnl/lpmgr.h" +#include "qpid/legacystore/jrnl/rcvdat.h" +#include "qpid/legacystore/jrnl/slock.h" +#include "qpid/legacystore/jrnl/smutex.h" +#include "qpid/legacystore/jrnl/rmgr.h" +#include "qpid/legacystore/jrnl/wmgr.h" +#include "qpid/legacystore/jrnl/wrfc.h" + +namespace mrg +{ +namespace journal +{ + + /** + * \brief Access and control interface for the journal. This is the top-level class for the + * journal. + * + * This is the top-level journal class; one instance of this class controls one instance of the + * journal and all its files and associated control structures. Besides this class, the only + * other class that needs to be used at a higher level is the data_tok class, one instance of + * which is used per data block written to the journal, and is used to track its status through + * the AIO enqueue, read and dequeue process. + */ + class jcntl + { + protected: + /** + * \brief Journal ID + * + * This string uniquely identifies this journal instance. 
It will most likely be associated + * with the identity of the message queue with which it is associated. + */ + // TODO: This is not included in any files at present, add to file_hdr? + std::string _jid; + + /** + * \brief Journal directory + * + * This string stores the path to the journal directory. It may be absolute or relative, and + * should not end in a file separator character. (e.g. "/fastdisk/jdata" is correct, + * "/fastdisk/jdata/" is not.) + */ + jdir _jdir; + + /** + * \brief Base filename + * + * This string contains the base filename used for the journal files. The filenames will + * start with this base, and have various sections added to it to derive the final file names + * that will be written to disk. No file separator characters should be included here, but + * all other legal filename characters are valid. + */ + std::string _base_filename; + + /** + * \brief Initialized flag + * + * This flag starts out set to false, is set to true once this object has been initialized, + * either by calling initialize() or recover(). + */ + bool _init_flag; + + /** + * \brief Stopped flag + * + * This flag starts out false, and is set to true when stop() is called. At this point, the + * journal will no longer accept messages until either initialize() or recover() is called. + * There is no way other than through initialization to reset this flag. + */ + // TODO: It would be helpful to distinguish between states stopping and stopped. If stop(true) is called, + // then we are stopping, but must wait for all outstanding aios to return before being finally stopped. During + // this period, however, no new enqueue/dequeue/read requests may be accepted. + bool _stop_flag; + + /** + * \brief Read-only state flag used during recover. + * + * When true, this flag prevents journal write operations (enqueue and dequeue), but + * allows read to occur. It is used during recovery, and is reset when recover_complete() is + * called. 
+ */ + + bool _readonly_flag; + + /** + * \brief If set, calls stop() if the journal write pointer overruns dequeue low water + * marker. If not set, then attempts to write will throw exceptions until the journal + * file low water marker moves to the next journal file. + */ + bool _autostop; ///< Autostop flag - stops journal when overrun occurs + + // Journal control structures + u_int32_t _jfsize_sblks; ///< Journal file size in sblks + lpmgr _lpmgr; ///< LFID-PFID manager tracks inserted journal files + enq_map _emap; ///< Enqueue map for low water mark management + txn_map _tmap; ///< Transaction map open transactions + rrfc _rrfc; ///< Read journal rotating file controller + wrfc _wrfc; ///< Write journal rotating file controller + rmgr _rmgr; ///< Read page manager which manages AIO + wmgr _wmgr; ///< Write page manager which manages AIO + rcvdat _rcvdat; ///< Recovery data used for recovery + smutex _wr_mutex; ///< Mutex for journal writes + + public: + static timespec _aio_cmpl_timeout; ///< Timeout for blocking libaio returns + static timespec _final_aio_cmpl_timeout; ///< Timeout for blocking libaio returns when stopping or finalizing + + /** + * \brief Journal constructor. + * + * Constructor which sets the physical file location and base name. + * + * \param jid A unique identifier for this journal instance. + * \param jdir The directory which will contain the journal files. + * \param base_filename The string which will be used to start all journal filenames. + */ + jcntl(const std::string& jid, const std::string& jdir, const std::string& base_filename); + + /** + * \brief Destructor. + */ + virtual ~jcntl(); + + inline const std::string& id() const { return _jid; } + inline const std::string& jrnl_dir() const { return _jdir.dirname(); } + + /** + * \brief Initialize the journal for storing data. + * + * Initialize the journal by creating new journal data files and initializing internal + * control structures. 
When complete, the journal will be empty, and ready to store data. + * + * <b>NOTE: Any existing journal will be ignored by this operation.</b> To recover + * the data from an existing journal, use recover(). + * + * <b>NOTE: If <i>NULL</i> is passed to the deque pointers, they will be internally created + * and deleted.</b> + * + * <b>NOTE: If <i>NULL</i> is passed to the callbacks, internal default callbacks will be + * used.</b> + * + * \param num_jfiles The number of journal files to be created. + * \param auto_expand If true, allows journal file auto-expansion. In this mode, the journal will automatically + * add files to the journal if it runs out of space. No more than ae_max_jfiles may be added. If false, then + * no files are added and an exception will be thrown if the journal runs out of file space. + * \param ae_max_jfiles Upper limit of journal files for auto-expand mode. When auto_expand is true, this is the + * maximum total number of files allowed in the journal (original plus those added by auto-expand mode). If + * this number of files exist and the journal runs out of space, an exception will be thrown. This number + * must be greater than the num_jfiles parameter value but cannot exceed the maximum number of files for a + * single journal; if num_jfiles is already at its maximum value, then auto-expand will be disabled. + * \param jfsize_sblks The size of each journal file expressed in softblocks. + * \param wcache_num_pages The number of write cache pages to create. + * \param wcache_pgsize_sblks The size in sblks of each write cache page. + * \param cbp Pointer to object containing callback functions for read and write operations. May be 0 (NULL). 
+ * + * \exception TODO + */ + void initialize(const u_int16_t num_jfiles, const bool auto_expand, const u_int16_t ae_max_jfiles, + const u_int32_t jfsize_sblks, const u_int16_t wcache_num_pages, const u_int32_t wcache_pgsize_sblks, + aio_callback* const cbp); + + /** + * \brief Initialize journal by recovering state from previously written journal. + * + * Initialize journal by recovering state from previously written journal. The journal files + * are analyzed, and all records that have not been dequeued and that remain in the journal + * will be available for reading. The journal is placed in a read-only state until + * recover_complete() is called; any calls to enqueue or dequeue will fail with an exception + * in this state. + * + * <b>NOTE: If <i>NULL</i> is passed to the deque pointers, they will be internally created + * and deleted.</b> + * + * <b>NOTE: If <i>NULL</i> is passed to the callbacks, internal default callbacks will be + * used.</b> + * + * \param num_jfiles The number of journal files to be created. + * \param auto_expand If true, allows journal file auto-expansion. In this mode, the journal will automatically + * add files to the journal if it runs out of space. No more than ae_max_jfiles may be added. If false, then + * no files are added and an exception will be thrown if the journal runs out of file space. + * \param ae_max_jfiles Upper limit of journal files for auto-expand mode. When auto_expand is true, this is the + * maximum total number of files allowed in the journal (original plus those added by auto-expand mode). If + * this number of files exist and the journal runs out of space, an exception will be thrown. This number + * must be greater than the num_jfiles parameter value but cannot exceed the maximum number of files for a + * single journal; if num_jfiles is already at its maximum value, then auto-expand will be disabled. + * \param jfsize_sblks The size of each journal file expressed in softblocks. 
+ * \param wcache_num_pages The number of write cache pages to create. + * \param wcache_pgsize_sblks The size in sblks of each write cache page. + * \param cbp Pointer to object containing callback functions for read and write operations. May be 0 (NULL). + * \param prep_txn_list_ptr + * \param highest_rid Returns the highest rid found in the journal during recover + * + * \exception TODO + */ + void recover(const u_int16_t num_jfiles, const bool auto_expand, const u_int16_t ae_max_jfiles, + const u_int32_t jfsize_sblks, const u_int16_t wcache_num_pages, const u_int32_t wcache_pgsize_sblks, + aio_callback* const cbp, const std::vector<std::string>* prep_txn_list_ptr, u_int64_t& highest_rid); + + /** + * \brief Notification to the journal that recovery is complete and that normal operation + * may resume. + * + * This call notifies the journal that recovery is complete and that normal operation + * may resume. The read pointers are reset so that all records read as a part of recover + * may be re-read during normal operation. The read-only flag is then reset, allowing + * enqueue and dequeue operations to resume. + * + * \exception TODO + */ + void recover_complete(); + + /** + * \brief Stops journal and deletes all journal files. + * + * Clear the journal directory of all journal files matching the base filename. + * + * \exception TODO + */ + void delete_jrnl_files(); + + /** + * \brief Enqueue data. + * + * Enqueue data or part thereof. If a large data block is being written, then it may be + * enqueued in parts by setting this_data_len to the size of the data being written in this + * call. The total data size must be known in advance, however, as this is written into the + * record header on the first record write. The state of the write (i.e. how much has been + * written so far) is maintained in the data token dtokp. Partial writes will return in state + * ENQ_PART. 
+ * + * Note that a return value of anything other than RHM_IORES_SUCCESS implies that this write + * operation did not complete successfully or was partially completed. The action taken under + * these conditions depends on the value of the return. For example, RHM_IORES_AIO_WAIT + * implies that all pages in the write page cache are waiting for AIO operations to return, + * and that the call should be remade after waiting a bit. + * + * Example: If a write of 99 kB is divided into three equal parts, then the following states + * and returns would characterize a successful operation: + * <pre> + * dtok. dtok. dtok. + * Operation Return wstate() dsize() written() Comment + * -----------------+--------+--------+-------+---------+------------------------------------ + * NONE 0 0 Value of dtok before op + * edr(99000, 33000) SUCCESS ENQ_PART 99000 33000 Enqueue part 1 + * edr(99000, 33000) AIO_WAIT ENQ_PART 99000 50000 Enqueue part 2, not completed + * edr(99000, 33000) SUCCESS ENQ_PART 99000 66000 Enqueue part 2 again + * edr(99000, 33000) SUCCESS ENQ 99000 99000 Enqueue part 3 + * </pre> + * + * \param data_buff Pointer to data to be enqueued for this enqueue operation. + * \param tot_data_len Total data length. + * \param this_data_len Amount to be written in this enqueue operation. + * \param dtokp Pointer to data token which contains the details of the enqueue operation. + * \param transient Flag indicating transient persistence (ie, ignored on recover). + * + * \exception TODO + */ + iores enqueue_data_record(const void* const data_buff, const std::size_t tot_data_len, + const std::size_t this_data_len, data_tok* dtokp, const bool transient = false); + + iores enqueue_extern_data_record(const std::size_t tot_data_len, data_tok* dtokp, + const bool transient = false); + + /** + * \brief Enqueue data. + * + * \param data_buff Pointer to data to be enqueued for this enqueue operation. + * \param tot_data_len Total data length. 
+ * \param this_data_len Amount to be written in this enqueue operation. + * \param dtokp Pointer to data token which contains the details of the enqueue operation. + * \param xid String containing xid. An empty string (i.e. length=0) will be considered + * non-transactional. + * \param transient Flag indicating transient persistence (ie, ignored on recover). + * + * \exception TODO + */ + iores enqueue_txn_data_record(const void* const data_buff, const std::size_t tot_data_len, + const std::size_t this_data_len, data_tok* dtokp, const std::string& xid, + const bool transient = false); + iores enqueue_extern_txn_data_record(const std::size_t tot_data_len, data_tok* dtokp, + const std::string& xid, const bool transient = false); + + /* TODO + ** + * \brief Retrieve details of next record to be read without consuming the record. + * + * Retrieve information about current read record. A pointer to the data is returned, along + * with the data size and available data size. Data is considered "available" when the AIO + * operations to fill page-cache pages from disk have returned, and is ready for consumption. + * + * If <i>dsize_avail</i> < <i>dsize</i>, then not all of the data is available or part of + * the data is in non-contiguous memory, and a subsequent call will update both the pointer + * and <i>dsize_avail</i> if more pages have returned from AIO. + * + * The <i>dsize_avail</i> parameter will return the amount of data from this record that is + * available in the page cache as contiguous memory, even if it spans page cache boundaries. + * However, if a record spans the end of the page cache and continues at the beginning, even + * if both parts are ready for consumption, then this must be divided into at least two + * get_data_record() operations, as the data is contained in at least two non-contiguous + * segments of the page cache. + * + * Once all the available data for a record is exposed, it can not be read again using + * this function. 
It must be consumed prior to getting the next record. This can be done by + * calling discard_data_record() or read_data_record(). However, if parameter + * <i>auto_discard</i> is set to <b><i>true</i></b>, then this record will be automatically + * consumed when the entire record has become available without having to explicitly call + * discard_next_data_record() or read_data_record(). + * + * If the current record is an open transactional record, then it cannot be read until it is + * committed. If it is aborted, it can never be read. Under this condition, get_data_record() + * will return RHM_IORES_TXPENDING, the data pointer will be set to NULL and all data + * lengths will be set to 0. + * + * Example: Read a record of 30k. Assume a read page cache of 10 pages of size 10k starting + * at address base_ptr (page0 = base_ptr, page1 = page_ptr+10k, etc.). The first 15k of + * the record falls at the end of the page cache, the remaining 15k folded to the beginning. + * The current page (page 8) containing 5k is available, the remaining pages which contain + * this record are pending AIO return: + * <pre> + * call dsize + * no. dsize avail data ptr Return Comment + * ----+-----+-----+------------+--------+-------------------------------------------------- + * 1 30k 5k base_ptr+85k SUCCESS Initial call, read first 5k + * 2 30k 0k base_ptr+90k AIO_WAIT AIO still pending; no further pages avail + * 3 30k 10k base_ptr+90k SUCCESS AIO now returned; now read till end of page cache + * 4 30k 15k base_ptr SUCCESS data_ptr now pointing to start of page cache + * </pre> + * + * \param rid Reference that returns the record ID (rid) + * \param dsize Reference that returns the total data size of the record data . + * \param dsize_avail Reference that returns the amount of the data that is available for + * consumption. + * \param data Pointer to data pointer which will point to the first byte of the next record + * data. 
+ * \param auto_discard If <b><i>true</i></b>, automatically discard the record being read if + * the entire record is available (i.e. dsize == dsize_avail). Otherwise + * discard_next_data_record() must be explicitly called. + * + * \exception TODO + * + // *** NOT YET IMPLEMENTED *** + iores get_data_record(const u_int64_t& rid, const std::size_t& dsize, + const std::size_t& dsize_avail, const void** const data, bool auto_discard = false); + */ + + /* TODO + ** + * \brief Discard (skip) next record to be read without reading or retrieving it. + * + * \exception TODO + * + // *** NOT YET IMPLEMENTED *** + iores discard_data_record(data_tok* const dtokp); + */ + + /** + * \brief Reads data from the journal. It is the responsibility of the reader to free + * the memory that is allocated through this call - see below for details. + * + * Reads the next non-dequeued data record from the journal. + * + * <b>Note</b> that this call allocates memory into which the data and XID are copied. It + * is the responsibility of the caller to free this memory. The memory for the data and + * XID are allocated in a single call, and the XID precedes the data in the memory space. + * Thus, where an XID exists, freeing the XID pointer will free both the XID and data memory. + * However, if an XID does not exist for the message, the XID pointer xidpp is set to NULL, + * and it is the data pointer datapp that must be freed. Should neither an XID nor data be + * present (ie an empty record), then no memory is allocated, and both pointers will be NULL. + * In this case, there is no need to free memory. + * + * TODO: Fix this lousy interface. The caller should NOT be required to clean up these + * pointers! Rather use a struct, or better still, let the data token carry the data and + * xid pointers and lengths, and have the data token both allocate and delete. + * + * \param datapp Pointer to pointer that will be set to point to memory allocated and + * containing the data. 
Will be set to NULL if the call fails or there is no data + * in the record. + * \param dsize Ref that will be set to the size of the data. Will be set to 0 if the call + * fails or if there is no data in the record. + * \param xidpp Pointer to pointer that will be set to point to memory allocated and + * containing the XID. Will be set to NULL if the call fails or there is no XID attached + * to this record. + * \param xidsize Ref that will be set to the size of the XID. + * \param transient Ref that will be set true if record is transient. + * \param external Ref that will be set true if record is external. In this case, the data + * pointer datapp will be set to NULL, but dsize will contain the size of the data. + * NOTE: If there is an xid, then xidpp must be freed. + * \param dtokp Pointer to data_tok instance for this data, used to track state of data + * through journal. + * \param ignore_pending_txns When false (default), if the next record to be read is locked + * by a pending transaction, the read fails with RHM_IORES_TXPENDING. However, if set + * to true, then locks are ignored. This is required for reading of the Transaction + * Prepared List (TPL) which may have its entries locked, but may be read from + * time-to-time, and needs all its records (locked and unlocked) to be available. + * + * \exception TODO + */ + iores read_data_record(void** const datapp, std::size_t& dsize, void** const xidpp, + std::size_t& xidsize, bool& transient, bool& external, data_tok* const dtokp, + bool ignore_pending_txns = false); + + /** + * \brief Dequeues (marks as no longer needed) data record in journal. + * + * Dequeues (marks as no longer needed) data record in journal. Note that it is possible + * to use the same data token instance used to enqueue this data; it contains the record ID + * needed to correctly mark this data as dequeued in the journal. 
Otherwise the RID of the + * record to be dequeued and the write state of ENQ must be manually set in a new or reset + * instance of data_tok. + * + * \param dtokp Pointer to data_tok instance for this data, used to track state of data + * through journal. + * \param txn_coml_commit Only used for preparedXID journal. When used for dequeueing + * prepared XID list items, sets whether the complete() was called in commit or abort + * mode. + * + * \exception TODO + */ + iores dequeue_data_record(data_tok* const dtokp, const bool txn_coml_commit = false); + + /** + * \brief Dequeues (marks as no longer needed) data record in journal. + * + * Dequeues (marks as no longer needed) data record in journal as part of a transaction. + * Note that it is possible to use the same data token instance used to enqueue this data; + * it contains the RID needed to correctly mark this data as dequeued in the journal. + * Otherwise the RID of the record to be dequeued and the write state of ENQ must be + * manually set in a new or reset instance of data_tok. + * + * \param dtokp Pointer to data_tok instance for this data, used to track state of data + * through journal. + * \param xid String containing xid. An empty string (i.e. length=0) will be considered + * non-transactional. + * \param txn_coml_commit Only used for preparedXID journal. When used for dequeueing + * prepared XID list items, sets whether the complete() was called in commit or abort + * mode. + * + * \exception TODO + */ + iores dequeue_txn_data_record(data_tok* const dtokp, const std::string& xid, const bool txn_coml_commit = false); + + /** + * \brief Abort the transaction for all records enqueued or dequeued with the matching xid. + * + * Abort the transaction for all records enqueued with the matching xid. All enqueued records + * are effectively deleted from the journal, and can not be read. All dequeued records remain + * as though they had never been dequeued. 
+ * + * \param dtokp Pointer to data_tok instance for this data, used to track state of data + * through journal. + * \param xid String containing xid. + * + * \exception TODO + */ + iores txn_abort(data_tok* const dtokp, const std::string& xid); + + /** + * \brief Commit the transaction for all records enqueued or dequeued with the matching xid. + * + * Commit the transaction for all records enqueued with the matching xid. All enqueued + * records are effectively released for reading and dequeueing. All dequeued records are + * removed and can no longer be accessed. + * + * \param dtokp Pointer to data_tok instance for this data, used to track state of data + * through journal. + * \param xid String containing xid. + * + * \exception TODO + */ + iores txn_commit(data_tok* const dtokp, const std::string& xid); + + /** + * \brief Check whether all the enqueue records for the given xid have reached disk. + * + * \param xid String containing xid. + * + * \exception TODO + */ + bool is_txn_synced(const std::string& xid); + + /** + * \brief Forces a check for returned AIO write events. + * + * Forces a check for returned AIO write events. This is normally performed by enqueue() and + * dequeue() operations, but if these operations cease, then this call needs to be made to + * force the processing of any outstanding AIO operations. + */ + int32_t get_wr_events(timespec* const timeout); + + /** + * \brief Forces a check for returned AIO read events. + * + * Forces a check for returned AIO read events. This is normally performed by read_data() + * operations, but if these operations cease, then this call needs to be made to force the + * processing of any outstanding AIO operations. + */ + int32_t get_rd_events(timespec* const timeout); + + /** + * \brief Stop the journal from accepting any further requests to read or write data. + * + * This operation is used to stop the journal. This is the normal mechanism for bringing the + * journal to an orderly stop. 
Any outstanding AIO operations or partially written pages in + * the write page cache will be flushed and will complete. + * + * <b>Note:</b> The journal cannot be restarted without either initializing it or restoring + * it. + * + * \param block_till_aio_cmpl If true, will block the thread while waiting for all + * outstanding AIO operations to complete. + */ + void stop(const bool block_till_aio_cmpl = false); + + /** + * \brief Force a flush of the write page cache, creating a single AIO write operation. + */ + iores flush(const bool block_till_aio_cmpl = false); + + inline u_int32_t get_enq_cnt() const { return _emap.size(); } + + inline u_int32_t get_wr_aio_evt_rem() const { slock l(_wr_mutex); return _wmgr.get_aio_evt_rem(); } + + inline u_int32_t get_rd_aio_evt_rem() const { return _rmgr.get_aio_evt_rem(); } + + inline u_int32_t get_wr_outstanding_aio_dblks() const + { return _wrfc.aio_outstanding_dblks(); } + + inline u_int32_t get_wr_outstanding_aio_dblks(u_int16_t lfid) const + { return _lpmgr.get_fcntlp(lfid)->wr_aio_outstanding_dblks(); } + + inline u_int32_t get_rd_outstanding_aio_dblks() const + { return _rrfc.aio_outstanding_dblks(); } + + inline u_int32_t get_rd_outstanding_aio_dblks(u_int16_t lfid) const + { return _lpmgr.get_fcntlp(lfid)->rd_aio_outstanding_dblks(); } + + inline u_int16_t get_rd_fid() const { return _rrfc.index(); } + inline u_int16_t get_wr_fid() const { return _wrfc.index(); } + u_int16_t get_earliest_fid(); + + /** + * \brief Check if a particular rid is enqueued. Note that this function will return + * false if the rid is transactionally enqueued and is not committed, or if it is + * locked (i.e. transactionally dequeued, but the dequeue has not been committed). 
+ */ + inline bool is_enqueued(const u_int64_t rid, bool ignore_lock = false) + { return _emap.is_enqueued(rid, ignore_lock); } + inline bool is_locked(const u_int64_t rid) + { if (_emap.is_enqueued(rid, true) < enq_map::EMAP_OK) return false; return _emap.is_locked(rid) == enq_map::EMAP_TRUE; } + inline void enq_rid_list(std::vector<u_int64_t>& rids) { _emap.rid_list(rids); } + inline void enq_xid_list(std::vector<std::string>& xids) { _tmap.xid_list(xids); } + inline u_int32_t get_open_txn_cnt() const { return _tmap.size(); } + // TODO Make this a const, but txn_map must support const first. + inline txn_map& get_txn_map() { return _tmap; } + + /** + * \brief Check if the journal is stopped. + * + * \return <b><i>true</i></b> if the jouranl is stopped; + * <b><i>false</i></b> otherwise. + */ + inline bool is_stopped() { return _stop_flag; } + + /** + * \brief Check if the journal is ready to read and write data. + * + * Checks if the journal is ready to read and write data. This function will return + * <b><i>true</i></b> if the journal has been either initialized or restored, and the stop() + * function has not been called since the initialization. + * + * Note that the journal may also be stopped if an internal error occurs (such as running out + * of data journal file space). + * + * \return <b><i>true</i></b> if the journal is ready to read and write data; + * <b><i>false</i></b> otherwise. + */ + inline bool is_ready() const { return _init_flag && !_stop_flag; } + + inline bool is_read_only() const { return _readonly_flag; } + + /** + * \brief Get the journal directory. + * + * This returns the journal directory as set during initialization. This is the directory + * into which the journal files will be written. + */ + inline const std::string& dirname() const { return _jdir.dirname(); } + + /** + * \brief Get the journal base filename. + * + * Get the journal base filename as set during initialization. 
This is the prefix used in all + * journal files of this instance. Note that if more than one instance of the journal shares + * the same directory, their base filenames <b>MUST</b> be different or else the instances + * will overwrite one another. + */ + inline const std::string& base_filename() const { return _base_filename; } + + inline u_int16_t num_jfiles() const { return _lpmgr.num_jfiles(); } + + inline fcntl* get_fcntlp(const u_int16_t lfid) const { return _lpmgr.get_fcntlp(lfid); } + + inline u_int32_t jfsize_sblks() const { return _jfsize_sblks; } + + // Logging + virtual void log(log_level level, const std::string& log_stmt) const; + virtual void log(log_level level, const char* const log_stmt) const; + + // FIXME these are _rmgr to _wmgr interactions, remove when _rmgr contains ref to _wmgr: + void chk_wr_frot(); + inline u_int32_t unflushed_dblks() { return _wmgr.unflushed_dblks(); } + void fhdr_wr_sync(const u_int16_t lid); + inline u_int32_t wr_subm_cnt_dblks(const u_int16_t lfid) const { return _lpmgr.get_fcntlp(lfid)->wr_subm_cnt_dblks(); } + + // Management instrumentation callbacks + inline virtual void instr_incr_outstanding_aio_cnt() {} + inline virtual void instr_decr_outstanding_aio_cnt() {} + + /** + * \brief Static function for creating new fcntl objects for use with obj_arr. + */ + static fcntl* new_fcntl(jcntl* const jcp, const u_int16_t lid, const u_int16_t fid, const rcvdat* const rdp); + + protected: + static bool _init; + static bool init_statics(); + + /** + * \brief Check status of journal before allowing write operations. + */ + void check_wstatus(const char* fn_name) const; + + /** + * \brief Check status of journal before allowing read operations. 
+ */ + void check_rstatus(const char* fn_name) const; + + /** + * \brief Write info file <basefilename>.jinf to disk + */ + void write_infofile() const; + + /** + * \brief Call that blocks while waiting for all outstanding AIOs to complete + */ + void aio_cmpl_wait(); + + /** + * \brief Call that blocks until at least one message returns; used to wait for + * AIO wait conditions to clear. + */ + bool handle_aio_wait(const iores res, iores& resout, const data_tok* dtp); + + /** + * \brief Analyze journal for recovery. + */ + void rcvr_janalyze(rcvdat& rd, const std::vector<std::string>* prep_txn_list_ptr); + + bool rcvr_get_next_record(u_int16_t& fid, std::ifstream* ifsp, bool& lowi, rcvdat& rd); + + bool decode(jrec& rec, u_int16_t& fid, std::ifstream* ifsp, std::size_t& cum_size_read, + rec_hdr& h, bool& lowi, rcvdat& rd, std::streampos& rec_offset); + + bool jfile_cycle(u_int16_t& fid, std::ifstream* ifsp, bool& lowi, rcvdat& rd, + const bool jump_fro); + + bool check_owi(const u_int16_t fid, rec_hdr& h, bool& lowi, rcvdat& rd, + std::streampos& read_pos); + + void check_journal_alignment(const u_int16_t fid, std::streampos& rec_offset, rcvdat& rd); + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_JCNTL_H diff --git a/cpp/src/qpid/legacystore/jrnl/jdir.cpp b/cpp/src/qpid/legacystore/jrnl/jdir.cpp new file mode 100644 index 0000000000..a874c6c945 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/jdir.cpp @@ -0,0 +1,463 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file jdir.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::jdir (journal data + * directory), used for controlling and manipulating journal data + * direcories and files. See comments in file jdir.h for details. + * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/jdir.h" + +#include <cstdlib> +#include <cstring> +#include <cerrno> +#include <iomanip> +#include "qpid/legacystore/jrnl/jcfg.h" +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jexception.h" +#include <sstream> +#include <sys/stat.h> +#include <unistd.h> + +namespace mrg +{ +namespace journal +{ + +jdir::jdir(const std::string& dirname, const std::string& _base_filename): + _dirname(dirname), + _base_filename(_base_filename) +{} + +jdir::~jdir() +{} + +// === create_dir === + +void +jdir::create_dir() +{ + create_dir(_dirname); +} + + +void +jdir::create_dir(const char* dirname) +{ + create_dir(std::string(dirname)); +} + + +void +jdir::create_dir(const std::string& dirname) +{ + std::size_t fdp = dirname.find_last_of('/'); + if (fdp != std::string::npos) + { + std::string parent_dir = dirname.substr(0, fdp); + if (!exists(parent_dir)) + create_dir(parent_dir); + } + if (::mkdir(dirname.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH)) + { + if (errno != EEXIST) // Dir exists, ignore + { + std::ostringstream oss; + oss << "dir=\"" << dirname << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_MKDIR, oss.str(), "jdir", "create_dir"); + } + } +} + + 
+// === clear_dir === + +void +jdir::clear_dir(const bool create_flag) +{ + clear_dir(_dirname, _base_filename, create_flag); +} + +void +jdir::clear_dir(const char* dirname, const char* base_filename, const bool create_flag) +{ + clear_dir(std::string(dirname), std::string(base_filename), create_flag); +} + + +void +jdir::clear_dir(const std::string& dirname, const std::string& +#ifndef RHM_JOWRITE + base_filename +#endif + , const bool create_flag) +{ + DIR* dir = ::opendir(dirname.c_str()); + if (!dir) + { + if (errno == 2 && create_flag) // ENOENT (No such file or dir) + { + create_dir(dirname); + return; + } + std::ostringstream oss; + oss << "dir=\"" << dirname << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_OPENDIR, oss.str(), "jdir", "clear_dir"); + } +#ifndef RHM_JOWRITE + struct dirent* entry; + bool found = false; + std::string bak_dir; + while ((entry = ::readdir(dir)) != 0) + { + // Ignore . and .. + if (std::strcmp(entry->d_name, ".") != 0 && std::strcmp(entry->d_name, "..") != 0) + { + if (std::strlen(entry->d_name) > base_filename.size()) + { + if (std::strncmp(entry->d_name, base_filename.c_str(), base_filename.size()) == 0) + { + if (!found) + { + bak_dir = create_bak_dir(dirname, base_filename); + found = true; + } + std::ostringstream oldname; + oldname << dirname << "/" << entry->d_name; + std::ostringstream newname; + newname << bak_dir << "/" << entry->d_name; + if (::rename(oldname.str().c_str(), newname.str().c_str())) + { + ::closedir(dir); + std::ostringstream oss; + oss << "file=\"" << oldname.str() << "\" dest=\"" << + newname.str() << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_FMOVE, oss.str(), "jdir", "clear_dir"); + } + } + } + } + } +// FIXME: Find out why this fails with false alarms/errors from time to time... +// While commented out, there is no error capture from reading dir entries. 
+// check_err(errno, dir, dirname, "clear_dir"); +#endif + close_dir(dir, dirname, "clear_dir"); +} + +// === push_down === + +std::string +jdir::push_down(const std::string& dirname, const std::string& target_dir, const std::string& bak_dir_base) +{ + std::string bak_dir_name = create_bak_dir(dirname, bak_dir_base); + + DIR* dir = ::opendir(dirname.c_str()); + if (!dir) + { + std::ostringstream oss; + oss << "dir=\"" << dirname << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_OPENDIR, oss.str(), "jdir", "push_down"); + } + // Copy contents of targetDirName into bak dir + struct dirent* entry; + while ((entry = ::readdir(dir)) != 0) + { + // Search for targetDirName in storeDirName + if (std::strcmp(entry->d_name, target_dir.c_str()) == 0) + { + std::ostringstream oldname; + oldname << dirname << "/" << target_dir; + std::ostringstream newname; + newname << bak_dir_name << "/" << target_dir; + if (::rename(oldname.str().c_str(), newname.str().c_str())) + { + ::closedir(dir); + std::ostringstream oss; + oss << "file=\"" << oldname.str() << "\" dest=\"" << newname.str() << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_FMOVE, oss.str(), "jdir", "push_down"); + } + break; + } + } + close_dir(dir, dirname, "push_down"); + return bak_dir_name; +} + +// === verify_dir === + +void +jdir::verify_dir() +{ + verify_dir(_dirname, _base_filename); +} + +void +jdir::verify_dir(const char* dirname, const char* base_filename) +{ + verify_dir(std::string(dirname), std::string(base_filename)); +} + + +void +jdir::verify_dir(const std::string& dirname, const std::string& base_filename) +{ + if (!is_dir(dirname)) + { + std::ostringstream oss; + oss << "dir=\"" << dirname << "\""; + throw jexception(jerrno::JERR_JDIR_NOTDIR, oss.str(), "jdir", "verify_dir"); + } + + // Read jinf file, then verify all journal files are present + jinf ji(dirname + "/" + base_filename + "." 
+ JRNL_INFO_EXTENSION, true); + for (u_int16_t fnum=0; fnum < ji.num_jfiles(); fnum++) + { + std::ostringstream oss; + oss << dirname << "/" << base_filename << "."; + oss << std::setw(4) << std::setfill('0') << std::hex << fnum; + oss << "." << JRNL_DATA_EXTENSION; + if (!exists(oss.str())) + throw jexception(jerrno::JERR_JDIR_NOSUCHFILE, oss.str(), "jdir", "verify_dir"); + } +} + + +// === delete_dir === + +void +jdir::delete_dir(bool children_only) +{ + delete_dir(_dirname, children_only); +} + +void +jdir::delete_dir(const char* dirname, bool children_only) +{ + delete_dir(std::string(dirname), children_only); +} + +void +jdir::delete_dir(const std::string& dirname, bool children_only) +{ + struct dirent* entry; + struct stat s; + DIR* dir = ::opendir(dirname.c_str()); + if (!dir) + { + if (errno == ENOENT) // dir does not exist. + return; + + std::ostringstream oss; + oss << "dir=\"" << dirname << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_OPENDIR, oss.str(), "jdir", "delete_dir"); + } + else + { + while ((entry = ::readdir(dir)) != 0) + { + // Ignore . and .. + if (std::strcmp(entry->d_name, ".") != 0 && std::strcmp(entry->d_name, "..") != 0) + { + std::string full_name(dirname + "/" + entry->d_name); + if (::lstat(full_name.c_str(), &s)) + { + ::closedir(dir); + std::ostringstream oss; + oss << "stat: file=\"" << full_name << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_STAT, oss.str(), "jdir", "delete_dir"); + } + if (S_ISREG(s.st_mode) || S_ISLNK(s.st_mode)) // This is a file or slink + { + if(::unlink(full_name.c_str())) + { + ::closedir(dir); + std::ostringstream oss; + oss << "unlink: file=\"" << entry->d_name << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_UNLINK, oss.str(), "jdir", "delete_dir"); + } + } + else if (S_ISDIR(s.st_mode)) // This is a dir + { + delete_dir(full_name); + } + else // all other types, throw up! 
+ { + ::closedir(dir); + std::ostringstream oss; + oss << "file=\"" << entry->d_name << "\" is not a dir, file or slink."; + oss << " (mode=0x" << std::hex << s.st_mode << std::dec << ")"; + throw jexception(jerrno::JERR_JDIR_BADFTYPE, oss.str(), "jdir", "delete_dir"); + } + } + } + +// FIXME: Find out why this fails with false alarms/errors from time to time... +// While commented out, there is no error capture from reading dir entries. +// check_err(errno, dir, dirname, "delete_dir"); + } + // Now dir is empty, close and delete it + close_dir(dir, dirname, "delete_dir"); + + if (!children_only) + if (::rmdir(dirname.c_str())) + { + std::ostringstream oss; + oss << "dir=\"" << dirname << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_RMDIR, oss.str(), "jdir", "delete_dir"); + } +} + + +std::string +jdir::create_bak_dir(const std::string& dirname, const std::string& base_filename) +{ + DIR* dir = ::opendir(dirname.c_str()); + long dir_num = 0L; + if (!dir) + { + std::ostringstream oss; + oss << "dir=\"" << dirname << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_OPENDIR, oss.str(), "jdir", "create_bak_dir"); + } + struct dirent* entry; + while ((entry = ::readdir(dir)) != 0) + { + // Ignore . and .. + if (std::strcmp(entry->d_name, ".") != 0 && std::strcmp(entry->d_name, "..") != 0) + { + if (std::strlen(entry->d_name) == base_filename.size() + 10) // Format: basename.bak.XXXX + { + std::ostringstream oss; + oss << "_" << base_filename << ".bak."; + if (std::strncmp(entry->d_name, oss.str().c_str(), base_filename.size() + 6) == 0) + { + long this_dir_num = std::strtol(entry->d_name + base_filename.size() + 6, 0, 16); + if (this_dir_num > dir_num) + dir_num = this_dir_num; + } + } + } + } +// FIXME: Find out why this fails with false alarms/errors from time to time... +// While commented out, there is no error capture from reading dir entries. 
+// check_err(errno, dir, dirname, "create_bak_dir"); + close_dir(dir, dirname, "create_bak_dir"); + + std::ostringstream dn; + dn << dirname << "/_" << base_filename << ".bak." << std::hex << std::setw(4) << + std::setfill('0') << ++dir_num; + if (::mkdir(dn.str().c_str(), S_IRWXU | S_IRWXG | S_IROTH)) + { + std::ostringstream oss; + oss << "dir=\"" << dn.str() << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_MKDIR, oss.str(), "jdir", "create_bak_dir"); + } + return std::string(dn.str()); +} + +bool +jdir::is_dir(const char* name) +{ + struct stat s; + if (::stat(name, &s)) + { + std::ostringstream oss; + oss << "file=\"" << name << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_STAT, oss.str(), "jdir", "is_dir"); + } + return S_ISDIR(s.st_mode); +} + +bool +jdir::is_dir(const std::string& name) +{ + return is_dir(name.c_str()); +} + +bool +jdir::exists(const char* name) +{ + struct stat s; + if (::stat(name, &s)) + { + if (errno == ENOENT) // No such dir or file + return false; + // Throw for any other condition + std::ostringstream oss; + oss << "file=\"" << name << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_STAT, oss.str(), "jdir", "exists"); + } + return true; +} + +bool +jdir::exists(const std::string& name) +{ + return exists(name.c_str()); +} + +void +jdir::check_err(const int err_num, DIR* dir, const std::string& dir_name, const std::string& fn_name) +{ + if (err_num) + { + std::ostringstream oss; + oss << "dir=\"" << dir_name << "\"" << FORMAT_SYSERR(err_num); + ::closedir(dir); // Try to close, it makes no sense to trap errors here... 
+ throw jexception(jerrno::JERR_JDIR_READDIR, oss.str(), "jdir", fn_name); + } +} + +void +jdir::close_dir(DIR* dir, const std::string& dir_name, const std::string& fn_name) +{ + if (::closedir(dir)) + { + std::ostringstream oss; + oss << "dir=\"" << dir_name << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_CLOSEDIR, oss.str(), "jdir", fn_name); + } +} + +std::ostream& +operator<<(std::ostream& os, const jdir& jdir) +{ + os << jdir._dirname; + return os; +} + +std::ostream& +operator<<(std::ostream& os, const jdir* jdirPtr) +{ + os << jdirPtr->_dirname; + return os; +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/jdir.h b/cpp/src/qpid/legacystore/jrnl/jdir.h new file mode 100644 index 0000000000..e129b794d6 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/jdir.h @@ -0,0 +1,379 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file jdir.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::jdir (%journal data + * directory), used for controlling and manipulating %journal data + * directories and files. See class documentation for details. 
+ * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_JDIR_H +#define QPID_LEGACYSTORE_JRNL_JDIR_H + +namespace mrg +{ +namespace journal +{ +class jdir; +} +} + +#include "qpid/legacystore/jrnl/jinf.h" +#include <dirent.h> + +namespace mrg +{ +namespace journal +{ + + /** + * \class jdir + * \brief Class to manage the %journal directory + */ + class jdir + { + private: + std::string _dirname; + std::string _base_filename; + + public: + + /** + * \brief Sole constructor + * + * \param dirname Name of directory to be managed. + * \param base_filename Filename root used in the creation of %journal files + * and sub-directories. + */ + jdir(const std::string& dirname, const std::string& base_filename); + + virtual ~jdir(); + + + /** + * \brief Create %journal directory as set in the dirname parameter of the constructor. + * Recursive creation is supported. + * + * \exception jerrno::JERR_JDIR_MKDIR The creation of dirname failed. + */ + void create_dir(); + + /** + * \brief Static function to create a directory. Recursive creation is supported. + * + * \param dirname C-string containing name of directory. + * + * \exception jerrno::JERR_JDIR_MKDIR The creation of dirname failed. + */ + static void create_dir(const char* dirname); + + /** + * \brief Static function to create a directory. Recursive creation is supported. + * + * \param dirname String containing name of directory. + * + * \exception jerrno::JERR_JDIR_MKDIR The creation of dirname failed. + */ + static void create_dir(const std::string& dirname); + + + /** + * \brief Clear the %journal directory of files matching the base filename + * by moving them into a subdirectory. This fn uses the dirname and base_filename + * that were set on construction. + * + * \param create_flag If set, create dirname if it is non-existent, otherwise throw + * exception. + * + * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened. 
+ * \exception jerrno::JERR_JDIR_FMOVE Moving the files from the %journal directory to the created backup + * directory failed. + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. + */ + void clear_dir(const bool create_flag = true); + + /** + * \brief Clear the directory dirname of %journal files matching base_filename + * by moving them into a subdirectory. + * + * \param dirname C-string containing name of %journal directory. + * \param base_filename C-string containing base filename of %journal files to be matched + * for moving into subdirectory. + * \param create_flag If set, create dirname if it is non-existent, otherwise throw + * exception + * + * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened. + * \exception jerrno::JERR_JDIR_FMOVE Moving the files from the %journal directory to the created backup + * directory failed. + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. + */ + static void clear_dir(const char* dirname, const char* base_filename, + const bool create_flag = true); + + /** + * \brief Clear the directory dirname of %journal files matching base_filename + * by moving them into a subdirectory. + * + * \param dirname String containing name of %journal directory. + * \param base_filename String containing base filename of %journal files to be matched + * for moving into subdirectory. + * \param create_flag If set, create dirname if it is non-existent, otherwise throw + * exception + * + * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened. + * \exception jerrno::JERR_JDIR_FMOVE Moving the files from the %journal directory to the created backup + * directory failed. + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. 
+ */ + static void clear_dir(const std::string& dirname, const std::string& base_filename, + const bool create_flag = true); + + + + /** + * \brief Move (push down) the directory target_dir located in directory dirname into a backup directory + * named _bak_dir_base.XXXX (note prepended underscore), where XXXX is an increasing hex serial number + * starting at 0000. + * + * \param dirname Full path to directory containing directory to be pushed down. + * \param target_dir Name of directory in dirname to be pushed down. + * \param bak_dir_base Base name for backup directory to be created in dirname, into which target_dir will be moved. + * \return Name of backup dir into which target_dir was pushed. + */ + static std::string push_down(const std::string& dirname, const std::string& target_dir, const std::string& bak_dir_base); + + + /** + * \brief Verify that dirname is a valid %journal directory. + * + * The validation reads the .%jinf file, and using this information verifies that all the expected %journal + * (.jdat) files are present. + * + * \exception jerrno::JERR_JDIR_NOTDIR dirname is not a directory + * \exception jerrno::JERR_JDIR_STAT Could not stat dirname + * \exception jerrno::JERR__FILEIO Error reading %jinf file + * \exception jerrno::JERR_JINF_CVALIDFAIL Error validating %jinf file + * \exception jerrno::JERR_JDIR_NOSUCHFILE Expected jdat file is missing + */ + void verify_dir(); + + /** + * \brief Verify that dirname is a valid %journal directory. + * + * The validation reads the .%jinf file, and using this information verifies that all the expected %journal + * (.jdat) files are present. + * + * \param dirname C-string containing name of %journal directory. + * \param base_filename C-string containing base filename of %journal files to be matched for moving into sub-directory. 
+ * + * \exception jerrno::JERR_JDIR_NOTDIR dirname is not a directory + * \exception jerrno::JERR_JDIR_STAT Could not stat dirname + * \exception jerrno::JERR__FILEIO Error reading %jinf file + * \exception jerrno::JERR_JINF_CVALIDFAIL Error validating %jinf file + * \exception jerrno::JERR_JDIR_NOSUCHFILE Expected jdat file is missing + */ + static void verify_dir(const char* dirname, const char* base_filename); + + /** + * \brief Verify that dirname is a valid %journal directory. + * + * The validation reads the .%jinf file, and using this information verifies that all the expected %journal + * (.jdat) files are present. + * + * \param dirname String containing name of %journal directory. + * \param base_filename String containing base filename of %journal files to be matched for moving into sub-directory. + * + * \exception jerrno::JERR_JDIR_NOTDIR dirname is not a directory + * \exception jerrno::JERR_JDIR_STAT Could not stat dirname + * \exception jerrno::JERR__FILEIO Error reading %jinf file + * \exception jerrno::JERR_JINF_CVALIDFAIL Error validating %jinf file + * \exception jerrno::JERR_JDIR_NOSUCHFILE Expected jdat file is missing + */ + static void verify_dir(const std::string& dirname, const std::string& base_filename); + + /** + * \brief Delete the %journal directory and all files and sub--directories that it may + * contain. This is equivilent of rm -rf. + * + * FIXME: links are not handled correctly. + * + * \param children_only If true, delete only children of dirname, but leave dirname itself. + * + * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened. + * \exception jerrno::JERR_JDIR_STAT Could not stat dirname. + * \exception jerrno::JERR_JDIR_UNLINK A file could not be deleted. + * \exception jerrno::JERR_JDIR_BADFTYPE A dir entry is neiter a file nor a dir. + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. + * \exception jerrno::JERR_JDIR_RMDIR A directory could not be deleted. 
+ */ + void delete_dir(bool children_only = false ); + + /** + * \brief Delete the %journal directory and all files and sub--directories that it may + * contain. This is equivilent of rm -rf. + * + * FIXME: links are not handled correctly. + * + * \param dirname C-string containing name of directory to be deleted. + * \param children_only If true, delete only children of dirname, but leave dirname itself. + * + * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened. + * \exception jerrno::JERR_JDIR_STAT Could not stat dirname. + * \exception jerrno::JERR_JDIR_UNLINK A file could not be deleted. + * \exception jerrno::JERR_JDIR_BADFTYPE A dir entry is neiter a file nor a dir. + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. + * \exception jerrno::JERR_JDIR_RMDIR A directory could not be deleted. + */ + static void delete_dir(const char* dirname, bool children_only = false); + + /** + * \brief Delete the %journal directory and all files and sub--directories that it may + * contain. This is equivilent of rm -rf. + * + * FIXME: links are not handled correctly. + * + * \param dirname String containing name of directory to be deleted. + * \param children_only If true, delete only children of dirname, but leave dirname itself. + * + * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened. + * \exception jerrno::JERR_JDIR_STAT Could not stat dirname. + * \exception jerrno::JERR_JDIR_UNLINK A file could not be deleted. + * \exception jerrno::JERR_JDIR_BADFTYPE A dir entry is neiter a file nor a dir. + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. + * \exception jerrno::JERR_JDIR_RMDIR A directory could not be deleted. + */ + static void delete_dir(const std::string& dirname, bool children_only = false); + + /** + * \brief Create bakup directory that is next in sequence and move all %journal files + * matching base_filename into it. 
+ * + * In directory dirname, search for existing backup directory using pattern + * "_basename.bak.XXXX" where XXXX is a hexadecimal sequence, and create next directory + * based on highest number found. Move all %journal files which match the base_fileaname + * parameter into this new backup directory. + * + * \param dirname String containing name of %journal directory. + * \param base_filename String containing base filename of %journal files to be matched + * for moving into subdirectory. + * + * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened. + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. + * \exception jerrno::JERR_JDIR_MKDIR The backup directory could not be deleted. + */ + static std::string create_bak_dir(const std::string& dirname, + const std::string& base_filename); + + /** + * \brief Return the directory name as a string. + */ + inline const std::string& dirname() const { return _dirname; } + + /** + * \brief Return the %journal base filename name as a string. + */ + inline const std::string& base_filename() const { return _base_filename; } + + /** + * \brief Test whether the named file is a directory. + * + * \param name Name of file to be tested. + * \return <b><i>true</i></b> if the named file is a directory; <b><i>false</i></b> + * otherwise. + * \exception jerrno::JERR_JDIR_STAT Could not stat name. + */ + static bool is_dir(const char* name); + + /** + * \brief Test whether the named file is a directory. + * + * \param name Name of file to be tested. + * \return <b><i>true</i></b> if the named file is a directory; <b><i>false</i></b> + * otherwise. + * \exception jerrno::JERR_JDIR_STAT Could not stat name. + */ + static bool is_dir(const std::string& name); + + + /** + * \brief Test whether the named entity exists on the filesystem. + * + * If stat() fails with error ENOENT, then this will return <b><i>false</i></b>. 
If + * stat() succeeds, then <b><i>true</i></b> is returned, irrespective of the file type. + * If stat() fails with any other error, an exception is thrown. + * + * \param name Name of entity to be tested. + * \return <b><i>true</i></b> if the named entity exists; <b><i>false</i></b> + * otherwise. + * \exception jerrno::JERR_JDIR_STAT Could not stat name. + */ + static bool exists(const char* name); + + /** + * \brief Test whether the named entity exists on the filesystem. + * + * If stat() fails with error ENOENT, then this will return <b><i>false</i></b>. If + * stat() succeeds, then <b><i>true</i></b> is returned, irrespective of the file type. + * If stat() fails with any other error, an exception is thrown. + * + * \param name Name of entity to be tested. + * \return <b><i>true</i></b> if the named entity exists; <b><i>false</i></b> + * otherwise. + * \exception jerrno::JERR_JDIR_STAT Could not stat name. + */ + static bool exists(const std::string& name); + + /** + * \brief Stream operator + */ + friend std::ostream& operator<<(std::ostream& os, const jdir& jdir); + + /** + * \brief Stream operator + */ + friend std::ostream& operator<<(std::ostream& os, const jdir* jdirPtr); + + private: + /** + * \brief Check for error, if non-zero close DIR handle and throw JERR_JDIR_READDIR + * + * \exception jerrno::JERR_JDIR_READDIR Error while reading contents of dir. + */ + static void check_err(const int err_num, DIR* dir, const std::string& dir_name, const std::string& fn_name); + + /** + * \brief Close a DIR handle, throw JERR_JDIR_CLOSEDIR if error occurs during close + * + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. 
+ */ + static void close_dir(DIR* dir, const std::string& dir_name, const std::string& fn_name); + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_JDIR_H diff --git a/cpp/src/qpid/legacystore/jrnl/jerrno.cpp b/cpp/src/qpid/legacystore/jrnl/jerrno.cpp new file mode 100644 index 0000000000..4962ce63ab --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/jerrno.cpp @@ -0,0 +1,253 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file jerrno.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::jerrno (journal error + * codes). See comments in file jerrno.h for details. + * + * See file jerrno.h for class details. 
+ * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/jerrno.h" + +namespace mrg +{ +namespace journal +{ + +std::map<u_int32_t, const char*> jerrno::_err_map; +std::map<u_int32_t, const char*>::iterator jerrno::_err_map_itr; +bool jerrno::_initialized = jerrno::__init(); + +// generic errors +const u_int32_t jerrno::JERR__MALLOC = 0x0100; +const u_int32_t jerrno::JERR__UNDERFLOW = 0x0101; +const u_int32_t jerrno::JERR__NINIT = 0x0102; +const u_int32_t jerrno::JERR__AIO = 0x0103; +const u_int32_t jerrno::JERR__FILEIO = 0x0104; +const u_int32_t jerrno::JERR__RTCLOCK = 0x0105; +const u_int32_t jerrno::JERR__PTHREAD = 0x0106; +const u_int32_t jerrno::JERR__TIMEOUT = 0x0107; +const u_int32_t jerrno::JERR__UNEXPRESPONSE = 0x0108; +const u_int32_t jerrno::JERR__RECNFOUND = 0x0109; +const u_int32_t jerrno::JERR__NOTIMPL = 0x010a; + +// class jcntl +const u_int32_t jerrno::JERR_JCNTL_STOPPED = 0x0200; +const u_int32_t jerrno::JERR_JCNTL_READONLY = 0x0201; +const u_int32_t jerrno::JERR_JCNTL_AIOCMPLWAIT = 0x0202; +const u_int32_t jerrno::JERR_JCNTL_UNKNOWNMAGIC = 0x0203; +const u_int32_t jerrno::JERR_JCNTL_NOTRECOVERED = 0x0204; +const u_int32_t jerrno::JERR_JCNTL_RECOVERJFULL = 0x0205; +const u_int32_t jerrno::JERR_JCNTL_OWIMISMATCH = 0x0206; + +// class jdir +const u_int32_t jerrno::JERR_JDIR_NOTDIR = 0x0300; +const u_int32_t jerrno::JERR_JDIR_MKDIR = 0x0301; +const u_int32_t jerrno::JERR_JDIR_OPENDIR = 0x0302; +const u_int32_t jerrno::JERR_JDIR_READDIR = 0x0303; +const u_int32_t jerrno::JERR_JDIR_CLOSEDIR = 0x0304; +const u_int32_t jerrno::JERR_JDIR_RMDIR = 0x0305; +const u_int32_t jerrno::JERR_JDIR_NOSUCHFILE = 0x0306; +const u_int32_t jerrno::JERR_JDIR_FMOVE = 0x0307; +const u_int32_t jerrno::JERR_JDIR_STAT = 0x0308; +const u_int32_t jerrno::JERR_JDIR_UNLINK = 0x0309; +const u_int32_t jerrno::JERR_JDIR_BADFTYPE = 0x030a; + +// class fcntl +const u_int32_t jerrno::JERR_FCNTL_OPENWR = 0x0400; +const u_int32_t jerrno::JERR_FCNTL_WRITE = 0x0401; +const 
u_int32_t jerrno::JERR_FCNTL_CLOSE = 0x0402; +const u_int32_t jerrno::JERR_FCNTL_FILEOFFSOVFL = 0x0403; +const u_int32_t jerrno::JERR_FCNTL_CMPLOFFSOVFL = 0x0404; +const u_int32_t jerrno::JERR_FCNTL_RDOFFSOVFL = 0x0405; + +// class lfmgr +const u_int32_t jerrno::JERR_LFMGR_BADAEFNUMLIM = 0x0500; +const u_int32_t jerrno::JERR_LFMGR_AEFNUMLIMIT = 0x0501; +const u_int32_t jerrno::JERR_LFMGR_AEDISABLED = 0x0502; + +// class rrfc +const u_int32_t jerrno::JERR_RRFC_OPENRD = 0x0600; + +// class jrec, enq_rec, deq_rec, txn_rec +const u_int32_t jerrno::JERR_JREC_BADRECHDR = 0x0700; +const u_int32_t jerrno::JERR_JREC_BADRECTAIL = 0x0701; + +// class wmgr +const u_int32_t jerrno::JERR_WMGR_BADPGSTATE = 0x0801; +const u_int32_t jerrno::JERR_WMGR_BADDTOKSTATE = 0x0802; +const u_int32_t jerrno::JERR_WMGR_ENQDISCONT = 0x0803; +const u_int32_t jerrno::JERR_WMGR_DEQDISCONT = 0x0804; +const u_int32_t jerrno::JERR_WMGR_DEQRIDNOTENQ = 0x0805; + +// class rmgr +const u_int32_t jerrno::JERR_RMGR_UNKNOWNMAGIC = 0x0900; +const u_int32_t jerrno::JERR_RMGR_RIDMISMATCH = 0x0901; +//const u_int32_t jerrno::JERR_RMGR_FIDMISMATCH = 0x0902; +const u_int32_t jerrno::JERR_RMGR_ENQSTATE = 0x0903; +const u_int32_t jerrno::JERR_RMGR_BADRECTYPE = 0x0904; + +// class data_tok +const u_int32_t jerrno::JERR_DTOK_ILLEGALSTATE = 0x0a00; +// const u_int32_t jerrno::JERR_DTOK_RIDNOTSET = 0x0a01; + +// class enq_map, txn_map +const u_int32_t jerrno::JERR_MAP_DUPLICATE = 0x0b00; +const u_int32_t jerrno::JERR_MAP_NOTFOUND = 0x0b01; +const u_int32_t jerrno::JERR_MAP_LOCKED = 0x0b02; + +// class jinf +const u_int32_t jerrno::JERR_JINF_CVALIDFAIL = 0x0c00; +const u_int32_t jerrno::JERR_JINF_NOVALUESTR = 0x0c01; +const u_int32_t jerrno::JERR_JINF_BADVALUESTR = 0x0c02; +const u_int32_t jerrno::JERR_JINF_JDATEMPTY = 0x0c03; +const u_int32_t jerrno::JERR_JINF_TOOMANYFILES = 0x0c04; +const u_int32_t jerrno::JERR_JINF_INVALIDFHDR = 0x0c05; +const u_int32_t jerrno::JERR_JINF_STAT = 0x0c06; +const u_int32_t 
jerrno::JERR_JINF_NOTREGFILE = 0x0c07; +const u_int32_t jerrno::JERR_JINF_BADFILESIZE = 0x0c08; +const u_int32_t jerrno::JERR_JINF_OWIBAD = 0x0c09; +const u_int32_t jerrno::JERR_JINF_ZEROLENFILE = 0x0c0a; + +// Negative returns for some functions +const int32_t jerrno::AIO_TIMEOUT = -1; +const int32_t jerrno::LOCK_TAKEN = -2; + + +// static initialization fn + +bool +jerrno::__init() +{ + // generic errors + _err_map[JERR__MALLOC] = "JERR__MALLOC: Buffer memory allocation failed."; + _err_map[JERR__UNDERFLOW] = "JERR__UNDERFLOW: Underflow error"; + _err_map[JERR__NINIT] = "JERR__NINIT: Operation on uninitialized class."; + _err_map[JERR__AIO] = "JERR__AIO: AIO error."; + _err_map[JERR__FILEIO] = "JERR__FILEIO: File read or write failure."; + _err_map[JERR__RTCLOCK] = "JERR__RTCLOCK: Reading real-time clock failed."; + _err_map[JERR__PTHREAD] = "JERR__PTHREAD: pthread failure."; + _err_map[JERR__TIMEOUT] = "JERR__TIMEOUT: Timeout waiting for event."; + _err_map[JERR__UNEXPRESPONSE] = "JERR__UNEXPRESPONSE: Unexpected response to call or event."; + _err_map[JERR__RECNFOUND] = "JERR__RECNFOUND: Record not found."; + _err_map[JERR__NOTIMPL] = "JERR__NOTIMPL: Not implemented"; + + // class jcntl + _err_map[JERR_JCNTL_STOPPED] = "JERR_JCNTL_STOPPED: Operation on stopped journal."; + _err_map[JERR_JCNTL_READONLY] = "JERR_JCNTL_READONLY: Write operation on read-only journal (during recovery)."; + _err_map[JERR_JCNTL_AIOCMPLWAIT] = "JERR_JCNTL_AIOCMPLWAIT: Timeout waiting for AIOs to complete."; + _err_map[JERR_JCNTL_UNKNOWNMAGIC] = "JERR_JCNTL_UNKNOWNMAGIC: Found record with unknown magic."; + _err_map[JERR_JCNTL_NOTRECOVERED] = "JERR_JCNTL_NOTRECOVERED: Operation requires recover() to be run first."; + _err_map[JERR_JCNTL_RECOVERJFULL] = "JERR_JCNTL_RECOVERJFULL: Journal data files full, cannot write."; + _err_map[JERR_JCNTL_OWIMISMATCH] = "JERR_JCNTL_OWIMISMATCH: Overwrite Indicator (OWI) change found in unexpected location."; + + // class jdir + 
_err_map[JERR_JDIR_NOTDIR] = "JERR_JDIR_NOTDIR: Directory name exists but is not a directory."; + _err_map[JERR_JDIR_MKDIR] = "JERR_JDIR_MKDIR: Directory creation failed."; + _err_map[JERR_JDIR_OPENDIR] = "JERR_JDIR_OPENDIR: Directory open failed."; + _err_map[JERR_JDIR_READDIR] = "JERR_JDIR_READDIR: Directory read failed."; + _err_map[JERR_JDIR_CLOSEDIR] = "JERR_JDIR_CLOSEDIR: Directory close failed."; + _err_map[JERR_JDIR_RMDIR] = "JERR_JDIR_RMDIR: Directory delete failed."; + _err_map[JERR_JDIR_NOSUCHFILE] = "JERR_JDIR_NOSUCHFILE: File does not exist."; + _err_map[JERR_JDIR_FMOVE] = "JERR_JDIR_FMOVE: File move failed."; + _err_map[JERR_JDIR_STAT] = "JERR_JDIR_STAT: File stat failed."; + _err_map[JERR_JDIR_UNLINK] = "JERR_JDIR_UNLINK: File delete failed."; + _err_map[JERR_JDIR_BADFTYPE] = "JERR_JDIR_BADFTYPE: Bad or unknown file type (stat mode)."; + + // class fcntl + _err_map[JERR_FCNTL_OPENWR] = "JERR_FCNTL_OPENWR: Unable to open file for write."; + _err_map[JERR_FCNTL_WRITE] = "JERR_FCNTL_WRITE: Unable to write to file."; + _err_map[JERR_FCNTL_CLOSE] = "JERR_FCNTL_CLOSE: File close failed."; + _err_map[JERR_FCNTL_FILEOFFSOVFL] = "JERR_FCNTL_FILEOFFSOVFL: Attempted increase file offset past file size."; + _err_map[JERR_FCNTL_CMPLOFFSOVFL] = "JERR_FCNTL_CMPLOFFSOVFL: Attempted increase completed file offset past submitted offset."; + _err_map[JERR_FCNTL_RDOFFSOVFL] = "JERR_FCNTL_RDOFFSOVFL: Attempted increase read offset past write offset."; + + // class lfmgr + _err_map[JERR_LFMGR_BADAEFNUMLIM] = "JERR_LFMGR_BADAEFNUMLIM: Auto-expand file number limit lower than initial number of journal files."; + _err_map[JERR_LFMGR_AEFNUMLIMIT] = "JERR_LFMGR_AEFNUMLIMIT: Exceeded auto-expand file number limit."; + _err_map[JERR_LFMGR_AEDISABLED] = "JERR_LFMGR_AEDISABLED: Attempted to expand with auto-expand disabled."; + + // class rrfc + _err_map[JERR_RRFC_OPENRD] = "JERR_RRFC_OPENRD: Unable to open file for read."; + + // class jrec, enq_rec, deq_rec, txn_rec + 
_err_map[JERR_JREC_BADRECHDR] = "JERR_JREC_BADRECHDR: Invalid data record header."; + _err_map[JERR_JREC_BADRECTAIL] = "JERR_JREC_BADRECTAIL: Invalid data record tail."; + + // class wmgr + _err_map[JERR_WMGR_BADPGSTATE] = "JERR_WMGR_BADPGSTATE: Page buffer in illegal state for operation."; + _err_map[JERR_WMGR_BADDTOKSTATE] = "JERR_WMGR_BADDTOKSTATE: Data token in illegal state for operation."; + _err_map[JERR_WMGR_ENQDISCONT] = "JERR_WMGR_ENQDISCONT: Enqueued new dtok when previous enqueue returned partly completed (state ENQ_PART)."; + _err_map[JERR_WMGR_DEQDISCONT] = "JERR_WMGR_DEQDISCONT: Dequeued new dtok when previous dequeue returned partly completed (state DEQ_PART)."; + _err_map[JERR_WMGR_DEQRIDNOTENQ] = "JERR_WMGR_DEQRIDNOTENQ: Dequeue rid is not enqueued."; + + // class rmgr + _err_map[JERR_RMGR_UNKNOWNMAGIC] = "JERR_RMGR_UNKNOWNMAGIC: Found record with unknown magic."; + _err_map[JERR_RMGR_RIDMISMATCH] = "JERR_RMGR_RIDMISMATCH: RID mismatch between current record and dtok RID"; + //_err_map[JERR_RMGR_FIDMISMATCH] = "JERR_RMGR_FIDMISMATCH: FID mismatch between emap and rrfc"; + _err_map[JERR_RMGR_ENQSTATE] = "JERR_RMGR_ENQSTATE: Attempted read when data token wstate was not ENQ"; + _err_map[JERR_RMGR_BADRECTYPE] = "JERR_RMGR_BADRECTYPE: Attempted operation on inappropriate record type"; + + // class data_tok + _err_map[JERR_DTOK_ILLEGALSTATE] = "JERR_MTOK_ILLEGALSTATE: Attempted to change to illegal state."; + //_err_map[JERR_DTOK_RIDNOTSET] = "JERR_DTOK_RIDNOTSET: Record ID not set."; + + // class enq_map, txn_map + _err_map[JERR_MAP_DUPLICATE] = "JERR_MAP_DUPLICATE: Attempted to insert record into map using duplicate key."; + _err_map[JERR_MAP_NOTFOUND] = "JERR_MAP_NOTFOUND: Key not found in map."; + _err_map[JERR_MAP_LOCKED] = "JERR_MAP_LOCKED: Record ID locked by a pending transaction."; + + // class jinf + _err_map[JERR_JINF_CVALIDFAIL] = "JERR_JINF_CVALIDFAIL: Journal compatibility validation failure."; + _err_map[JERR_JINF_NOVALUESTR] = 
"JERR_JINF_NOVALUESTR: No value attribute found in jinf file."; + _err_map[JERR_JINF_BADVALUESTR] = "JERR_JINF_BADVALUESTR: Bad format for value attribute in jinf file"; + _err_map[JERR_JINF_JDATEMPTY] = "JERR_JINF_JDATEMPTY: Journal data files empty."; + _err_map[JERR_JINF_TOOMANYFILES] = "JERR_JINF_TOOMANYFILES: Too many journal data files."; + _err_map[JERR_JINF_INVALIDFHDR] = "JERR_JINF_INVALIDFHDR: Invalid journal data file header"; + _err_map[JERR_JINF_STAT] = "JERR_JINF_STAT: Error while trying to stat a journal data file"; + _err_map[JERR_JINF_NOTREGFILE] = "JERR_JINF_NOTREGFILE: Target journal data file is not a regular file"; + _err_map[JERR_JINF_BADFILESIZE] = "JERR_JINF_BADFILESIZE: Journal data file is of incorrect or unexpected size"; + _err_map[JERR_JINF_OWIBAD] = "JERR_JINF_OWIBAD: Journal data files have inconsistent OWI flags; >1 transition found in non-auto-expand or min-size journal"; + _err_map[JERR_JINF_ZEROLENFILE] = "JERR_JINF_ZEROLENFILE: Journal info file zero length"; + + //_err_map[] = ""; + + return true; +} + +const char* +jerrno::err_msg(const u_int32_t err_no) throw () +{ + _err_map_itr = _err_map.find(err_no); + if (_err_map_itr == _err_map.end()) + return "<Unknown error code>"; + return _err_map_itr->second; +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/jerrno.h b/cpp/src/qpid/legacystore/jrnl/jerrno.h new file mode 100644 index 0000000000..4c8b71c423 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/jerrno.h @@ -0,0 +1,173 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file jerrno.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::jerrno (journal error + * codes). See class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_JERRNO_H +#define QPID_LEGACYSTORE_JRNL_JERRNO_H + +namespace mrg +{ +namespace journal +{ +class jerrno; +} +} + +#include <map> +#include <string> +#include <sys/types.h> + +namespace mrg +{ +namespace journal +{ + + /** + * \class jerrno + * \brief Class containing static error definitions and static map for error messages. + */ + class jerrno + { + static std::map<u_int32_t, const char*> _err_map; ///< Map of error messages + static std::map<u_int32_t, const char*>::iterator _err_map_itr; ///< Iterator + static bool _initialized; ///< Dummy flag, used to initialise map. 
+ + public: + // generic errors + static const u_int32_t JERR__MALLOC; ///< Buffer memory allocation failed + static const u_int32_t JERR__UNDERFLOW; ///< Underflow error + static const u_int32_t JERR__NINIT; ///< Operation on uninitialized class + static const u_int32_t JERR__AIO; ///< AIO failure + static const u_int32_t JERR__FILEIO; ///< File read or write failure + static const u_int32_t JERR__RTCLOCK; ///< Reading real-time clock failed + static const u_int32_t JERR__PTHREAD; ///< pthread failure + static const u_int32_t JERR__TIMEOUT; ///< Timeout waiting for an event + static const u_int32_t JERR__UNEXPRESPONSE; ///< Unexpected response to call or event + static const u_int32_t JERR__RECNFOUND; ///< Record not found + static const u_int32_t JERR__NOTIMPL; ///< Not implemented + + // class jcntl + static const u_int32_t JERR_JCNTL_STOPPED; ///< Operation on stopped journal + static const u_int32_t JERR_JCNTL_READONLY; ///< Write operation on read-only journal + static const u_int32_t JERR_JCNTL_AIOCMPLWAIT; ///< Timeout waiting for AIOs to complete + static const u_int32_t JERR_JCNTL_UNKNOWNMAGIC; ///< Found record with unknown magic + static const u_int32_t JERR_JCNTL_NOTRECOVERED; ///< Req' recover() to be called first + static const u_int32_t JERR_JCNTL_RECOVERJFULL; ///< Journal data files full, cannot write + static const u_int32_t JERR_JCNTL_OWIMISMATCH; ///< OWI change found in unexpected location + + // class jdir + static const u_int32_t JERR_JDIR_NOTDIR; ///< Exists but is not a directory + static const u_int32_t JERR_JDIR_MKDIR; ///< Directory creation failed + static const u_int32_t JERR_JDIR_OPENDIR; ///< Directory open failed + static const u_int32_t JERR_JDIR_READDIR; ///< Directory read failed + static const u_int32_t JERR_JDIR_CLOSEDIR; ///< Directory close failed + static const u_int32_t JERR_JDIR_RMDIR; ///< Directory delete failed + static const u_int32_t JERR_JDIR_NOSUCHFILE; ///< File does not exist + static const u_int32_t 
JERR_JDIR_FMOVE; ///< File move failed + static const u_int32_t JERR_JDIR_STAT; ///< File stat failed + static const u_int32_t JERR_JDIR_UNLINK; ///< File delete failed + static const u_int32_t JERR_JDIR_BADFTYPE; ///< Bad or unknown file type (stat mode) + + // class fcntl + static const u_int32_t JERR_FCNTL_OPENWR; ///< Unable to open file for write + static const u_int32_t JERR_FCNTL_WRITE; ///< Unable to write to file + static const u_int32_t JERR_FCNTL_CLOSE; ///< File close failed + static const u_int32_t JERR_FCNTL_FILEOFFSOVFL; ///< Increased offset past file size + static const u_int32_t JERR_FCNTL_CMPLOFFSOVFL; ///< Increased cmpl offs past subm offs + static const u_int32_t JERR_FCNTL_RDOFFSOVFL; ///< Increased read offs past write offs + + // class lfmgr + static const u_int32_t JERR_LFMGR_BADAEFNUMLIM; ///< Bad auto-expand file number limit + static const u_int32_t JERR_LFMGR_AEFNUMLIMIT; ///< Exceeded auto-expand file number limit + static const u_int32_t JERR_LFMGR_AEDISABLED; ///< Attempted to expand with auto-expand disabled + + // class rrfc + static const u_int32_t JERR_RRFC_OPENRD; ///< Unable to open file for read + + // class jrec, enq_rec, deq_rec, txn_rec + static const u_int32_t JERR_JREC_BADRECHDR; ///< Invalid data record header + static const u_int32_t JERR_JREC_BADRECTAIL; ///< Invalid data record tail + + // class wmgr + static const u_int32_t JERR_WMGR_BADPGSTATE; ///< Page buffer in illegal state. + static const u_int32_t JERR_WMGR_BADDTOKSTATE; ///< Data token in illegal state. + static const u_int32_t JERR_WMGR_ENQDISCONT; ///< Enq. new dtok when previous part compl. + static const u_int32_t JERR_WMGR_DEQDISCONT; ///< Deq. new dtok when previous part compl. + static const u_int32_t JERR_WMGR_DEQRIDNOTENQ; ///< Deq. 
rid not enqueued + + // class rmgr + static const u_int32_t JERR_RMGR_UNKNOWNMAGIC; ///< Found record with unknown magic + static const u_int32_t JERR_RMGR_RIDMISMATCH; ///< RID mismatch between rec and dtok + //static const u_int32_t JERR_RMGR_FIDMISMATCH; ///< FID mismatch between emap and rrfc + static const u_int32_t JERR_RMGR_ENQSTATE; ///< Attempted read when wstate not ENQ + static const u_int32_t JERR_RMGR_BADRECTYPE; ///< Attempted op on incorrect rec type + + // class data_tok + static const u_int32_t JERR_DTOK_ILLEGALSTATE; ///< Attempted to change to illegal state +// static const u_int32_t JERR_DTOK_RIDNOTSET; ///< Record ID not set + + // class enq_map, txn_map + static const u_int32_t JERR_MAP_DUPLICATE; ///< Attempted to insert using duplicate key + static const u_int32_t JERR_MAP_NOTFOUND; ///< Key not found in map + static const u_int32_t JERR_MAP_LOCKED; ///< rid locked by pending txn + + // class jinf + static const u_int32_t JERR_JINF_CVALIDFAIL; ///< Compatibility validation failure + static const u_int32_t JERR_JINF_NOVALUESTR; ///< No value attr found in jinf file + static const u_int32_t JERR_JINF_BADVALUESTR; ///< Bad format for value attr in jinf file + static const u_int32_t JERR_JINF_JDATEMPTY; ///< Journal data files empty + static const u_int32_t JERR_JINF_TOOMANYFILES; ///< Too many journal data files + static const u_int32_t JERR_JINF_INVALIDFHDR; ///< Invalid file header + static const u_int32_t JERR_JINF_STAT; ///< Error while trying to stat a file + static const u_int32_t JERR_JINF_NOTREGFILE; ///< Target file is not a regular file + static const u_int32_t JERR_JINF_BADFILESIZE; ///< File is of incorrect or unexpected size + static const u_int32_t JERR_JINF_OWIBAD; ///< OWI inconsistent (>1 transition in non-ae journal) + static const u_int32_t JERR_JINF_ZEROLENFILE; ///< Journal info file is zero length (empty). 
+ + // Negative returns for some functions + static const int32_t AIO_TIMEOUT; ///< Timeout waiting for AIO return + static const int32_t LOCK_TAKEN; ///< Attempted to take lock, but it was taken by another thread + /** + * \brief Method to access error message from known error number. + */ + static const char* err_msg(const u_int32_t err_no) throw (); + + private: + /** + * \brief Static function to initialize map. + */ + static bool __init(); + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_JERRNO_H diff --git a/cpp/src/qpid/legacystore/jrnl/jexception.cpp b/cpp/src/qpid/legacystore/jrnl/jexception.cpp new file mode 100644 index 0000000000..5c571020e4 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/jexception.cpp @@ -0,0 +1,183 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file jexception.cpp + * + * Qpid asynchronous store plugin library + * + * Generic journal exception class mrg::journal::jexception. See comments + * in file jexception.h for details. 
+ * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/jexception.h" + +#include <iomanip> +#include <sstream> +#include "qpid/legacystore/jrnl/jerrno.h" + +#define CATLEN(p) MAX_MSG_SIZE - std::strlen(p) - 1 + +namespace mrg +{ +namespace journal +{ + +jexception::jexception() throw (): + std::exception(), + _err_code(0) +{ + format(); +} + +jexception::jexception(const u_int32_t err_code) throw (): + std::exception(), + _err_code(err_code) +{ + format(); +} + +jexception::jexception(const char* additional_info) throw (): + std::exception(), + _err_code(0), + _additional_info(additional_info) +{ + format(); +} + +jexception::jexception(const std::string& additional_info) throw (): + std::exception(), + _err_code(0), + _additional_info(additional_info) +{ + format(); +} + +jexception::jexception(const u_int32_t err_code, const char* additional_info) throw (): + std::exception(), + _err_code(err_code), + _additional_info(additional_info) +{ + format(); +} + +jexception::jexception(const u_int32_t err_code, const std::string& additional_info) throw (): + std::exception(), + _err_code(err_code), + _additional_info(additional_info) +{ + format(); +} + +jexception::jexception(const u_int32_t err_code, const char* throwing_class, + const char* throwing_fn) throw (): + std::exception(), + _err_code(err_code), + _throwing_class(throwing_class), + _throwing_fn(throwing_fn) +{ + format(); +} + +jexception::jexception(const u_int32_t err_code, const std::string& throwing_class, + const std::string& throwing_fn) throw (): + std::exception(), + _err_code(err_code), + _throwing_class(throwing_class), + _throwing_fn(throwing_fn) +{ + format(); +} + +jexception::jexception(const u_int32_t err_code, const char* additional_info, + const char* throwing_class, const char* throwing_fn) throw (): + std::exception(), + _err_code(err_code), + _additional_info(additional_info), + _throwing_class(throwing_class), + _throwing_fn(throwing_fn) +{ + format(); +} + 
+jexception::jexception(const u_int32_t err_code, const std::string& additional_info, + const std::string& throwing_class, const std::string& throwing_fn) throw (): + std::exception(), + _err_code(err_code), + _additional_info(additional_info), + _throwing_class(throwing_class), + _throwing_fn(throwing_fn) +{ + format(); +} + +jexception::~jexception() throw () +{} + +void +jexception::format() +{ + const bool ai = !_additional_info.empty(); + const bool tc = !_throwing_class.empty(); + const bool tf = !_throwing_fn.empty(); + std::ostringstream oss; + oss << "jexception 0x" << std::hex << std::setfill('0') << std::setw(4) << _err_code << " "; + if (tc) + { + oss << _throwing_class; + if (tf) + oss << "::"; + else + oss << " "; + } + if (tf) + oss << _throwing_fn << "() "; + if (tc || tf) + oss << "threw " << jerrno::err_msg(_err_code); + if (ai) + oss << " (" << _additional_info << ")"; + _what.assign(oss.str()); +} + +const char* +jexception::what() const throw () +{ + return _what.c_str(); +} + +std::ostream& +operator<<(std::ostream& os, const jexception& je) +{ + os << je.what(); + return os; +} + +std::ostream& +operator<<(std::ostream& os, const jexception* jePtr) +{ + os << jePtr->what(); + return os; +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/jexception.h b/cpp/src/qpid/legacystore/jrnl/jexception.h new file mode 100644 index 0000000000..34d8373235 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/jexception.h @@ -0,0 +1,142 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file jexception.h + * + * Qpid asynchronous store plugin library + * + * Generic journal exception class mrg::journal::jexception (derived + * from class std::exception). Intended to serve as a common exception + * class for all more speicalized exceptions in the message journal. See + * class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_JEXCEPTION_H +#define QPID_LEGACYSTORE_JRNL_JEXCEPTION_H + +namespace mrg +{ +namespace journal +{ +class jexception; +} +} + +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <exception> +#include "qpid/legacystore/jrnl/jerrno.h" +#include <sstream> +#include <string> +#include <sys/types.h> + +// Macro for formatting commom system errors +#define FORMAT_SYSERR(errno) " errno=" << errno << " (" << std::strerror(errno) << ")" + +#define MALLOC_CHK(ptr, var, cls, fn) if(ptr == 0) { \ + clean(); \ + std::ostringstream oss; \ + oss << var << ": malloc() failed: " << FORMAT_SYSERR(errno); \ + throw jexception(jerrno::JERR__MALLOC, oss.str(), cls, fn); \ + } + +// TODO: The following is a temporary bug-tracking aid which forces a core. +// Replace with the commented out version below when BZ484048 is resolved. 
+#define PTHREAD_CHK(err, pfn, cls, fn) if(err != 0) { \ + std::ostringstream oss; \ + oss << cls << "::" << fn << "(): " << pfn; \ + errno = err; \ + ::perror(oss.str().c_str()); \ + ::abort(); \ + } +/* +#define PTHREAD_CHK(err, pfn, cls, fn) if(err != 0) { \ + std::ostringstream oss; \ + oss << pfn << " failed: " << FORMAT_SYSERR(err); \ + throw jexception(jerrno::JERR__PTHREAD, oss.str(), cls, fn); \ + } +*/ + +#define ASSERT(cond, msg) if(cond == 0) { \ + std::cerr << msg << std::endl; \ + ::abort(); \ + } + +namespace mrg +{ +namespace journal +{ + /** + * \class jexception + * \brief Generic journal exception class + */ + class jexception : public std::exception + { + private: + u_int32_t _err_code; + std::string _additional_info; + std::string _throwing_class; + std::string _throwing_fn; + std::string _what; + void format(); + + public: + jexception() throw (); + + jexception(const u_int32_t err_code) throw (); + + jexception(const char* additional_info) throw (); + jexception(const std::string& additional_info) throw (); + + jexception(const u_int32_t err_code, const char* additional_info) throw (); + jexception(const u_int32_t err_code, const std::string& additional_info) throw (); + + jexception(const u_int32_t err_code, const char* throwing_class, const char* throwing_fn) + throw (); + jexception(const u_int32_t err_code, const std::string& throwing_class, + const std::string& throwing_fn) throw (); + + jexception(const u_int32_t err_code, const char* additional_info, + const char* throwing_class, const char* throwing_fn) throw (); + jexception(const u_int32_t err_code, const std::string& additional_info, + const std::string& throwing_class, const std::string& throwing_fn) throw (); + + virtual ~jexception() throw (); + virtual const char* what() const throw (); // override std::exception::what() + + inline u_int32_t err_code() const throw () { return _err_code; } + inline const std::string additional_info() const throw () { return _additional_info; } + 
inline const std::string throwing_class() const throw () { return _throwing_class; } + inline const std::string throwing_fn() const throw () { return _throwing_fn; } + + friend std::ostream& operator<<(std::ostream& os, const jexception& je); + friend std::ostream& operator<<(std::ostream& os, const jexception* jePtr); + }; // class jexception + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_JEXCEPTION_H diff --git a/cpp/src/qpid/legacystore/jrnl/jinf.cpp b/cpp/src/qpid/legacystore/jrnl/jinf.cpp new file mode 100644 index 0000000000..4117bd3581 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/jinf.cpp @@ -0,0 +1,540 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file jinf.cpp + * + * Qpid asynchronous store plugin library + * + * This file contains the code for the mrg::journal::jinf class. + * + * See jinf.h comments for details of this class. 
+ * + * \author Kim van der Riet + */ + +#include "jrnl/jinf.h" + +#include <cstdlib> +#include <cstring> +#include <ctime> +#include <fstream> +#include "qpid/legacystore/jrnl/file_hdr.h" +#include "qpid/legacystore/jrnl/jcntl.h" +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/lp_map.h" +#include <sstream> +#include <sys/stat.h> + +namespace mrg +{ +namespace journal +{ + +jinf::jinf(const std::string& jinf_filename, bool validate_flag): + _jver(0), + _filename(jinf_filename), + _num_jfiles(0), + _ae(false), + _ae_max_jfiles(0), + _jfsize_sblks(0), + _sblk_size_dblks(0), + _dblk_size(0), + _wcache_pgsize_sblks(0), + _wcache_num_pages(0), + _rcache_pgsize_sblks(0), + _rcache_num_pages(0), + _tm_ptr(0), + _valid_flag(false), + _analyzed_flag(false), + _initial_owi(false), + _frot(false) +{ + read(_filename); + if (validate_flag) + validate(); +} + +jinf::jinf(const std::string& jid, const std::string& jdir, const std::string& base_filename, const u_int16_t num_jfiles, + const bool auto_expand, const u_int16_t ae_max_jfiles, const u_int32_t jfsize_sblks, + const u_int32_t wcache_pgsize_sblks, const u_int16_t wcache_num_pages, const timespec& ts): + _jver(RHM_JDAT_VERSION), + _jid(jid), + _jdir(jdir), + _base_filename(base_filename), + _ts(ts), + _num_jfiles(num_jfiles), + _ae(auto_expand), + _ae_max_jfiles(ae_max_jfiles), + _jfsize_sblks(jfsize_sblks), + _sblk_size_dblks(JRNL_SBLK_SIZE), + _dblk_size(JRNL_DBLK_SIZE), + _wcache_pgsize_sblks(wcache_pgsize_sblks), + _wcache_num_pages(wcache_num_pages), + _rcache_pgsize_sblks(JRNL_RMGR_PAGE_SIZE), + _rcache_num_pages(JRNL_RMGR_PAGES), + _tm_ptr(std::localtime(&ts.tv_sec)), + _valid_flag(false), + _analyzed_flag(false), + _initial_owi(false) +{ + set_filename(); +} + +jinf::~jinf() +{} + +void +jinf::validate() +{ + bool err = false; + std::ostringstream oss; + if (_jver != RHM_JDAT_VERSION) + { + oss << "File \"" << _filename << "\": "; + oss << "RHM_JDAT_VERSION mismatch: found=" << 
(int)_jver; + oss << "; required=" << RHM_JDAT_VERSION << std::endl; + err = true; + } + if (_num_jfiles < JRNL_MIN_NUM_FILES) + { + oss << "File \"" << _filename << "\": "; + oss << "Number of journal files too small: found=" << _num_jfiles; + oss << "; minimum=" << JRNL_MIN_NUM_FILES << std::endl; + err = true; + } + if (_num_jfiles > JRNL_MAX_NUM_FILES) + { + oss << "File \"" << _filename << "\": "; + oss << "Number of journal files too large: found=" << _num_jfiles; + oss << "; maximum=" << JRNL_MAX_NUM_FILES << std::endl; + err = true; + } + if (_ae) + { + if (_ae_max_jfiles < _num_jfiles) + { + oss << "File \"" << _filename << "\": "; + oss << "Number of journal files exceeds auto-expansion limit: found=" << _num_jfiles; + oss << "; maximum=" << _ae_max_jfiles; + err = true; + } + if (_ae_max_jfiles > JRNL_MAX_NUM_FILES) + { + oss << "File \"" << _filename << "\": "; + oss << "Auto-expansion file limit too large: found=" << _ae_max_jfiles; + oss << "; maximum=" << JRNL_MAX_NUM_FILES; + err = true; + } + } + if (_jfsize_sblks < JRNL_MIN_FILE_SIZE) + { + oss << "File \"" << _filename << "\": "; + oss << "Journal file size too small: found=" << _jfsize_sblks; + oss << "; minimum=" << JRNL_MIN_FILE_SIZE << " (sblks)" << std::endl; + err = true; + } + if (_sblk_size_dblks != JRNL_SBLK_SIZE) + { + oss << "File \"" << _filename << "\": "; + oss << "JRNL_SBLK_SIZE mismatch: found=" << _sblk_size_dblks; + oss << "; required=" << JRNL_SBLK_SIZE << std::endl; + err = true; + } + if (_dblk_size != JRNL_DBLK_SIZE) + { + oss << "File \"" << _filename << "\": "; + oss << "JRNL_DBLK_SIZE mismatch: found=" << _dblk_size; + oss << "; required=" << JRNL_DBLK_SIZE << std::endl; + err = true; + } + if (err) + throw jexception(jerrno::JERR_JINF_CVALIDFAIL, oss.str(), "jinf", "validate"); + _valid_flag = true; +} + +void +jinf::analyze() +{ + lp_map early_map; // map for all owi flags same as pfid 0 + lp_map late_map; // map for all owi flags opposite to pfid 0 + bool late_latch = 
false; // latch for owi switchover + + if (!_valid_flag) + validate(); + bool done = false; + for (u_int16_t pfid=0; pfid<_num_jfiles && !done; pfid++) + { + std::ostringstream oss; + if (_jdir.at(_jdir.size() - 1) == '/') + oss << _jdir << _base_filename << "."; + else + oss << _jdir << "/" << _base_filename << "."; + oss << std::setw(4) << std::setfill('0') << std::hex << pfid; + oss << "." << JRNL_DATA_EXTENSION; + + // Check size of each file is consistent and expected + u_int32_t fsize = get_filesize(oss.str()); + if (fsize != (_jfsize_sblks + 1) * _sblk_size_dblks * _dblk_size) + { + std::ostringstream oss1; + oss1 << "File \"" << oss.str() << "\": size=" << fsize << "; expected=" << ((_jfsize_sblks + 1) * _sblk_size_dblks * _dblk_size); + throw jexception(jerrno::JERR_JINF_BADFILESIZE, oss1.str(), "jinf", "analyze"); + } + + std::ifstream jifs(oss.str().c_str()); + if (!jifs.good()) + throw jexception(jerrno::JERR__FILEIO, oss.str(), "jinf", "analyze"); + file_hdr fhdr; + jifs.read((char*)&fhdr, sizeof(fhdr)); + if (fhdr._magic != RHM_JDAT_FILE_MAGIC) // No file header + { + if (fhdr._magic != 0) + throw jexception(jerrno::JERR_JINF_INVALIDFHDR, oss.str(), "jinf", "analyze"); + if (!pfid) // pfid 0 == lid 0 cannot be empty + throw jexception(jerrno::JERR_JINF_JDATEMPTY, oss.str(), "jinf", "analyze"); + _frot = true; + done = true; + } + else + { + assert(pfid == fhdr._pfid); + if (pfid == 0) + { + _initial_owi = fhdr.get_owi(); + early_map.insert(fhdr._lfid, pfid); + } + else + { + if (_initial_owi == fhdr.get_owi()) + { + early_map.insert(fhdr._lfid, pfid); + if (late_latch && (!_ae || _num_jfiles == JRNL_MIN_NUM_FILES)) + throw jexception(jerrno::JERR_JINF_OWIBAD, oss.str(), "jinf", "analyze"); + } + else + { + late_map.insert(fhdr._lfid, pfid); + late_latch = true; + } + } + } + jifs.close(); + } // for (pfid) + + // If this is not the first rotation, all files should be in either early or late maps + if (!_frot) assert(early_map.size() + late_map.size() 
== _num_jfiles); + + _pfid_list.clear(); + late_map.get_pfid_list(_pfid_list); + early_map.get_pfid_list(_pfid_list); + + // Check OWI consistency +// for (u_int16_t lfid=0; lfid<_num_jfiles && !done; lfid++) +// { +// throw jexception(jerrno::JERR_JINF_OWIBAD, oss.str(), "jinf", "analyze"); +// } + + _analyzed_flag = true; +} + +void +jinf::write() +{ + std::ostringstream oss; + oss << _jdir << "/" << _base_filename << "." << JRNL_INFO_EXTENSION; + std::ofstream of(oss.str().c_str(), std::ofstream::out | std::ofstream::trunc); + if (!of.good()) + throw jexception(jerrno::JERR__FILEIO, oss.str(), "jinf", "write"); + of << xml_str(); + of.close(); +} + +u_int16_t +jinf::incr_num_jfiles() +{ + if (_num_jfiles >= JRNL_MAX_NUM_FILES) + throw jexception(jerrno::JERR_JINF_TOOMANYFILES, "jinf", "incr_num_jfiles"); + return ++_num_jfiles; +} + +u_int16_t +jinf::get_first_pfid() +{ + if (!_analyzed_flag) + analyze(); + return *_pfid_list.begin(); +} + +u_int16_t +jinf::get_last_pfid() +{ + if (!_analyzed_flag) + analyze(); + return *_pfid_list.rbegin(); +} + +jinf::pfid_list& +jinf::get_pfid_list() +{ + if (!_analyzed_flag) + analyze(); + return _pfid_list; +} + +void +jinf::get_normalized_pfid_list(pfid_list& pfid_list) +{ + if (!_analyzed_flag) + analyze(); + pfid_list.clear(); + u_int16_t s = _pfid_list.size(); + u_int16_t iz = 0; // index of 0 value + while (_pfid_list[iz] && iz < s) + iz++; + assert(_pfid_list[iz] == 0); + for (u_int16_t i = iz; i < iz + s; i++) + pfid_list.push_back(_pfid_list[i % s]); + assert(pfid_list[0] == 0); + assert(pfid_list.size() == s); +} + +bool +jinf::get_initial_owi() +{ + if (!_analyzed_flag) + analyze(); + return _initial_owi; +} + +bool +jinf::get_frot() +{ + if (!_analyzed_flag) + analyze(); + return _frot; +} + +std::string +jinf::to_string() const +{ + std::ostringstream oss; + oss << std::setfill('0'); + oss << "Journal ID \"" << _jid << "\" initialized " << (_tm_ptr->tm_year + 1900) << "/"; + oss << std::setw(2) << 
(_tm_ptr->tm_mon + 1) << "/" << std::setw(2) << _tm_ptr->tm_mday << " "; + oss << std::setw(2) << _tm_ptr->tm_hour << ":" << std::setw(2) << _tm_ptr->tm_min << ":"; + oss << std::setw(2) << _tm_ptr->tm_sec << "." << std::setw(9) << _ts.tv_nsec << ":" << std::endl; + oss << " Journal directory: \"" << _jdir << "\"" << std::endl; + oss << " Journal base filename: \"" << _base_filename << "\"" << std::endl; + oss << " Journal version: " << (unsigned)_jver << std::endl; + oss << " Number of journal files: " << _num_jfiles << std::endl; +// TODO: Uncomment these lines when auto-expand is enabled. +// oss << " Auto-expand mode: " << (_ae ? "enabled" : "disabled") << std::endl; +// if (_ae) oss << " Max. number of journal files (in auto-expand mode): " << _ae_max_jfiles << std::endl; + oss << " Journal file size: " << _jfsize_sblks << " sblks" << std::endl; + oss << " Softblock size (JRNL_SBLK_SIZE): " << _sblk_size_dblks << " dblks" << std::endl; + oss << " Datablock size (JRNL_DBLK_SIZE): " << _dblk_size << " bytes" << std::endl; + oss << " Write page size: " << _wcache_pgsize_sblks << " sblks" << std::endl; + oss << " Number of write pages: " << _wcache_num_pages << std::endl; + oss << " Read page size (JRNL_RMGR_PAGE_SIZE): " << _rcache_pgsize_sblks << " sblks" << std::endl; + oss << " Number of read pages (JRNL_RMGR_PAGES): " << _rcache_num_pages << std::endl; + return oss.str(); +} + +std::string +jinf::xml_str() const +{ + // TODO: This is *not* an XML writer, rather for simplicity, it uses literals. I'm sure a more elegant way can be + // found to do this using the real thing... 
+ + std::ostringstream oss; + oss << std::setfill('0'); + oss << "<?xml version=\"1.0\" ?>" << std::endl; + oss << "<jrnl>" << std::endl; + oss << " <journal_version value=\"" << (unsigned)_jver << "\" />" << std::endl; + oss << " <journal_id>" << std::endl; + oss << " <id_string value=\"" << _jid << "\" />" << std::endl; + oss << " <directory value=\"" << _jdir << "\" />" << std::endl; + oss << " <base_filename value=\"" << _base_filename << "\" />" << std::endl; + oss << " </journal_id>" << std::endl; + oss << " <creation_time>" << std::endl; + oss << " <seconds value=\"" << _ts.tv_sec << "\" />" << std::endl; + oss << " <nanoseconds value=\"" << _ts.tv_nsec << "\" />" << std::endl; + oss << " <string value=\"" << (_tm_ptr->tm_year + 1900) << "/"; + oss << std::setw(2) << (_tm_ptr->tm_mon + 1) << "/" << std::setw(2) << _tm_ptr->tm_mday << " "; + oss << std::setw(2) << _tm_ptr->tm_hour << ":" << std::setw(2) << _tm_ptr->tm_min << ":"; + oss << std::setw(2) << _tm_ptr->tm_sec << "." << std::setw(9) << _ts.tv_nsec; + oss << "\" />" << std::endl; + oss << " </creation_time>" << std::endl; + oss << " <journal_file_geometry>" << std::endl; + oss << " <number_jrnl_files value=\"" << _num_jfiles << "\" />" << std::endl; + oss << " <auto_expand value=\"" << (_ae ? 
"true" : "false") << "\" />" << std::endl; + if (_ae) oss << " <auto_expand_max_jrnl_files value=\"" << _ae_max_jfiles << "\" />" << std::endl; + oss << " <jrnl_file_size_sblks value=\"" << _jfsize_sblks << "\" />" << std::endl; + oss << " <JRNL_SBLK_SIZE value=\"" << _sblk_size_dblks << "\" />" << std::endl; + oss << " <JRNL_DBLK_SIZE value=\"" << _dblk_size << "\" />" << std::endl; + oss << " </journal_file_geometry>" << std::endl; + oss << " <cache_geometry>" << std::endl; + oss << " <wcache_pgsize_sblks value=\"" << _wcache_pgsize_sblks << "\" />" << std::endl; + oss << " <wcache_num_pages value=\"" << _wcache_num_pages << "\" />" << std::endl; + oss << " <JRNL_RMGR_PAGE_SIZE value=\"" << _rcache_pgsize_sblks << "\" />" << std::endl; + oss << " <JRNL_RMGR_PAGES value=\"" << _rcache_num_pages << "\" />" << std::endl; + oss << " </cache_geometry>" << std::endl; + oss << "</jrnl>" << std::endl; + return oss.str(); +} + +void +jinf::set_filename() +{ + std::ostringstream oss; + oss << _jdir << "/" << _base_filename << "." << JRNL_INFO_EXTENSION; + _filename = oss.str().c_str(); +} + +void +jinf::read(const std::string& jinf_filename) +{ + // TODO: This is *not* an XML reader, rather for simplicity, it is a brute-force line reader which relies on string + // recognition. It relies on the format of xml_str() above; it will not handle a XML restructuring. + // *** Can it be replaced cheaply by a real XML reader? Should it be, or is this sufficient? 
*** + + char buff[1024]; // limit of line input length + std::ifstream jinfs(jinf_filename.c_str()); + if (!jinfs.good()) + throw jexception(jerrno::JERR__FILEIO, jinf_filename.c_str(), "jinf", "read"); + u_int32_t charcnt = 0; + while (jinfs.good()) + { + jinfs.getline(buff, 1023); + charcnt += std::strlen(buff); + if (std::strstr(buff, "journal_version")) + _jver = u_int16_value(buff); + else if(std::strstr(buff, "id_string")) + string_value(_jid, buff); + else if(std::strstr(buff, "directory")) + string_value(_jdir, buff); + else if(std::strstr(buff, "base_filename")) + string_value(_base_filename, buff); + else if(std::strstr(buff, "number_jrnl_files")) + _num_jfiles = u_int16_value(buff); + else if(std::strstr(buff, "auto_expand_max_jrnl_files")) + _ae_max_jfiles = u_int16_value(buff); + else if(std::strstr(buff, "auto_expand")) + _ae = bool_value(buff); + else if(std::strstr(buff, "jrnl_file_size_sblks")) + _jfsize_sblks = u_int32_value(buff); + else if(std::strstr(buff, "JRNL_SBLK_SIZE")) + _sblk_size_dblks = u_int16_value(buff); + else if(std::strstr(buff, "JRNL_DBLK_SIZE")) + _dblk_size = u_int32_value(buff); + else if(std::strstr(buff, "wcache_pgsize_sblks")) + _wcache_pgsize_sblks = u_int32_value(buff); + else if(std::strstr(buff, "wcache_num_pages")) + _wcache_num_pages = u_int32_value(buff); + else if(std::strstr(buff, "JRNL_RMGR_PAGE_SIZE")) + _rcache_pgsize_sblks = u_int32_value(buff); + else if(std::strstr(buff, "JRNL_RMGR_PAGES")) + _rcache_num_pages = u_int32_value(buff); + else if(std::strstr(buff, "nanoseconds")) + _ts.tv_nsec = u_int32_value(buff); + else if(std::strstr(buff, "seconds")) + { + _ts.tv_sec = u_int32_value(buff); + _tm_ptr = std::localtime(&_ts.tv_sec); + } + } + jinfs.close(); + if (charcnt == 0) + throw jexception(jerrno::JERR_JINF_ZEROLENFILE, jinf_filename.c_str(), "jinf", "read"); +} + +bool +jinf::bool_value(char* line) const +{ + return std::strcmp(find_value(line), "true") == 0; +} + +u_int16_t +jinf::u_int16_value(char* 
line) const +{ + return std::atoi(find_value(line)); +} + +u_int32_t +jinf::u_int32_value(char* line) const +{ + return std::atol(find_value(line)); +} + +std::string& +jinf::string_value(std::string& str, char* line) const +{ + str.assign(find_value(line)); + return str; +} + +char* +jinf::find_value(char* line) const +{ + const char* target1_str = "value=\""; + int target2_char = '\"'; + char* t1 = std::strstr(line, target1_str); + if (t1 == 0) + { + std::ostringstream oss; + oss << "File \"" << _filename << "\": line=" << line; + throw jexception(jerrno::JERR_JINF_NOVALUESTR, oss.str(), "jinf", "find_value"); + } + t1 += std::strlen(target1_str); + + char* t2 = std::strchr(t1, target2_char); + if (t2 == 0) + { + std::ostringstream oss; + oss << "File \"" << _filename << "\": line=" << line; + throw jexception(jerrno::JERR_JINF_BADVALUESTR, oss.str(), "jinf", "find_value"); + } + *t2 = '\0'; + return t1; +} + +u_int32_t +jinf::get_filesize(const std::string& file_name) const +{ + struct stat s; + if (::stat(file_name.c_str(), &s)) + { + std::ostringstream oss; + oss << "stat: file=\"" << file_name << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JINF_STAT, oss.str(), "jinf", "get_filesize"); + } + if (!S_ISREG(s.st_mode)) // not a regular file, + { + std::ostringstream oss; + oss << "File \"" << file_name << "\" is not a regular file: mode=0x" << std::hex << s.st_mode; + throw jexception(jerrno::JERR_JINF_NOTREGFILE, oss.str(), "jinf", "get_filesize"); + } + return u_int32_t(s.st_size); +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/jinf.h b/cpp/src/qpid/legacystore/jrnl/jinf.h new file mode 100644 index 0000000000..73f5386a19 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/jinf.h @@ -0,0 +1,133 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file jinf.h + * + * Qpid asynchronous store plugin library + * + * This file contains the code for the mrg::journal::jinf class. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_JINF_H +#define QPID_LEGACYSTORE_JRNL_JINF_H + +#include <ctime> +#include <string> +#include <sys/types.h> +#include <vector> + +namespace mrg +{ +namespace journal +{ + /** + * \class jinf + * \brief Class to handle the journal information file <basename>.jinf. 
+ */ + class jinf + { + public: + typedef std::vector<u_int16_t> pfid_list; // pfids + typedef pfid_list::const_iterator pfidl_citr; + + private: + u_int8_t _jver; + std::string _jid; + std::string _jdir; + std::string _base_filename; + std::string _filename; + timespec _ts; + u_int16_t _num_jfiles; + bool _ae; + u_int32_t _ae_max_jfiles; + u_int32_t _jfsize_sblks; + u_int16_t _sblk_size_dblks; + u_int32_t _dblk_size; + u_int32_t _wcache_pgsize_sblks; + u_int16_t _wcache_num_pages; + u_int32_t _rcache_pgsize_sblks; + u_int16_t _rcache_num_pages; + std::tm* _tm_ptr; + bool _valid_flag; + bool _analyzed_flag; + pfid_list _pfid_list; + bool _initial_owi; + bool _frot; + + public: + // constructor for reading existing jinf file + jinf(const std::string& jinf_filename, bool validate_flag); + // constructor for writing jinf file + jinf(const std::string& jid, const std::string& jdir, const std::string& base_filename, + const u_int16_t num_jfiles, const bool auto_expand, const u_int16_t ae_max_jfiles, + const u_int32_t jfsize_sblks, const u_int32_t wcache_pgsize_sblks, const u_int16_t wcache_num_pages, + const timespec& ts); + virtual ~jinf(); + + void validate(); + void analyze(); + void write(); + + inline u_int8_t jver() const { return _jver; } + inline const std::string& jid() const { return _jid; } + inline const std::string& jdir() const { return _jdir; } + inline void set_jdir(const std::string& jdir) { _jdir = jdir; } + inline const std::string& base_filename() const { return _base_filename; } + inline const timespec& ts() const { return _ts; } + inline u_int16_t num_jfiles() const { return _num_jfiles; } + u_int16_t incr_num_jfiles(); + inline bool is_ae() const { return _ae; } + inline u_int16_t ae_max_jfiles() const { return _ae_max_jfiles; } + inline u_int32_t jfsize_sblks() const { return _jfsize_sblks; } + inline u_int16_t sblk_size_dblks() const { return _sblk_size_dblks; } + inline u_int32_t dblk_size() const { return _dblk_size; } + inline u_int32_t 
wcache_pgsize_sblks() const { return _wcache_pgsize_sblks; } + inline u_int16_t wcache_num_pages() const { return _wcache_num_pages; } + inline u_int32_t rcache_pgsize_sblks() const { return _rcache_pgsize_sblks; } + inline u_int16_t rcache_num_pages() const { return _rcache_num_pages; } + u_int16_t get_first_pfid(); + u_int16_t get_last_pfid(); + pfid_list& get_pfid_list(); + void get_normalized_pfid_list(pfid_list& pfid_list); + bool get_initial_owi(); + bool get_frot(); + + std::string to_string() const; + std::string xml_str() const; + + private: + void set_filename(); + void read(const std::string& jinf_filename); + bool bool_value(char* line) const; + u_int16_t u_int16_value(char* line) const; + u_int32_t u_int32_value(char* line) const; + std::string& string_value(std::string& str, char* line) const; + char* find_value(char* line) const; + u_int32_t get_filesize(const std::string& file_name) const; + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_JINF_H diff --git a/cpp/src/qpid/legacystore/jrnl/jrec.cpp b/cpp/src/qpid/legacystore/jrnl/jrec.cpp new file mode 100644 index 0000000000..61b9b6cc9b --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/jrec.cpp @@ -0,0 +1,119 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file jrec.cpp + * + * Qpid asynchronous store plugin library + * + * File containing source code for class mrg::journal::jrec (abstract journal + * jrecord). See comments in file jrec.h for details. + * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/jrec.h" + +#include <iomanip> +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jexception.h" +#include <sstream> + +namespace mrg +{ +namespace journal +{ + +jrec::jrec() {} +jrec::~jrec() {} + +void +jrec::chk_hdr(const rec_hdr& hdr) +{ + if (hdr._magic == 0) + { + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + oss << "enq magic NULL: rid=0x" << hdr._rid; + throw jexception(jerrno::JERR_JREC_BADRECHDR, oss.str(), "jrec", "chk_hdr"); + } + if (hdr._version != RHM_JDAT_VERSION) + { + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + oss << "version: rid=0x" << hdr._rid; + oss << ": expected=0x" << std::setw(2) << (int)RHM_JDAT_VERSION; + oss << " read=0x" << std::setw(2) << (int)hdr._version; + throw jexception(jerrno::JERR_JREC_BADRECHDR, oss.str(), "jrec", "chk_hdr"); + } +#if defined (JRNL_LITTLE_ENDIAN) + u_int8_t endian_flag = RHM_LENDIAN_FLAG; +#else + u_int8_t endian_flag = RHM_BENDIAN_FLAG; +#endif + if (hdr._eflag != endian_flag) + { + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + oss << "endian_flag: rid=" << hdr._rid; + oss << ": expected=0x" << std::setw(2) << (int)endian_flag; + oss << " read=0x" << std::setw(2) << (int)hdr._eflag; + throw jexception(jerrno::JERR_JREC_BADRECHDR, oss.str(), "jrec", "chk_hdr"); + } +} + +void +jrec::chk_rid(const rec_hdr& hdr, const u_int64_t rid) +{ + if (hdr._rid != rid) + { + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + oss << "rid mismatch: expected=0x" << rid; + oss << " read=0x" << hdr._rid; + throw 
jexception(jerrno::JERR_JREC_BADRECHDR, oss.str(), "jrec", "chk_hdr"); + } +} + +void +jrec::chk_tail(const rec_tail& tail, const rec_hdr& hdr) +{ + if (tail._xmagic != ~hdr._magic) + { + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + oss << "magic: rid=0x" << hdr._rid; + oss << ": expected=0x" << ~hdr._magic; + oss << " read=0x" << tail._xmagic; + throw jexception(jerrno::JERR_JREC_BADRECTAIL, oss.str(), "jrec", "chk_tail"); + } + if (tail._rid != hdr._rid) + { + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + oss << "rid: rid=0x" << hdr._rid; + oss << ": read=0x" << tail._rid; + throw jexception(jerrno::JERR_JREC_BADRECTAIL, oss.str(), "jrec", "chk_tail"); + } +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/jrec.h b/cpp/src/qpid/legacystore/jrnl/jrec.h new file mode 100644 index 0000000000..9d0771cabd --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/jrec.h @@ -0,0 +1,183 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file jrec.h + * + * Qpid asynchronous store plugin library + * + * File containing source code for class mrg::journal::jrec (abstract journal + * jrecord). 
See class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_JREC_H +#define QPID_LEGACYSTORE_JRNL_JREC_H + +namespace mrg +{ +namespace journal +{ +class jrec; +} +} + +#include <cstddef> +#include <fstream> +#include "qpid/legacystore/jrnl/rec_hdr.h" +#include "qpid/legacystore/jrnl/rec_tail.h" +#include <string> +#include <sys/types.h> + +namespace mrg +{ +namespace journal +{ + + /** + * \class jrec + * \brief Abstract class for all file jrecords, both data and log. This class establishes + * the common data format and structure for these jrecords. + */ + class jrec + { + public: + jrec(); + virtual ~jrec(); + + /** + * \brief Encode this instance of jrec into the write buffer at the disk-block-aligned + * pointer wptr starting at position rec_offs_dblks in the encoded record to a + * maximum size of max_size_dblks. + * + * This call encodes the content of the data contianed in this instance of jrec into a + * disk-softblock-aligned (defined by JRNL_SBLK_SIZE) buffer pointed to by parameter + * wptr. No more than paramter max_size_dblks data-blocks may be written to the buffer. + * The parameter rec_offs_dblks is the offset in data-blocks within the fully encoded + * data block this instance represents at which to start encoding. + * + * Encoding entails writing the record header (struct enq_hdr), the data and the record tail + * (struct enq_tail). The record must be data-block-aligned (defined by JRNL_DBLK_SIZE), + * thus any remaining space in the final data-block is ignored; the returned value is the + * number of data-blocks consumed from the page by the encode action. Provided the initial + * alignment requirements are met, records may be of arbitrary size and may span multiple + * data-blocks, disk-blocks and/or pages. + * + * Since the record size in data-blocks is known, the general usage pattern is to call + * encode() as many times as is needed to fully encode the data. 
Each call to encode() + * will encode as much of the record as it can to what remains of the current page cache, + * and will return the number of data-blocks actually encoded. + * + * <b>Example:</b> Assume that record r1 was previously written to page 0, and that this + * is an instance representing record r2. Being larger than the page size ps, r2 would span + * multiple pages as follows: + * <pre> + * |<---ps--->| + * +----------+----------+----------+----... + * | |r2a| r2b | r2c | | + * |<-r1-><----------r2----------> | + * +----------+----------+----------+----... + * page: p0 p1 p2 + * </pre> + * Encoding record r2 will require multiple calls to encode; one for each page which + * is involved. Record r2 is divided logically into sections r2a, r2b and r2c at the + * points where the page boundaries intersect with the record. Assuming a page size + * of ps, the page boundary pointers are represented by their names p0, p1... and the + * sizes of the record segments are represented by their names r1, r2a, r2b..., the calls + * should be as follows: + * <pre> + * encode(p0+r1, 0, ps-r1); (returns r2a data-blocks) + * encode(p1, r2a, ps); (returns r2b data-blocks which equals ps) + * encode(p2, r2a+r2b, ps); (returns r2c data-blocks) + * </pre> + * + * \param wptr Data-block-aligned pointer to position in page buffer where encoding is to + * take place. + * \param rec_offs_dblks Offset in data-blocks within record from which to start encoding. + * \param max_size_dblks Maximum number of data-blocks to write to pointer wptr. + * \returns Number of data-blocks encoded. + */ + virtual u_int32_t encode(void* wptr, u_int32_t rec_offs_dblks, + u_int32_t max_size_dblks) = 0; + + /** + * \brief Decode into this instance of jrec from the read buffer at the disk-block-aligned + * pointer rptr starting at position jrec_offs_dblks in the encoded record to a + * maximum size of max_size_blks. 
+ * + * This call decodes a record in the page buffer pointed to by the data-block-aligned + * (defined by JRNL_DBLK_SIZE) parameter rptr into this instance of jrec. No more than + * paramter max_size_dblks data-blocks may be read from the buffer. The parameter + * jrec_offs_dblks is the offset in data-blocks within the encoded record at which to start + * decoding. + * + * Decoding entails reading the record header, the data and the tail. The record is + * data-block-aligned (defined by JRNL_DBLK_SIZE); the returned value is the number of + * data-blocks read from the buffer by the decode action. As the record data size is only + * known once the header is read, the number of calls required to complete reading the + * record will depend on the vlaues within this instance which are set when the + * header is decoded. + * + * A non-zero value for jrec_offs_dblks implies that this is not the first call to + * decode and the record data will be appended at this offset. + * + * \param h Reference to instance of struct hdr, already read from page buffer and used + * to determine record type + * \param rptr Data-block-aligned pointer to position in page buffer where decoding is to + * begin. + * \param rec_offs_dblks Offset within record from which to start appending the decoded + * record. + * \param max_size_dblks Maximum number of data-blocks to read from pointer rptr. + * \returns Number of data-blocks read (consumed). 
+ */ + virtual u_int32_t decode(rec_hdr& h, void* rptr, u_int32_t rec_offs_dblks, + u_int32_t max_size_dblks) = 0; + + virtual bool rcv_decode(rec_hdr h, std::ifstream* ifsp, std::size_t& rec_offs) = 0; + + virtual std::string& str(std::string& str) const = 0; + virtual std::size_t data_size() const = 0; + virtual std::size_t xid_size() const = 0; + virtual std::size_t rec_size() const = 0; + inline virtual u_int32_t rec_size_dblks() const { return size_dblks(rec_size()); } + static inline u_int32_t size_dblks(const std::size_t size) + { return size_blks(size, JRNL_DBLK_SIZE); } + static inline u_int32_t size_sblks(const std::size_t size) + { return size_blks(size, JRNL_DBLK_SIZE * JRNL_SBLK_SIZE); } + static inline u_int32_t size_blks(const std::size_t size, const std::size_t blksize) + { return (size + blksize - 1)/blksize; } + virtual u_int64_t rid() const = 0; + + protected: + virtual void chk_hdr() const = 0; + virtual void chk_hdr(u_int64_t rid) const = 0; + virtual void chk_tail() const = 0; + static void chk_hdr(const rec_hdr& hdr); + static void chk_rid(const rec_hdr& hdr, u_int64_t rid); + static void chk_tail(const rec_tail& tail, const rec_hdr& hdr); + virtual void clean() = 0; + }; // class jrec + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_JREC_H diff --git a/cpp/src/qpid/legacystore/jrnl/lp_map.cpp b/cpp/src/qpid/legacystore/jrnl/lp_map.cpp new file mode 100644 index 0000000000..8024ddadd2 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/lp_map.cpp @@ -0,0 +1,82 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file lp_map.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::lp_map (logical file map). See + * comments in file lp_map.h for details. + * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/lp_map.h" + +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jexception.h" +#include <sstream> + +namespace mrg +{ +namespace journal +{ +lp_map::lp_map() : _map() {} +lp_map::~lp_map() {} + +void +lp_map::insert(u_int16_t lfid, u_int16_t pfid) +{ + lfpair ip = lfpair(lfid, pfid); + lfret ret = _map.insert(ip); + if (ret.second == false) + { + std::ostringstream oss; + oss << std::hex << "lfid=0x" << lfid << " pfid=0x" << pfid; + throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "lp_map", "insert"); + } +} + +void +lp_map::get_pfid_list(std::vector<u_int16_t>& pfid_list) +{ + for (lp_map_citr_t i = _map.begin(); i != _map.end(); i++) + pfid_list.push_back(i->second); +} + +// debug aid +std::string +lp_map::to_string() +{ + std::ostringstream oss; + oss << "{lfid:pfid "; + for (lp_map_citr_t i=_map.begin(); i!=_map.end(); i++) + { + if (i != _map.begin()) oss << ", "; + oss << (*i).first << ":" << (*i).second; + } + oss << "}"; + return oss.str(); +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/lp_map.h b/cpp/src/qpid/legacystore/jrnl/lp_map.h new file mode 100644 index 0000000000..c43cbc0173 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/lp_map.h @@ -0,0 +1,83 @@ +/* + * + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file lp_map.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::lp_map (logical file map). + * See class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_LP_MAP_H +#define QPID_LEGACYSTORE_JRNL_LP_MAP_H + +#include <map> +#include <string> +#include <sys/types.h> +#include <vector> + +namespace mrg +{ +namespace journal +{ + /** + * \class lp_map + * \brief Maps the logical file id (lfid) to the physical file id (pfid) in the journal. 
+ * + * NOTE: NOT THREAD SAFE + */ + class lp_map + { + public: + typedef std::map<u_int16_t, u_int16_t> lp_map_t; + typedef lp_map_t::const_iterator lp_map_citr_t; + typedef lp_map_t::const_reverse_iterator lp_map_critr_t; + + private: + typedef std::pair<u_int16_t, u_int16_t> lfpair; + typedef std::pair<lp_map_t::iterator, bool> lfret; + lp_map_t _map; + + public: + lp_map(); + virtual ~lp_map(); + + void insert(u_int16_t lfid, u_int16_t pfid); + inline u_int16_t size() const { return u_int16_t(_map.size()); } + inline bool empty() const { return _map.empty(); } + inline lp_map_citr_t begin() { return _map.begin(); } + inline lp_map_citr_t end() { return _map.end(); } + inline lp_map_critr_t rbegin() { return _map.rbegin(); } + inline lp_map_critr_t rend() { return _map.rend(); } + void get_pfid_list(std::vector<u_int16_t>& pfid_list); + + // debug aid + std::string to_string(); + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_LP_MAP_H diff --git a/cpp/src/qpid/legacystore/jrnl/lpmgr.cpp b/cpp/src/qpid/legacystore/jrnl/lpmgr.cpp new file mode 100644 index 0000000000..d7b0c9f516 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/lpmgr.cpp @@ -0,0 +1,226 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file lpmgr.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::lpmgr (non-logging file + * handle), used for controlling journal log files. See comments in file + * lpmgr.h for details. + * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/lpmgr.h" + +#include <cassert> +#include <qpid/legacystore/jrnl/jerrno.h> +#include <qpid/legacystore/jrnl/jexception.h> + +namespace mrg +{ +namespace journal +{ + +lpmgr::lpmgr() : _ae(false), _ae_max_jfiles(0) +{} + +lpmgr::~lpmgr() +{ + finalize(); +} + +void +lpmgr::initialize(const u_int16_t num_jfiles, + const bool ae, + const u_int16_t ae_max_jfiles, + jcntl* const jcp, + new_obj_fn_ptr fp) +{ + assert(jcp != 0); + finalize(); + + // Validate params + if (ae && ae_max_jfiles > 0 && ae_max_jfiles <= num_jfiles) + { + std::ostringstream oss; + oss << "ae_max_jfiles (" << ae_max_jfiles << ") <= num_jfiles (" << num_jfiles << ")"; + throw jexception(jerrno::JERR_LFMGR_BADAEFNUMLIM, oss.str(), "lpmgr", "initialize"); + } + _ae = ae; + _ae_max_jfiles = ae_max_jfiles; + + const std::size_t num_res_files = ae + ? (ae_max_jfiles ? ae_max_jfiles : JRNL_MAX_NUM_FILES) + : num_jfiles; + _fcntl_arr.reserve(num_res_files); + append(jcp, fp, num_jfiles); +} + +void +lpmgr::recover(const rcvdat& rd, + jcntl* const jcp, + new_obj_fn_ptr fp) +{ + assert(jcp != 0); + finalize(); + + // Validate rd params + if (rd._aemjf > 0 && rd._aemjf <= rd._njf) + { + std::ostringstream oss; + oss << "ae_max_jfiles (" << rd._aemjf << ") <= num_jfiles (" << rd._njf << ")"; + throw jexception(jerrno::JERR_LFMGR_BADAEFNUMLIM, oss.str(), "lpmgr", "recover"); + } + _ae = rd._ae; + _ae_max_jfiles = rd._aemjf; + + const std::size_t num_res_files = rd._ae + ? (rd._aemjf ? 
rd._aemjf : JRNL_MAX_NUM_FILES) + : rd._njf; + _fcntl_arr.reserve(num_res_files); + _fcntl_arr.assign(rd._njf, 0); + std::vector<u_int16_t> lfid_list(rd._fid_list.size(), 0); + for (std::size_t lid = 0; lid < rd._fid_list.size(); lid++) + lfid_list[rd._fid_list[lid]] = lid; + // NOTE: rd._fid_list may be smaller than rd._njf (journal may be empty or not yet file-cycled) + for (std::size_t pfid = 0; pfid < rd._njf; pfid++) + if (pfid < rd._fid_list.size()) + _fcntl_arr[lfid_list[pfid]] = fp(jcp, lfid_list[pfid], pfid, &rd); + else + _fcntl_arr[pfid] = fp(jcp, pfid, pfid, &rd); +} + +void +lpmgr::insert(const u_int16_t after_lfid, + jcntl* const jcp, + new_obj_fn_ptr fp, + const u_int16_t num_jfiles) +{ + assert(jcp != 0); + assert(after_lfid < _fcntl_arr.size()); + if (!_ae) throw jexception(jerrno::JERR_LFMGR_AEDISABLED, "lpmgr", "insert"); + if (num_jfiles == 0) return; + std::size_t pfid = _fcntl_arr.size(); + const u_int16_t eff_ae_max_jfiles = _ae_max_jfiles ? _ae_max_jfiles : JRNL_MAX_NUM_FILES; + if (pfid + num_jfiles > eff_ae_max_jfiles) + { + std::ostringstream oss; + oss << "num_files=" << pfid << " incr=" << num_jfiles << " limit=" << _ae_max_jfiles; + throw jexception(jerrno::JERR_LFMGR_AEFNUMLIMIT, oss.str(), "lpmgr", "insert"); + } + for (std::size_t lid = after_lfid + 1; lid <= after_lfid + num_jfiles; lid++, pfid++) + _fcntl_arr.insert(_fcntl_arr.begin() + lid, fp(jcp, lid, pfid, 0)); + for (std::size_t lid = after_lfid + num_jfiles + 1; lid < _fcntl_arr.size(); lid++) + { + fcntl* p = _fcntl_arr[lid]; + assert(p != 0); + p->set_lfid(p->lfid() + num_jfiles); + } +} + +void +lpmgr::finalize() +{ + for (u_int32_t i = 0; i < _fcntl_arr.size(); i++) + delete _fcntl_arr[i]; + _fcntl_arr.clear(); + _ae = false; + _ae_max_jfiles = 0; +} + +void +lpmgr::set_ae(const bool ae) +{ + if (ae && _ae_max_jfiles > 0 && _ae_max_jfiles <= _fcntl_arr.size()) + { + std::ostringstream oss; + oss << "ae_max_jfiles (" << _ae_max_jfiles << ") <= _fcntl_arr.size (" << 
_fcntl_arr.size() << ")"; + throw jexception(jerrno::JERR_LFMGR_BADAEFNUMLIM, oss.str(), "lpmgr", "set_ae"); + } + if (ae && _fcntl_arr.max_size() < _ae_max_jfiles) + _fcntl_arr.reserve(_ae_max_jfiles ? _ae_max_jfiles : JRNL_MAX_NUM_FILES); + _ae = ae; +} + +void +lpmgr::set_ae_max_jfiles(const u_int16_t ae_max_jfiles) +{ + if (_ae && ae_max_jfiles > 0 && ae_max_jfiles <= _fcntl_arr.size()) + { + std::ostringstream oss; + oss << "ae_max_jfiles (" << _ae_max_jfiles << ") <= _fcntl_arr.size() (" << _fcntl_arr.size() << ")"; + throw jexception(jerrno::JERR_LFMGR_BADAEFNUMLIM, oss.str(), "lpmgr", "set_ae_max_jfiles"); + } + if (_ae && _fcntl_arr.max_size() < ae_max_jfiles) + _fcntl_arr.reserve(ae_max_jfiles ? ae_max_jfiles : JRNL_MAX_NUM_FILES); + _ae_max_jfiles = ae_max_jfiles; +} + +u_int16_t +lpmgr::ae_jfiles_rem() const +{ + if (_ae_max_jfiles > _fcntl_arr.size()) return _ae_max_jfiles - _fcntl_arr.size(); + if (_ae_max_jfiles == 0) return JRNL_MAX_NUM_FILES - _fcntl_arr.size(); + return 0; +} + +// Testing functions + +void +lpmgr::get_pfid_list(std::vector<u_int16_t>& pfid_list) const +{ + pfid_list.clear(); + for (std::size_t i = 0; i < _fcntl_arr.size(); i++) + pfid_list.push_back(_fcntl_arr[i]->pfid()); +} + +void +lpmgr::get_lfid_list(std::vector<u_int16_t>& lfid_list) const +{ + lfid_list.clear(); + lfid_list.assign(_fcntl_arr.size(), 0); + for (std::size_t i = 0; i < _fcntl_arr.size(); i++) + lfid_list[_fcntl_arr[i]->pfid()] = i; +} + +// === protected fns === + +void +lpmgr::append(jcntl* const jcp, + new_obj_fn_ptr fp, + const u_int16_t num_jfiles) +{ + std::size_t s = _fcntl_arr.size(); + if (_ae_max_jfiles && s + num_jfiles > _ae_max_jfiles) + { + std::ostringstream oss; + oss << "num_files=" << s << " incr=" << num_jfiles << " limit=" << _ae_max_jfiles; + throw jexception(jerrno::JERR_LFMGR_AEFNUMLIMIT, oss.str(), "lpmgr", "append"); + } + for (std::size_t i = s; i < s + num_jfiles; i++) + _fcntl_arr.push_back(fp(jcp, i, i, 0)); +} + +} // namespace 
journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/lpmgr.h b/cpp/src/qpid/legacystore/jrnl/lpmgr.h new file mode 100644 index 0000000000..be5c4494cc --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/lpmgr.h @@ -0,0 +1,303 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file lpmgr.h + * + * Qpid asynchronous store plugin library + * + * Class mrg::journal::lpmgr. See class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_LPMGR_H +#define QPID_LEGACYSTORE_JRNL_LPMGR_H + +namespace mrg +{ +namespace journal +{ + class jcntl; + class lpmgr; +} +} + +#include "qpid/legacystore/jrnl/fcntl.h" +#include <vector> + +namespace mrg +{ +namespace journal +{ + + /** + * \brief LFID-PFID manager. This class maps the logical file id (lfid) to the physical file id (pfid) so that files + * may be inserted into the file ring buffer in (nearly) arbitrary logical locations while the physical ids continue + * to be appended. NOTE: NOT THREAD SAFE. + * + * The entire functionality of the LFID-PFID manager is to maintain an array of pointers to fcntl objects which have + * a one-to-one relationship to the physical %journal files. 
The logical file id (lfid) is used as an index to the + * array to read the mapped physical file id (pfid). By altering the order of these pointers within the array, the + * mapping of logical to physical files may be altered. This can be used to allow for the logical insertion of + * %journal files into a ring buffer, even though the physical file ids must be appended to those that preceded them. + * + * Since the insert() operation uses after-lfid as its position parameter, it is not possible to insert before lfid + * 0 - i.e. it is only possible to insert after an existing lfid. Consequently, lfid 0 and pfid 0 are always + * coincident in a %journal. Note, however, that inserting before lfid 0 is logically equivalent to inserting after + * the last lfid. + * + * When one or more files are inserted after a particular lfid, the lfids of the following files are incremented. The + * pfids of the inserted files follow those of all existing files, thus leading to an lfid-pfid discrepancy (i.e. the + * lfid and pfid of a file are no longer necessarily equal): + * + * Example: Before insertion, %journal file headers would look as follows: + * <pre> + * Logical view (sorted by lfid): Physical view (sorted by pfid): + * +---+---+---+---+---+---+ +---+---+---+---+---+---+ + * pfid --> | 0 | 1 | 2 | 3 | 4 | 5 | pfid --> | 0 | 1 | 2 | 3 | 4 | 5 | + * lfid --> | 0 | 1 | 2 | 3 | 4 | 5 | lfid --> | 0 | 1 | 2 | 3 | 4 | 5 | + * +---+---+---+---+---+---+ +---+---+---+---+---+---+ + * </pre> + * + * After insertion of 2 files after lfid 2 (marked with *s): + * <pre> + * Logical view (sorted by lfid): Physical view (sorted by pfid): + * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ + * pfid --> | 0 | 1 | 2 |*6*|*7*| 3 | 4 | 5 | pfid --> | 0 | 1 | 2 | 3 | 4 | 5 |*6*|*7*| + * lfid --> | 0 | 1 | 2 |*3*|*4*| 5 | 6 | 7 | lfid --> | 0 | 1 | 2 | 5 | 6 | 7 |*3*|*4*| + * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ + * </pre> + * + * The insert() function updates the internal 
map immediately, but the physical files (which have both the pfid and + * lfid written into the file header) are only updated as they are overwritten in the normal course of enqueueing + * and dequeueing messages. If the %journal should fail after insertion but before the files following those inserted + * are overwritten, then duplicate lfids will be present (though no duplicate pfids are possible). The overwrite + * indicator (owi) flag and the pfid numbers may be used to resolve the ambiguity and determine the logically earlier + * lfid in this case. + * + * Example: Before insertion, the current active write file being lfid/pfid 2 as determined by the owi flag, %journal + * file headers would look as follows: + * <pre> + * Logical view (sorted by lfid): Physical view (sorted by pfid): + * +---+---+---+---+---+---+ +---+---+---+---+---+---+ + * pfid --> | 0 | 1 | 2 | 3 | 4 | 5 | pfid --> | 0 | 1 | 2 | 3 | 4 | 5 | + * lfid --> | 0 | 1 | 2 | 3 | 4 | 5 | lfid --> | 0 | 1 | 2 | 3 | 4 | 5 | + * owi --> | t | t | t | f | f | f | owi --> | t | t | t | f | f | f | + * +---+---+---+---+---+---+ +---+---+---+---+---+---+ + * </pre> + * + * After inserting 2 files, the first after lfid 2 and the second after lfid 3 (the newly inserted file) - marked with *s: + * <pre> + * Logical view (sorted by lfid): Physical view (sorted by pfid): + * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ + * pfid --> | 0 | 1 | 2 |*6*|*7*| 3 | 4 | 5 | pfid --> | 0 | 1 | 2 | 3 | 4 | 5 |*6*|*7*| + * lfid --> | 0 | 1 | 2 |*3*|*4*| 3 | 4 | 5 | lfid --> | 0 | 1 | 2 | 3 | 4 | 5 |*3*|*4*| + * owi --> | t | t | t | t | t | f | f | f | owi --> | t | t | t | f | f | f | t | t | + * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ + * </pre> + * + * If a broker failure occurs at this point, then there are two independent tests that may be made to resolve + * duplicate lfids during recovery in such cases: + * <ol> + * <li>The correct lfid has an owi flag that matches that of pfid/lfid 0</li> + * 
<li>The most recently inserted (hence correct) lfid has pfids that are higher than the duplicate that was not + * overwritten</li> + * </ol> + * + * NOTE: NOT THREAD SAFE. Provide external thread protection if used in multi-threaded environments. + */ + class lpmgr + { + public: + /** + * \brief Function pointer to function that will create a new fcntl object and return its pointer. + * + * \param jcp Pointer to jcntl instance from which journal file details will be obtained. + * \param lfid Logical file ID for new fcntl instance. + * \param pfid Physical file ID for file associated with new fcntl instance. + * \param rdp Pointer to rcvdat instance which conatins recovery information for new fcntl instance when + * recovering an existing file, or null if a new file is to be created. + */ + typedef fcntl* (new_obj_fn_ptr)(jcntl* const jcp, + const u_int16_t lfid, + const u_int16_t pfid, + const rcvdat* const rdp); + + private: + bool _ae; ///< Auto-expand mode + u_int16_t _ae_max_jfiles; ///< Max file count for auto-expansion; 0 = no limit + std::vector<fcntl*> _fcntl_arr; ///< Array of pointers to fcntl objects + + public: + lpmgr(); + virtual ~lpmgr(); + + /** + * \brief Initialize from scratch for a known number of %journal files. All lfid values are identical to pfid + * values (which is normal before any inserts have occurred). + * + * \param num_jfiles Number of files to be created, and consequently the number of fcntl objects in array + * _fcntl_arr. + * \param ae If true, allows auto-expansion; if false, disables auto-expansion. + * \param ae_max_jfiles The maximum number of files allowed for auto-expansion. Cannot be lower than the current + * number of files. However, a zero value disables the limit checks, and allows unlimited + * expansion. + * \param jcp Pointer to jcntl instance. This is used to find the file path and base filename so that + * new files may be created. 
+ * \param fp Pointer to function which creates and returns a pointer to a new fcntl object (and hence + * causes a new %journal file to be created). + */ + void initialize(const u_int16_t num_jfiles, + const bool ae, + const u_int16_t ae_max_jfiles, + jcntl* const jcp, + new_obj_fn_ptr fp); + + /** + * \brief Initialize from a known lfid-pfid map pfid_list (within rcvdat param rd), which is usually obtained + * from a recover. The index of pfid_list is the logical file id (lfid); the value contained in the vector is + * the physical file id (pfid). + * + * \param rd Ref to rcvdat struct which contains recovery data and the pfid_list. + * \param jcp Pointer to jcntl instance. This is used to find the file path and base filename so that + * new files may be created. + * \param fp Pointer to function which creates and returns a pointer to a new fcntl object (and hence + * causes a new %journal file to be created). + */ + void recover(const rcvdat& rd, + jcntl* const jcp, + new_obj_fn_ptr fp); + + /** + * \brief Insert num_jfiles files after lfid index after_lfid. This causes all lfids after after_lfid to be + * increased by num_jfiles. + * + * Note that it is not possible to insert <i>before</i> lfid 0, and thus lfid 0 should always point to pfid 0. + * Inserting before lfid 0 is logically equivilent to inserting after the last lfid in a circular buffer. + * + * \param after_lfid Lid index after which to insert file(s). + * \param jcp Pointer to jcntl instance. This is used to find the file path and base filename so that + * new files may be created. + * \param fp Pointer to function which creates and returns a pointer to a new fcntl object (and hence + * causes a new %journal file to be created). + * \param num_jfiles The number of files by which to increase. + */ + void insert(const u_int16_t after_lfid, + jcntl* const jcp, + new_obj_fn_ptr fp, + const u_int16_t num_jfiles = 1); + + /** + * \brief Clears _fcntl_arr and deletes all fcntl instances. 
+ */ + void finalize(); + + /** + * \brief Returns true if initialized; false otherwise. After construction, will return false until initialize() + * is called; thereafter true until finalize() is called, whereupon it will return false again. + * + * \return True if initialized; false otherwise. + */ + inline bool is_init() const { return _fcntl_arr.size() > 0; } + + /** + * \brief Returns true if auto-expand mode is enabled; false if not. + * + * \return True if auto-expand mode is enabled; false if not. + */ + inline bool is_ae() const { return _ae; } + + /** + * \brief Sets the auto-expand mode to enabled if ae is true, to disabled otherwise. The value of _ae_max_jfiles + * must be valid to succeed (i.e. _ae_max_jfiles must be greater than the current number of files or be zero). + * + * \param ae If true will enable auto-expand mode; if false will disable it. + */ + void set_ae(const bool ae); + + /** + * \brief Returns the number of %journal files, including any that were appended or inserted since + * initialization. + * + * \return Number of %journal files if initialized; 0 otherwise. + */ + inline u_int16_t num_jfiles() const { return static_cast<u_int16_t>(_fcntl_arr.size()); } + + /** + * \brief Returns the maximum number of files allowed for auto-expansion. + * + * \return Maximum number of files allowed for auto-expansion. A zero value represents a disabled limit + * - i.e. unlimited expansion. + */ + inline u_int16_t ae_max_jfiles() const { return _ae_max_jfiles; } + + /** + * \brief Sets the maximum number of files allowed for auto-expansion. A zero value disables the limit. + * + * \param ae_max_jfiles The maximum number of files allowed for auto-expansion. Cannot be lower than the current + * number of files. However, a zero value disables the limit checks, and allows unlimited + * expansion. + */ + void set_ae_max_jfiles(const u_int16_t ae_max_jfiles); + + /** + * \brief Calculates the number of future files available for auto-expansion. 
+ * + * \return The number of future files available for auto-expansion. + */ + u_int16_t ae_jfiles_rem() const; + + /** + * \brief Get a pointer to fcntl instance for a given lfid. + * + * \return Pointer to fcntl object corresponding to logical file id lfid, or 0 if lfid is out of range + * (greater than number of files in use). + */ + inline fcntl* get_fcntlp(const u_int16_t lfid) const + { if (lfid >= _fcntl_arr.size()) return 0; return _fcntl_arr[lfid]; } + + // Testing functions + void get_pfid_list(std::vector<u_int16_t>& pfid_list) const; + void get_lfid_list(std::vector<u_int16_t>& lfid_list) const; + + protected: + + /** + * \brief Append num_jfiles files to the end of the logical and file id sequence. This is similar to extending + * the from-scratch initialization. + * + * \param jcp Pointer to jcntl instance. This is used to find the file path and base filename so that + * new files may be created. + * \param fp Pointer to function which creates and returns a pointer to a new fcntl object (and hence + * causes a new %journal file to be created). + * \param num_jfiles The number of files by which to increase. + */ + void append(jcntl* const jcp, + new_obj_fn_ptr fp, + const u_int16_t num_jfiles = 1); + + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_LPMGR_H diff --git a/cpp/src/qpid/legacystore/jrnl/pmgr.cpp b/cpp/src/qpid/legacystore/jrnl/pmgr.cpp new file mode 100644 index 0000000000..3dc61e2661 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/pmgr.cpp @@ -0,0 +1,215 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file pmgr.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::pmgr (page manager). See + * comments in file pmgr.h for details. + * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/pmgr.h" + +#include <cerrno> +#include <cstdlib> +#include <cstring> +#include "qpid/legacystore/jrnl/jcfg.h" +#include "qpid/legacystore/jrnl/jcntl.h" +#include "qpid/legacystore/jrnl/jerrno.h" +#include <sstream> + + +namespace mrg +{ +namespace journal +{ + +pmgr::page_cb::page_cb(u_int16_t index): + _index(index), + _state(UNUSED), + _wdblks(0), + _rdblks(0), + _pdtokl(0), + _wfh(0), + _rfh(0), + _pbuff(0) +{} + +const char* +pmgr::page_cb::state_str() const +{ + switch(_state) + { + case UNUSED: + return "UNUSED"; + case IN_USE: + return "IN_USE"; + case AIO_PENDING: + return "AIO_PENDING"; + case AIO_COMPLETE: + return "AIO_COMPLETE"; + } + return "<unknown>"; +} + +const u_int32_t pmgr::_sblksize = JRNL_SBLK_SIZE * JRNL_DBLK_SIZE; + +pmgr::pmgr(jcntl* jc, enq_map& emap, txn_map& tmap): + _cache_pgsize_sblks(0), + _cache_num_pages(0), + _jc(jc), + _emap(emap), + _tmap(tmap), + _page_base_ptr(0), + _page_ptr_arr(0), + _page_cb_arr(0), + _aio_cb_arr(0), + _aio_event_arr(0), + _ioctx(0), + _pg_index(0), + _pg_cntr(0), + _pg_offset_dblks(0), + _aio_evt_rem(0), + _cbp(0), + _enq_rec(), + _deq_rec(), + _txn_rec() +{} + +pmgr::~pmgr() +{ + pmgr::clean(); +} + +void +pmgr::initialize(aio_callback* const cbp, const u_int32_t cache_pgsize_sblks, const u_int16_t cache_num_pages) +{ + 
// As static use of this class keeps old values around, clean up first... + pmgr::clean(); + _pg_index = 0; + _pg_cntr = 0; + _pg_offset_dblks = 0; + _aio_evt_rem = 0; + _cache_pgsize_sblks = cache_pgsize_sblks; + _cache_num_pages = cache_num_pages; + _cbp = cbp; + + // 1. Allocate page memory (as a single block) + std::size_t cache_pgsize = _cache_num_pages * _cache_pgsize_sblks * _sblksize; + if (::posix_memalign(&_page_base_ptr, _sblksize, cache_pgsize)) + { + clean(); + std::ostringstream oss; + oss << "posix_memalign(): blksize=" << _sblksize << " size=" << cache_pgsize; + oss << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR__MALLOC, oss.str(), "pmgr", "initialize"); + } + // 2. Allocate array of page pointers + _page_ptr_arr = (void**)std::malloc(_cache_num_pages * sizeof(void*)); + MALLOC_CHK(_page_ptr_arr, "_page_ptr_arr", "pmgr", "initialize"); + + // 3. Allocate and initilaize page control block (page_cb) array + _page_cb_arr = (page_cb*)std::malloc(_cache_num_pages * sizeof(page_cb)); + MALLOC_CHK(_page_cb_arr, "_page_cb_arr", "pmgr", "initialize"); + std::memset(_page_cb_arr, 0, _cache_num_pages * sizeof(page_cb)); + + // 5. Allocate IO control block (iocb) array + _aio_cb_arr = (aio_cb*)std::malloc(_cache_num_pages * sizeof(aio_cb)); + MALLOC_CHK(_aio_cb_arr, "_aio_cb_arr", "pmgr", "initialize"); + + // 6. Set page pointers in _page_ptr_arr, _page_cb_arr and iocbs to pages within page block + for (u_int16_t i=0; i<_cache_num_pages; i++) + { + _page_ptr_arr[i] = (void*)((char*)_page_base_ptr + _cache_pgsize_sblks * _sblksize * i); + _page_cb_arr[i]._index = i; + _page_cb_arr[i]._state = UNUSED; + _page_cb_arr[i]._pbuff = _page_ptr_arr[i]; + _page_cb_arr[i]._pdtokl = new std::deque<data_tok*>; + _page_cb_arr[i]._pdtokl->clear(); + _aio_cb_arr[i].data = (void*)&_page_cb_arr[i]; + } + + // 7. 
Allocate io_event array, max one event per cache page plus one for each file + const u_int16_t max_aio_evts = _cache_num_pages + _jc->num_jfiles(); + _aio_event_arr = (aio_event*)std::malloc(max_aio_evts * sizeof(aio_event)); + MALLOC_CHK(_aio_event_arr, "_aio_event_arr", "pmgr", "initialize"); + + // 8. Initialize AIO context + if (int ret = aio::queue_init(max_aio_evts, &_ioctx)) + { + std::ostringstream oss; + oss << "io_queue_init() failed: " << FORMAT_SYSERR(-ret); + throw jexception(jerrno::JERR__AIO, oss.str(), "pmgr", "initialize"); + } +} + +void +pmgr::clean() +{ + // clean up allocated memory here + + if (_ioctx) + aio::queue_release(_ioctx); + + std::free(_page_base_ptr); + _page_base_ptr = 0; + + if (_page_cb_arr) + { + for (int i=0; i<_cache_num_pages; i++) + delete _page_cb_arr[i]._pdtokl; + std::free(_page_ptr_arr); + _page_ptr_arr = 0; + } + + std::free(_page_cb_arr); + _page_cb_arr = 0; + + std::free(_aio_cb_arr); + _aio_cb_arr = 0; + + std::free(_aio_event_arr); + _aio_event_arr = 0; +} + +const char* +pmgr::page_state_str(page_state ps) +{ + switch (ps) + { + case UNUSED: + return "UNUSED"; + case IN_USE: + return "IN_USE"; + case AIO_PENDING: + return "AIO_PENDING"; + case AIO_COMPLETE: + return "AIO_COMPLETE"; + } + return "<page_state unknown>"; +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/pmgr.h b/cpp/src/qpid/legacystore/jrnl/pmgr.h new file mode 100644 index 0000000000..64115e225e --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/pmgr.h @@ -0,0 +1,142 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file pmgr.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::pmgr (page manager). See + * class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_PMGR_H +#define QPID_LEGACYSTORE_JRNL_PMGR_H + +namespace mrg +{ +namespace journal +{ + class pmgr; + class jcntl; +} +} + +#include <deque> +#include "qpid/legacystore/jrnl/aio.h" +#include "qpid/legacystore/jrnl/aio_callback.h" +#include "qpid/legacystore/jrnl/data_tok.h" +#include "qpid/legacystore/jrnl/deq_rec.h" +#include "qpid/legacystore/jrnl/enq_map.h" +#include "qpid/legacystore/jrnl/enq_rec.h" +#include "qpid/legacystore/jrnl/fcntl.h" +#include "qpid/legacystore/jrnl/txn_map.h" +#include "qpid/legacystore/jrnl/txn_rec.h" + +namespace mrg +{ +namespace journal +{ + + /** + * \brief Abstract class for managing either read or write page cache of arbitrary size and + * number of cache_num_pages. + */ + class pmgr + { + public: + /** + * \brief Enumeration of possible stats of a page within a page cache. + */ + enum page_state + { + UNUSED, ///< A page is uninitialized, contains no data. + IN_USE, ///< Page is in use. + AIO_PENDING, ///< An AIO request outstanding. + AIO_COMPLETE ///< An AIO request is complete. + }; + + protected: + /** + * \brief Page control block, carries control and state information for each page in the + * cache. 
+ */ + struct page_cb + { + u_int16_t _index; ///< Index of this page + page_state _state; ///< Status of page + u_int64_t _frid; ///< First rid in page (used for fhdr init) + u_int32_t _wdblks; ///< Total number of dblks in page so far + u_int32_t _rdblks; ///< Total number of dblks in page + std::deque<data_tok*>* _pdtokl; ///< Page message tokens list + fcntl* _wfh; ///< File handle for incrementing write compl counts + fcntl* _rfh; ///< File handle for incrementing read compl counts + void* _pbuff; ///< Page buffer + + page_cb(u_int16_t index); ///< Convenience constructor + const char* state_str() const; ///< Return state as string for this pcb + }; + + static const u_int32_t _sblksize; ///< Disk softblock size + u_int32_t _cache_pgsize_sblks; ///< Size of page cache cache_num_pages + u_int16_t _cache_num_pages; ///< Number of page cache cache_num_pages + jcntl* _jc; ///< Pointer to journal controller + enq_map& _emap; ///< Ref to enqueue map + txn_map& _tmap; ///< Ref to transaction map + void* _page_base_ptr; ///< Base pointer to page memory + void** _page_ptr_arr; ///< Array of pointers to cache_num_pages in page memory + page_cb* _page_cb_arr; ///< Array of page_cb structs + aio_cb* _aio_cb_arr; ///< Array of iocb structs + aio_event* _aio_event_arr; ///< Array of io_events + io_context_t _ioctx; ///< AIO context for read/write operations + u_int16_t _pg_index; ///< Index of current page being used + u_int32_t _pg_cntr; ///< Page counter; determines if file rotation req'd + u_int32_t _pg_offset_dblks; ///< Page offset (used so far) in data blocks + u_int32_t _aio_evt_rem; ///< Remaining AIO events + aio_callback* _cbp; ///< Pointer to callback object + + enq_rec _enq_rec; ///< Enqueue record used for encoding/decoding + deq_rec _deq_rec; ///< Dequeue record used for encoding/decoding + txn_rec _txn_rec; ///< Transaction record used for encoding/decoding + + public: + pmgr(jcntl* jc, enq_map& emap, txn_map& tmap); + virtual ~pmgr(); + + virtual int32_t 
get_events(page_state state, timespec* const timeout, bool flush = false) = 0; + inline u_int32_t get_aio_evt_rem() const { return _aio_evt_rem; } + static const char* page_state_str(page_state ps); + inline u_int32_t cache_pgsize_sblks() const { return _cache_pgsize_sblks; } + inline u_int16_t cache_num_pages() const { return _cache_num_pages; } + + protected: + virtual void initialize(aio_callback* const cbp, const u_int32_t cache_pgsize_sblks, + const u_int16_t cache_num_pages); + virtual void rotate_page() = 0; + virtual void clean(); + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_PMGR_H diff --git a/cpp/src/qpid/legacystore/jrnl/rcvdat.h b/cpp/src/qpid/legacystore/jrnl/rcvdat.h new file mode 100644 index 0000000000..a7ef2341f0 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/rcvdat.h @@ -0,0 +1,181 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file rcvdat.h + * + * Qpid asynchronous store plugin library + * + * Contains structure for recovery status and offset data. 
+ * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_RCVDAT_H +#define QPID_LEGACYSTORE_JRNL_RCVDAT_H + +#include <cstddef> +#include <iomanip> +#include <map> +#include "qpid/legacystore/jrnl/jcfg.h" +#include <sstream> +#include <sys/types.h> +#include <vector> + +namespace mrg +{ +namespace journal +{ + + struct rcvdat + { + u_int16_t _njf; ///< Number of journal files + bool _ae; ///< Auto-expand mode + u_int16_t _aemjf; ///< Auto-expand mode max journal files + bool _owi; ///< Overwrite indicator + bool _frot; ///< First rotation flag + bool _jempty; ///< Journal data files empty + u_int16_t _ffid; ///< First file id + std::size_t _fro; ///< First record offset in ffid + u_int16_t _lfid; ///< Last file id + std::size_t _eo; ///< End offset (first byte past last record) + u_int64_t _h_rid; ///< Highest rid found + bool _lffull; ///< Last file is full + bool _jfull; ///< Journal is full + std::vector<u_int16_t> _fid_list; ///< Fid-lid mapping - list of fids in order of lid + std::vector<u_int32_t> _enq_cnt_list; ///< Number enqueued records found for each file + + rcvdat(): + _njf(0), + _ae(false), + _aemjf(0), + _owi(false), + _frot(false), + _jempty(true), + _ffid(0), + _fro(0), + _lfid(0), + _eo(0), + _h_rid(0), + _lffull(false), + _jfull(false), + _fid_list(), + _enq_cnt_list() + {} + + void reset(const u_int16_t num_jfiles, const bool auto_expand, const u_int16_t ae_max_jfiles) + { + _njf = num_jfiles; + _ae = auto_expand; + _aemjf = ae_max_jfiles; + _owi = false; + _frot = false; + _jempty = true; + _ffid = 0; + _fro = 0; + _lfid = 0; + _eo = 0; + _h_rid = 0; + _lffull = false; + _jfull = false; + _fid_list.clear(); + _enq_cnt_list.clear(); + _enq_cnt_list.resize(num_jfiles, 0); + } + + // Find first fid with enqueued records + u_int16_t ffid() + { + u_int16_t index = _ffid; + while (index != _lfid && _enq_cnt_list[index] == 0) + { + if (++index >= _njf) + index = 0; + } + return index; + } + + std::string to_string(const std::string& 
jid) + { + std::ostringstream oss; + oss << "Recover file analysis (jid=\"" << jid << "\"):" << std::endl; + oss << " Number of journal files (_njf) = " << _njf << std::endl; + oss << " Auto-expand mode (_ae) = " << (_ae ? "TRUE" : "FALSE") << std::endl; + if (_ae) oss << " Auto-expand mode max journal files (_aemjf) = " << _aemjf << std::endl; + oss << " Overwrite indicator (_owi) = " << (_owi ? "TRUE" : "FALSE") << std::endl; + oss << " First rotation (_frot) = " << (_frot ? "TRUE" : "FALSE") << std::endl; + oss << " Journal empty (_jempty) = " << (_jempty ? "TRUE" : "FALSE") << std::endl; + oss << " First (earliest) fid (_ffid) = " << _ffid << std::endl; + oss << " First record offset in first fid (_fro) = 0x" << std::hex << _fro << + std::dec << " (" << (_fro/JRNL_DBLK_SIZE) << " dblks)" << std::endl; + oss << " Last (most recent) fid (_lfid) = " << _lfid << std::endl; + oss << " End offset (_eo) = 0x" << std::hex << _eo << std::dec << " (" << + (_eo/JRNL_DBLK_SIZE) << " dblks)" << std::endl; + oss << " Highest rid (_h_rid) = 0x" << std::hex << _h_rid << std::dec << std::endl; + oss << " Last file full (_lffull) = " << (_lffull ? "TRUE" : "FALSE") << std::endl; + oss << " Journal full (_jfull) = " << (_jfull ? "TRUE" : "FALSE") << std::endl; + oss << " Normalized fid list (_fid_list) = ["; + for (std::vector<u_int16_t>::const_iterator i = _fid_list.begin(); i < _fid_list.end(); i++) + { + if (i != _fid_list.begin()) oss << ", "; + oss << *i; + } + oss << "]" << std::endl; + oss << " Enqueued records (txn & non-txn):" << std::endl; + for (unsigned i=0; i<_enq_cnt_list.size(); i++) + oss << " File " << std::setw(2) << i << ": " << _enq_cnt_list[i] << + std::endl; + return oss.str(); + } + + std::string to_log(const std::string& jid) + { + std::ostringstream oss; + oss << "Recover file analysis (jid=\"" << jid << "\"):"; + oss << " njf=" << _njf; + oss << " ae=" << (_owi ? "T" : "F"); + oss << " aemjf=" << _aemjf; + oss << " owi=" << (_ae ? 
"T" : "F"); + oss << " frot=" << (_frot ? "T" : "F"); + oss << " jempty=" << (_jempty ? "T" : "F"); + oss << " ffid=" << _ffid; + oss << " fro=0x" << std::hex << _fro << std::dec << " (" << + (_fro/JRNL_DBLK_SIZE) << " dblks)"; + oss << " lfid=" << _lfid; + oss << " eo=0x" << std::hex << _eo << std::dec << " (" << + (_eo/JRNL_DBLK_SIZE) << " dblks)"; + oss << " h_rid=0x" << std::hex << _h_rid << std::dec; + oss << " lffull=" << (_lffull ? "T" : "F"); + oss << " jfull=" << (_jfull ? "T" : "F"); + oss << " Enqueued records (txn & non-txn): [ "; + for (unsigned i=0; i<_enq_cnt_list.size(); i++) + { + if (i) oss << " "; + oss << "fid_" << std::setw(2) << std::setfill('0') << i << "=" << _enq_cnt_list[i]; + } + oss << " ]"; + return oss.str(); + } + }; +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_RCVDAT_H diff --git a/cpp/src/qpid/legacystore/jrnl/rec_hdr.h b/cpp/src/qpid/legacystore/jrnl/rec_hdr.h new file mode 100644 index 0000000000..ff6325a760 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/rec_hdr.h @@ -0,0 +1,143 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +/** + * \file rec_hdr.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::rec_hdr (record header), + * which is a common initial header used for all journal record structures + * except the record tail (rec_tail). + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_REC_HDR_H +#define QPID_LEGACYSTORE_JRNL_REC_HDR_H + +#include <cstddef> +#include "qpid/legacystore/jrnl/jcfg.h" +#include <sys/types.h> + +namespace mrg +{ +namespace journal +{ + +#pragma pack(1) + + /** + * \brief Struct for data common to the head of all journal files and records. + * This includes identification for the file type, the encoding version, endian + * indicator and a record ID. + * + * File header info in binary format (16 bytes): + * <pre> + * 0 7 + * +---+---+---+---+---+---+---+---+ + * | magic | v | e | flags | + * +---+---+---+---+---+---+---+---+ + * | rid | + * +---+---+---+---+---+---+---+---+ + * v = file version (If the format or encoding of this file changes, then this + * number should be incremented) + * e = endian flag, false (0x00) for little endian, true (0x01) for big endian + * </pre> + * + * Note that journal files should be transferable between 32- and 64-bit + * hardware of the same endianness, but not between hardware of opposite + * entianness without some sort of binary conversion utility. Thus buffering + * will be needed for types that change size between 32- and 64-bit compiles. + */ + struct rec_hdr + { + u_int32_t _magic; ///< File type identifier (magic number) + u_int8_t _version; ///< File encoding version + u_int8_t _eflag; ///< Flag for determining endianness + u_int16_t _uflag; ///< User-defined flags + u_int64_t _rid; ///< Record ID (rotating 64-bit counter) + + // Global flags + static const u_int16_t HDR_OVERWRITE_INDICATOR_MASK = 0x1; + + // Convenience constructors and methods + /** + * \brief Default constructor, which sets all values to 0. 
+ */ + inline rec_hdr(): _magic(0), _version(0), _eflag(0), _uflag(0), _rid(0) {} + + /** + * \brief Convenience constructor which initializes values during construction. + */ + inline rec_hdr(const u_int32_t magic, const u_int8_t version, const u_int64_t rid, + const bool owi): _magic(magic), _version(version), +#if defined(JRNL_BIG_ENDIAN) + _eflag(RHM_BENDIAN_FLAG), +#else + _eflag(RHM_LENDIAN_FLAG), +#endif + _uflag(owi ? HDR_OVERWRITE_INDICATOR_MASK : 0), _rid(rid) {} + + /** + * \brief Convenience copy method. + */ + inline void hdr_copy(const rec_hdr& h) + { + _magic = h._magic; + _version = h._version; + _eflag = h._eflag; + _uflag = h._uflag; + _rid =h._rid; + } + + /** + * \brief Resets all fields to 0 + */ + inline void reset() + { + _magic = 0; + _version = 0; + _eflag = 0; + _uflag = 0; + _rid = 0; + } + + inline bool get_owi() const { return _uflag & HDR_OVERWRITE_INDICATOR_MASK; } + + inline void set_owi(const bool owi) + { + _uflag = owi ? _uflag | HDR_OVERWRITE_INDICATOR_MASK : + _uflag & (~HDR_OVERWRITE_INDICATOR_MASK); + } + + /** + * \brief Returns the size of the header in bytes. + */ + inline static std::size_t size() { return sizeof(rec_hdr); } + }; // struct rec_hdr + +#pragma pack() + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_REC_HDR_H diff --git a/cpp/src/qpid/legacystore/jrnl/rec_tail.h b/cpp/src/qpid/legacystore/jrnl/rec_tail.h new file mode 100644 index 0000000000..0c36151927 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/rec_tail.h @@ -0,0 +1,98 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file rec_tail.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::rec_tail (record tail), used to + * finalize a persistent record. The presence of a valid tail at the expected + * position in the journal file indicates that the record write was completed. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_REC_TAIL_H +#define QPID_LEGACYSTORE_JRNL_REC_TAIL_H + +#include <cstddef> +#include "qpid/legacystore/jrnl/jcfg.h" + +namespace mrg +{ +namespace journal +{ + +#pragma pack(1) + + /** + * \brief Struct for data common to the tail of all records. The magic number + * used here is the binary inverse (1's complement) of the magic used in the + * record header; this minimizes possible confusion with other headers that may + * be present during recovery. The tail is used with all records that have either + * XIDs or data - ie any size-variable content. Currently the only records that + * do NOT use the tail are non-transactional dequeues and filler records. + * + * Record header info in binary format (12 bytes): + * <pre> + * 0 7 + * +---+---+---+---+---+---+---+---+ + * | ~(magic) | rid | + * +---+---+---+---+---+---+---+---+ + * | rid (con't) | + * +---+---+---+---+ + * </pre> + */ + struct rec_tail + { + u_int32_t _xmagic; ///< Binary inverse (1's complement) of hdr magic number + u_int64_t _rid; ///< ID (rotating 64-bit counter) + + + /** + * \brief Default constructor, which sets all values to 0. 
+ */ + inline rec_tail(): _xmagic(0xffffffff), _rid(0) {} + + /** + * \brief Convenience constructor which initializes values during construction from + * existing enq_hdr instance. + */ + inline rec_tail(const rec_hdr& h): _xmagic(~h._magic), _rid(h._rid) {} + + /** + * \brief Convenience constructor which initializes values during construction. + */ + inline rec_tail(const u_int32_t xmagic, const u_int64_t rid): _xmagic(xmagic), _rid(rid) {} + + /** + * \brief Returns the size of the header in bytes. + */ + inline static std::size_t size() { return sizeof(rec_tail); } + }; + +#pragma pack() + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_REC_TAIL_H diff --git a/cpp/src/qpid/legacystore/jrnl/rfc.cpp b/cpp/src/qpid/legacystore/jrnl/rfc.cpp new file mode 100644 index 0000000000..9b5ed95e81 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/rfc.cpp @@ -0,0 +1,82 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file rfc.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::rfc (rotating + * file controller). See comments in file rfc.h for details. 
+ * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/rfc.h" + +#include <cassert> + +namespace mrg +{ +namespace journal +{ + +rfc::rfc(const lpmgr* lpmp): _lpmp(lpmp), _fc_index(0), _curr_fc(0) +{} + +rfc::~rfc() +{} + +void +rfc::finalize() +{ + unset_findex(); +} + +void +rfc::set_findex(const u_int16_t fc_index) +{ + _fc_index = fc_index; + _curr_fc = _lpmp->get_fcntlp(fc_index); + _curr_fc->rd_reset(); +} + +void +rfc::unset_findex() +{ + _fc_index = 0; + _curr_fc = 0; +} + +std::string +rfc::status_str() const +{ + if (!_lpmp->is_init()) + return "state: Uninitialized"; + if (_curr_fc == 0) + return "state: Inactive"; + std::ostringstream oss; + oss << "state: Active"; + return oss.str(); +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/rfc.h b/cpp/src/qpid/legacystore/jrnl/rfc.h new file mode 100644 index 0000000000..faa5d566ba --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/rfc.h @@ -0,0 +1,193 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file rfc.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::rfc (rotating + * file controller). 
See class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_RFC_H +#define QPID_LEGACYSTORE_JRNL_RFC_H + +namespace mrg +{ +namespace journal +{ +class rfc; +} +} + +#include "qpid/legacystore/jrnl/lpmgr.h" +#include "qpid/legacystore/jrnl/enums.h" + +namespace mrg +{ +namespace journal +{ + + /** + * \class rfc + * \brief Rotating File Controller (rfc) - Class to handle the manangement of an array of file controllers (fcntl) + * objects for use in a circular disk buffer (journal). Each fcntl object corresponds to a file in the journal. + * + * The following states exist in this class: + * + * <pre> + * is_init() is_active() + * +===+ _lpmp.is_init() == false + * +---------->| | Uninitialized: _curr_fc == 0 F F + * | +-->+===+ --+ + * | | | + * | | | + * | finalize() initialize() + * | | | + * | | | + * | +-- +===+<--+ _lpmp.is_init() == true + * finalize() | | Inactive: _curr_fc == 0 T F + * | +-->+===+ --+ + * | | | + * | | | + * | unset_findex() set_findex() + * | | | + * | | | + * | +-- +===+<--+ _lpmp.is_init() == true + * +---------- | | Active: _curr_fc != 0 T T + * +===+ + * </pre> + * + * The Uninitialized state is where the class starts after construction. Once the number of files is known and + * the array of file controllers allocated, then initialize() is called to set these, causing the state to move + * to Inactive. + * + * The Inactive state has the file controllers allocated and pointing to their respective journal files, but no + * current file controller has been selected. The pointer to the current file controller _curr_fc is null. Once the + * index of the active file is known, then calling set_findex() will set the index and internal pointer + * to the currently active file controller. This moves the state to Active. + * + * Note TODO: Comment on sync issues between change in num files in _lpmp and _fc_index/_curr_fc. 
+ */ + class rfc + { + protected: + const lpmgr* _lpmp; ///< Pointer to jcntl's lpmgr instance containing lfid/pfid map and fcntl objects + u_int16_t _fc_index; ///< Index of current file controller + fcntl* _curr_fc; ///< Pointer to current file controller + + public: + rfc(const lpmgr* lpmp); + virtual ~rfc(); + + /** + * \brief Initialize the controller, moving from state Uninitialized to Inactive. The main function of + * initialize() is to set the number of files and the pointer to the fcntl array. + */ + virtual inline void initialize() {} + + /** + * \brief Reset the controller to Uninitialized state, usually called when the journal is stopped. Once called, + * initialize() must be called to reuse an instance. + */ + virtual void finalize(); + + /** + * \brief Check initialization state: true = Not Uninitialized, ie Initialized or Active; false = Uninitialized. + */ + virtual inline bool is_init() const { return _lpmp->is_init(); } + + /** + * \brief Check active state: true = Initialized and _curr_fc not null; false otherwise. + */ + virtual inline bool is_active() const { return _lpmp->is_init() && _curr_fc != 0; } + + /** + * \brief Sets the current file index and active fcntl object. Moves to state Active. + */ + virtual void set_findex(const u_int16_t fc_index); + + /** + * \brief Nulls the current file index and active fcntl pointer, moves to state Inactive. + */ + virtual void unset_findex(); + + /** + * \brief Rotate active file controller to next file in rotating file group. + * \exception jerrno::JERR__NINIT if called before calling initialize(). + */ + virtual iores rotate() = 0; + + /** + * \brief Returns the index of the currently active file within the rotating file group. + */ + inline u_int16_t index() const { return _fc_index; } + + /** + * \brief Returns the currently active journal file controller within the rotating file group. 
+ */ + inline fcntl* file_controller() const { return _curr_fc; } + + /** + * \brief Returns the currently active physical file id (pfid) + */ + inline u_int16_t pfid() const { return _curr_fc->pfid(); } + + // Convenience access methods to current file controller + // Note: Do not call when not in active state + + inline u_int32_t enqcnt() const { return _curr_fc->enqcnt(); } + inline u_int32_t incr_enqcnt() { return _curr_fc->incr_enqcnt(); } + inline u_int32_t incr_enqcnt(const u_int16_t fid) { return _lpmp->get_fcntlp(fid)->incr_enqcnt(); } + inline u_int32_t add_enqcnt(const u_int32_t a) { return _curr_fc->add_enqcnt(a); } + inline u_int32_t add_enqcnt(const u_int16_t fid, const u_int32_t a) + { return _lpmp->get_fcntlp(fid)->add_enqcnt(a); } + inline u_int32_t decr_enqcnt(const u_int16_t fid) { return _lpmp->get_fcntlp(fid)->decr_enqcnt(); } + inline u_int32_t subtr_enqcnt(const u_int16_t fid, const u_int32_t s) + { return _lpmp->get_fcntlp(fid)->subtr_enqcnt(s); } + + virtual inline u_int32_t subm_cnt_dblks() const = 0; + virtual inline std::size_t subm_offs() const = 0; + virtual inline u_int32_t add_subm_cnt_dblks(u_int32_t a) = 0; + + virtual inline u_int32_t cmpl_cnt_dblks() const = 0; + virtual inline std::size_t cmpl_offs() const = 0; + virtual inline u_int32_t add_cmpl_cnt_dblks(u_int32_t a) = 0; + + virtual inline bool is_void() const = 0; + virtual inline bool is_empty() const = 0; + virtual inline u_int32_t remaining_dblks() const = 0; + virtual inline bool is_full() const = 0; + virtual inline bool is_compl() const = 0; + virtual inline u_int32_t aio_outstanding_dblks() const = 0; + virtual inline bool file_rotate() const = 0; + + // Debug aid + virtual std::string status_str() const; + }; // class rfc + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_RFC_H diff --git a/cpp/src/qpid/legacystore/jrnl/rmgr.cpp b/cpp/src/qpid/legacystore/jrnl/rmgr.cpp new file mode 100644 index 0000000000..3a11817d1e --- 
/dev/null +++ b/cpp/src/qpid/legacystore/jrnl/rmgr.cpp @@ -0,0 +1,698 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file rmgr.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::rmgr (read manager). See + * comments in file rmgr.h for details. 
+ * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/rmgr.h" + +#include <cassert> +#include <cerrno> +#include <cstdlib> +#include "qpid/legacystore/jrnl/jcntl.h" +#include "qpid/legacystore/jrnl/jerrno.h" +#include <sstream> + +namespace mrg +{ +namespace journal +{ + +rmgr::rmgr(jcntl* jc, enq_map& emap, txn_map& tmap, rrfc& rrfc): + pmgr(jc, emap, tmap), + _rrfc(rrfc), + _hdr(), + _fhdr_buffer(0), + _fhdr_aio_cb_ptr(0), + _fhdr_rd_outstanding(false) +{} + +rmgr::~rmgr() +{ + rmgr::clean(); +} + +void +rmgr::initialize(aio_callback* const cbp) +{ + pmgr::initialize(cbp, JRNL_RMGR_PAGE_SIZE, JRNL_RMGR_PAGES); + clean(); + // Allocate memory for reading file header + if (::posix_memalign(&_fhdr_buffer, _sblksize, _sblksize)) + { + std::ostringstream oss; + oss << "posix_memalign(): blksize=" << _sblksize << " size=" << _sblksize; + oss << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR__MALLOC, oss.str(), "rmgr", "initialize"); + } + _fhdr_aio_cb_ptr = new aio_cb; + std::memset(_fhdr_aio_cb_ptr, 0, sizeof(aio_cb*)); +} + +void +rmgr::clean() +{ + std::free(_fhdr_buffer); + _fhdr_buffer = 0; + + if (_fhdr_aio_cb_ptr) + { + delete _fhdr_aio_cb_ptr; + _fhdr_aio_cb_ptr = 0; + } +} + +iores +rmgr::read(void** const datapp, std::size_t& dsize, void** const xidpp, std::size_t& xidsize, + bool& transient, bool& external, data_tok* dtokp, bool ignore_pending_txns) +{ + iores res = pre_read_check(dtokp); + if (res != RHM_IORES_SUCCESS) + { + set_params_null(datapp, dsize, xidpp, xidsize); + return res; + } + + if (dtokp->rstate() == data_tok::SKIP_PART) + { + if (_page_cb_arr[_pg_index]._state != AIO_COMPLETE) + { + aio_cycle(); // check if rd AIOs returned; initiate new reads if possible + return RHM_IORES_PAGE_AIOWAIT; + } + const iores res = skip(dtokp); + if (res != RHM_IORES_SUCCESS) + { + set_params_null(datapp, dsize, xidpp, xidsize); + return res; + } + } + if (dtokp->rstate() == data_tok::READ_PART) + { + assert(dtokp->rid() == _hdr._rid); 
+ void* rptr = (void*)((char*)_page_ptr_arr[_pg_index] + (_pg_offset_dblks * JRNL_DBLK_SIZE)); + const iores res = read_enq(_hdr, rptr, dtokp); + dsize = _enq_rec.get_data(datapp); + xidsize = _enq_rec.get_xid(xidpp); + transient = _enq_rec.is_transient(); + external = _enq_rec.is_external(); + return res; + } + + set_params_null(datapp, dsize, xidpp, xidsize); + _hdr.reset(); + // Read header, determine next record type + while (true) + { + if(dblks_rem() == 0 && _rrfc.is_compl() && !_rrfc.is_wr_aio_outstanding()) + { + aio_cycle(); // check if rd AIOs returned; initiate new reads if possible + if(dblks_rem() == 0 && _rrfc.is_compl() && !_rrfc.is_wr_aio_outstanding()) + { + if (_jc->unflushed_dblks() > 0) + _jc->flush(); + else if (!_aio_evt_rem) + return RHM_IORES_EMPTY; + } + } + if (_page_cb_arr[_pg_index]._state != AIO_COMPLETE) + { + aio_cycle(); + return RHM_IORES_PAGE_AIOWAIT; + } + void* rptr = (void*)((char*)_page_ptr_arr[_pg_index] + (_pg_offset_dblks * JRNL_DBLK_SIZE)); + std::memcpy(&_hdr, rptr, sizeof(rec_hdr)); + switch (_hdr._magic) + { + case RHM_JDAT_ENQ_MAGIC: + { + _enq_rec.reset(); // sets enqueue rec size + // Check if RID of this rec is still enqueued, if so read it, else skip + bool is_enq = false; + int16_t fid = _emap.get_pfid(_hdr._rid); + if (fid < enq_map::EMAP_OK) + { + bool enforce_txns = !_jc->is_read_only() && !ignore_pending_txns; + // Block read for transactionally locked record (only when not recovering) + if (fid == enq_map::EMAP_LOCKED && enforce_txns) + return RHM_IORES_TXPENDING; + + // (Recover mode only) Ok, not in emap - now search tmap, if present then read + is_enq = _tmap.is_enq(_hdr._rid); + if (enforce_txns && is_enq) + return RHM_IORES_TXPENDING; + } + else + is_enq = true; + + if (is_enq) // ok, this record is enqueued, check it, then read it... 
+ { + if (dtokp->rid()) + { + if (_hdr._rid != dtokp->rid()) + { + std::ostringstream oss; + oss << std::hex << "rid=0x" << _hdr._rid << "; dtok_rid=0x" << dtokp->rid() + << "; dtok_id=0x" << dtokp->id(); + throw jexception(jerrno::JERR_RMGR_RIDMISMATCH, oss.str(), "rmgr", "read"); + } + } + else + dtokp->set_rid(_hdr._rid); + +// TODO: Add member _fid to pmgr::page_cb which indicates the fid from which this page was +// populated. When this value is set in wmgr::flush() somewehere, then uncomment the following +// check: +// if (fid != _page_cb_arr[_pg_index]._fid) +// { +// std::ostringstream oss; +// oss << std::hex << std::setfill('0'); +// oss << "rid=0x" << std::setw(16) << _hdr._rid; +// oss << "; emap_fid=0x" << std::setw(4) << fid; +// oss << "; current_fid=" << _rrfc.fid(); +// throw jexception(jerrno::JERR_RMGR_FIDMISMATCH, oss.str(), "rmgr", +// "read"); +// } + + const iores res = read_enq(_hdr, rptr, dtokp); + dsize = _enq_rec.get_data(datapp); + xidsize = _enq_rec.get_xid(xidpp); + transient = _enq_rec.is_transient(); + external = _enq_rec.is_external(); + return res; + } + else // skip this record, it is already dequeued + consume_xid_rec(_hdr, rptr, dtokp); + break; + } + case RHM_JDAT_DEQ_MAGIC: + consume_xid_rec(_hdr, rptr, dtokp); + break; + case RHM_JDAT_TXA_MAGIC: + consume_xid_rec(_hdr, rptr, dtokp); + break; + case RHM_JDAT_TXC_MAGIC: + consume_xid_rec(_hdr, rptr, dtokp); + break; + case RHM_JDAT_EMPTY_MAGIC: + consume_filler(); + break; + default: + return RHM_IORES_EMPTY; + } + } +} + +int32_t +rmgr::get_events(page_state state, timespec* const timeout, bool flush) +{ + if (_aio_evt_rem == 0) // no events to get + return 0; + + int32_t ret; + if ((ret = aio::getevents(_ioctx, flush ? 
_aio_evt_rem : 1, _aio_evt_rem/*_cache_num_pages + _jc->num_jfiles()*/, _aio_event_arr, timeout)) < 0) + { + if (ret == -EINTR) // Interrupted by signal + return 0; + std::ostringstream oss; + oss << "io_getevents() failed: " << std::strerror(-ret) << " (" << ret << ")"; + throw jexception(jerrno::JERR__AIO, oss.str(), "rmgr", "get_events"); + } + if (ret == 0 && timeout) + return jerrno::AIO_TIMEOUT; + + std::vector<u_int16_t> pil; + pil.reserve(ret); + for (int i=0; i<ret; i++) // Index of returned AIOs + { + if (_aio_evt_rem == 0) + { + std::ostringstream oss; + oss << "_aio_evt_rem; evt " << (i + 1) << " of " << ret; + throw jexception(jerrno::JERR__UNDERFLOW, oss.str(), "rmgr", "get_events"); + } + _aio_evt_rem--; + aio_cb* aiocbp = _aio_event_arr[i].obj; // This I/O control block (iocb) + page_cb* pcbp = (page_cb*)(aiocbp->data); // This page control block (pcb) + long aioret = (long)_aio_event_arr[i].res; + if (aioret < 0) + { + std::ostringstream oss; + oss << "AIO read operation failed: " << std::strerror(-aioret) << " (" << aioret << ")"; + oss << " [pg=" << pcbp->_index << " buf=" << aiocbp->u.c.buf; + oss << " rsize=0x" << std::hex << aiocbp->u.c.nbytes; + oss << " offset=0x" << aiocbp->u.c.offset << std::dec; + oss << " fh=" << aiocbp->aio_fildes << "]"; + throw jexception(jerrno::JERR__AIO, oss.str(), "rmgr", "get_events"); + } + + if (pcbp) // Page reads have pcb + { + if (pcbp->_rfh->rd_subm_cnt_dblks() >= JRNL_SBLK_SIZE) // Detects if write reset of this fcntl obj has occurred. + { + // Increment the completed read offset + // NOTE: We cannot use _rrfc here, as it may have rotated since submitting count. + // Use stored pointer to fcntl in the pcb instead. 
+ pcbp->_rdblks = aiocbp->u.c.nbytes / JRNL_DBLK_SIZE; + pcbp->_rfh->add_rd_cmpl_cnt_dblks(pcbp->_rdblks); + pcbp->_state = state; + pil[i] = pcbp->_index; + } + } + else // File header reads have no pcb + { + std::memcpy(&_fhdr, _fhdr_buffer, sizeof(file_hdr)); + _rrfc.add_cmpl_cnt_dblks(JRNL_SBLK_SIZE); + + u_int32_t fro_dblks = (_fhdr._fro / JRNL_DBLK_SIZE) - JRNL_SBLK_SIZE; + // Check fro_dblks does not exceed the write pointers which can happen in some corrupted journal recoveries + if (fro_dblks > _jc->wr_subm_cnt_dblks(_fhdr._pfid) - JRNL_SBLK_SIZE) + fro_dblks = _jc->wr_subm_cnt_dblks(_fhdr._pfid) - JRNL_SBLK_SIZE; + _pg_cntr = fro_dblks / (JRNL_RMGR_PAGE_SIZE * JRNL_SBLK_SIZE); + u_int32_t tot_pg_offs_dblks = _pg_cntr * JRNL_RMGR_PAGE_SIZE * JRNL_SBLK_SIZE; + _pg_index = _pg_cntr % JRNL_RMGR_PAGES; + _pg_offset_dblks = fro_dblks - tot_pg_offs_dblks; + _rrfc.add_subm_cnt_dblks(tot_pg_offs_dblks); + _rrfc.add_cmpl_cnt_dblks(tot_pg_offs_dblks); + + _fhdr_rd_outstanding = false; + _rrfc.set_valid(); + } + } + + // Perform AIO return callback + if (_cbp && ret) + _cbp->rd_aio_cb(pil); + return ret; +} + +void +rmgr::recover_complete() +{} + +void +rmgr::invalidate() +{ + if (_rrfc.is_valid()) + _rrfc.set_invalid(); +} + +void +rmgr::flush(timespec* timeout) +{ + // Wait for any outstanding AIO read operations to complete before synchronizing + while (_aio_evt_rem) + { + if (get_events(AIO_COMPLETE, timeout) == jerrno::AIO_TIMEOUT) // timed out, nothing returned + { + throw jexception(jerrno::JERR__TIMEOUT, + "Timed out waiting for outstanding read aio to return", "rmgr", "init_validation"); + } + } + + // Reset all read states and pointers + for (int i=0; i<_cache_num_pages; i++) + _page_cb_arr[i]._state = UNUSED; + _rrfc.unset_findex(); + _pg_index = 0; + _pg_offset_dblks = 0; +} + +bool +rmgr::wait_for_validity(timespec* timeout, const bool throw_on_timeout) +{ + bool timed_out = false; + while (!_rrfc.is_valid() && !timed_out) + { + timed_out = 
get_events(AIO_COMPLETE, timeout) == jerrno::AIO_TIMEOUT; + if (timed_out && throw_on_timeout) + throw jexception(jerrno::JERR__TIMEOUT, "Timed out waiting for read validity", "rmgr", "wait_for_validity"); + } + return _rrfc.is_valid(); +} + +iores +rmgr::pre_read_check(data_tok* dtokp) +{ + if (_aio_evt_rem) + get_events(AIO_COMPLETE, 0); + + if (!_rrfc.is_valid()) + return RHM_IORES_RCINVALID; + + // block reads until outstanding file header read completes as fro is needed to read + if (_fhdr_rd_outstanding) + return RHM_IORES_PAGE_AIOWAIT; + + if(dblks_rem() == 0 && _rrfc.is_compl() && !_rrfc.is_wr_aio_outstanding()) + { + aio_cycle(); // check if any AIOs have returned + if(dblks_rem() == 0 && _rrfc.is_compl() && !_rrfc.is_wr_aio_outstanding()) + { + if (_jc->unflushed_dblks() > 0) + _jc->flush(); + else if (!_aio_evt_rem) + return RHM_IORES_EMPTY; + } + } + + // Check write state of this token is ENQ - required for read + if (dtokp) + { + if (!dtokp->is_readable()) + { + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + oss << "dtok_id=0x" << std::setw(8) << dtokp->id(); + oss << "; dtok_rid=0x" << std::setw(16) << dtokp->rid(); + oss << "; dtok_wstate=" << dtokp->wstate_str(); + throw jexception(jerrno::JERR_RMGR_ENQSTATE, oss.str(), "rmgr", "pre_read_check"); + } + } + + return RHM_IORES_SUCCESS; +} + +iores +rmgr::read_enq(rec_hdr& h, void* rptr, data_tok* dtokp) +{ + if (_page_cb_arr[_pg_index]._state != AIO_COMPLETE) + { + aio_cycle(); // check if any AIOs have returned + return RHM_IORES_PAGE_AIOWAIT; + } + + // Read data from this page, first block will have header and data size. 
+ u_int32_t dblks_rd = _enq_rec.decode(h, rptr, dtokp->dblocks_read(), dblks_rem()); + dtokp->incr_dblocks_read(dblks_rd); + + _pg_offset_dblks += dblks_rd; + + // If data still incomplete, move to next page and decode again + while (dtokp->dblocks_read() < _enq_rec.rec_size_dblks()) + { + rotate_page(); + if (_page_cb_arr[_pg_index]._state != AIO_COMPLETE) + { + dtokp->set_rstate(data_tok::READ_PART); + dtokp->set_dsize(_enq_rec.data_size()); + return RHM_IORES_PAGE_AIOWAIT; + } + + rptr = (void*)((char*)_page_ptr_arr[_pg_index]); + dblks_rd = _enq_rec.decode(h, rptr, dtokp->dblocks_read(), dblks_rem()); + dtokp->incr_dblocks_read(dblks_rd); + _pg_offset_dblks += dblks_rd; + } + + // If we have finished with this page, rotate it + if (dblks_rem() == 0) + rotate_page(); + + // Set the record size in dtokp + dtokp->set_rstate(data_tok::READ); + dtokp->set_dsize(_enq_rec.data_size()); + return RHM_IORES_SUCCESS; +} + +void +rmgr::consume_xid_rec(rec_hdr& h, void* rptr, data_tok* dtokp) +{ + if (h._magic == RHM_JDAT_ENQ_MAGIC) + { + enq_hdr ehdr; + std::memcpy(&ehdr, rptr, sizeof(enq_hdr)); + if (ehdr.is_external()) + dtokp->set_dsize(ehdr._xidsize + sizeof(enq_hdr) + sizeof(rec_tail)); + else + dtokp->set_dsize(ehdr._xidsize + ehdr._dsize + sizeof(enq_hdr) + sizeof(rec_tail)); + } + else if (h._magic == RHM_JDAT_DEQ_MAGIC) + { + deq_hdr dhdr; + std::memcpy(&dhdr, rptr, sizeof(deq_hdr)); + if (dhdr._xidsize) + dtokp->set_dsize(dhdr._xidsize + sizeof(deq_hdr) + sizeof(rec_tail)); + else + dtokp->set_dsize(sizeof(deq_hdr)); + } + else if (h._magic == RHM_JDAT_TXA_MAGIC || h._magic == RHM_JDAT_TXC_MAGIC) + { + txn_hdr thdr; + std::memcpy(&thdr, rptr, sizeof(txn_hdr)); + dtokp->set_dsize(thdr._xidsize + sizeof(txn_hdr) + sizeof(rec_tail)); + } + else + { + std::ostringstream oss; + oss << "Record type found = \"" << (char*)&h._magic << "\""; + throw jexception(jerrno::JERR_RMGR_BADRECTYPE, oss.str(), "rmgr", "consume_xid_rec"); + } + dtokp->set_dblocks_read(0); + 
skip(dtokp); +} + +void +rmgr::consume_filler() +{ + // Filler (Magic "RHMx") is one dblk by definition + _pg_offset_dblks++; + if (dblks_rem() == 0) + rotate_page(); +} + +iores +rmgr::skip(data_tok* dtokp) +{ + u_int32_t dsize_dblks = jrec::size_dblks(dtokp->dsize()); + u_int32_t tot_dblk_cnt = dtokp->dblocks_read(); + while (true) + { + u_int32_t this_dblk_cnt = 0; + if (dsize_dblks - tot_dblk_cnt > dblks_rem()) + this_dblk_cnt = dblks_rem(); + else + this_dblk_cnt = dsize_dblks - tot_dblk_cnt; + if (this_dblk_cnt) + { + dtokp->incr_dblocks_read(this_dblk_cnt); + _pg_offset_dblks += this_dblk_cnt; + tot_dblk_cnt += this_dblk_cnt; + } + // If skip still incomplete, move to next page and decode again + if (tot_dblk_cnt < dsize_dblks) + { + if (dblks_rem() == 0) + rotate_page(); + if (_page_cb_arr[_pg_index]._state != AIO_COMPLETE) + { + dtokp->set_rstate(data_tok::SKIP_PART); + return RHM_IORES_PAGE_AIOWAIT; + } + } + else + { + // Skip complete, put state back to unread + dtokp->set_rstate(data_tok::UNREAD); + dtokp->set_dsize(0); + dtokp->set_dblocks_read(0); + + // If we have finished with this page, rotate it + if (dblks_rem() == 0) + rotate_page(); + return RHM_IORES_SUCCESS; + } + } +} + +iores +rmgr::aio_cycle() +{ + // Perform validity checks + if (_fhdr_rd_outstanding) // read of file header still outstanding in aio + return RHM_IORES_SUCCESS; + if (!_rrfc.is_valid()) + { + // Flush and reset all read states and pointers + flush(&jcntl::_aio_cmpl_timeout); + + _jc->get_earliest_fid(); // determine initial file to read; calls _rrfc.set_findex() to set value + // If this file has not yet been written to, return RHM_IORES_EMPTY + if (_rrfc.is_void() && !_rrfc.is_wr_aio_outstanding()) + return RHM_IORES_EMPTY; + init_file_header_read(); // send off AIO read request for file header + return RHM_IORES_SUCCESS; + } + + int16_t first_uninit = -1; + u_int16_t num_uninit = 0; + u_int16_t num_compl = 0; + bool outstanding = false; + // Index must start with current 
buffer and cycle around so that first + // uninitialized buffer is initialized first + for (u_int16_t i=_pg_index; i<_pg_index+_cache_num_pages; i++) + { + int16_t ci = i % _cache_num_pages; + switch (_page_cb_arr[ci]._state) + { + case UNUSED: + if (first_uninit < 0) + first_uninit = ci; + num_uninit++; + break; + case IN_USE: + break; + case AIO_PENDING: + outstanding = true; + break; + case AIO_COMPLETE: + num_compl++; + break; + default:; + } + } + iores res = RHM_IORES_SUCCESS; + if (num_uninit) + res = init_aio_reads(first_uninit, num_uninit); + else if (num_compl == _cache_num_pages) // This condition exists after invalidation + res = init_aio_reads(0, _cache_num_pages); + if (outstanding) + get_events(AIO_COMPLETE, 0); + return res; +} + +iores +rmgr::init_aio_reads(const int16_t first_uninit, const u_int16_t num_uninit) +{ + for (int16_t i=0; i<num_uninit; i++) + { + if (_rrfc.is_void()) // Nothing to do; this file not yet written to + break; + + if (_rrfc.subm_offs() == 0) + { + _rrfc.add_subm_cnt_dblks(JRNL_SBLK_SIZE); + _rrfc.add_cmpl_cnt_dblks(JRNL_SBLK_SIZE); + } + + // TODO: Future perf improvement: Do a single AIO read for all available file + // space into all contiguous empty pages in one AIO operation. + + u_int32_t file_rem_dblks = _rrfc.remaining_dblks(); + file_rem_dblks -= file_rem_dblks % JRNL_SBLK_SIZE; // round down to closest sblk boundary + u_int32_t pg_size_dblks = JRNL_RMGR_PAGE_SIZE * JRNL_SBLK_SIZE; + u_int32_t rd_size = file_rem_dblks > pg_size_dblks ? 
pg_size_dblks : file_rem_dblks; + if (rd_size) + { + int16_t pi = (i + first_uninit) % _cache_num_pages; + // TODO: For perf, combine contiguous pages into single read + // 1 or 2 AIOs needed depending on whether read block folds + aio_cb* aiocbp = &_aio_cb_arr[pi]; + aio::prep_pread_2(aiocbp, _rrfc.fh(), _page_ptr_arr[pi], rd_size * JRNL_DBLK_SIZE, _rrfc.subm_offs()); + if (aio::submit(_ioctx, 1, &aiocbp) < 0) + throw jexception(jerrno::JERR__AIO, "rmgr", "init_aio_reads"); + _rrfc.add_subm_cnt_dblks(rd_size); + _aio_evt_rem++; + _page_cb_arr[pi]._state = AIO_PENDING; + _page_cb_arr[pi]._rfh = _rrfc.file_controller(); + } + else // If there is nothing to read for this page, neither will there be for the others... + break; + if (_rrfc.file_rotate()) + _rrfc.rotate(); + } + return RHM_IORES_SUCCESS; +} + +void +rmgr::rotate_page() +{ + _page_cb_arr[_pg_index]._rdblks = 0; + _page_cb_arr[_pg_index]._state = UNUSED; + if (_pg_offset_dblks >= JRNL_RMGR_PAGE_SIZE * JRNL_SBLK_SIZE) + { + _pg_offset_dblks = 0; + _pg_cntr++; + } + if (++_pg_index >= _cache_num_pages) + _pg_index = 0; + aio_cycle(); + _pg_offset_dblks = 0; + // This counter is for bookkeeping only, page rotates are handled directly in init_aio_reads() + // FIXME: _pg_cntr should be sync'd with aio ops, not use of page as it is now... + // Need to move reset into if (_rrfc.file_rotate()) above. 
+ if (_pg_cntr >= (_jc->jfsize_sblks() / JRNL_RMGR_PAGE_SIZE)) + _pg_cntr = 0; +} + +u_int32_t +rmgr::dblks_rem() const +{ + return _page_cb_arr[_pg_index]._rdblks - _pg_offset_dblks; +} + +void +rmgr::set_params_null(void** const datapp, std::size_t& dsize, void** const xidpp, std::size_t& xidsize) +{ + *datapp = 0; + dsize = 0; + *xidpp = 0; + xidsize = 0; +} + +void +rmgr::init_file_header_read() +{ + _jc->fhdr_wr_sync(_rrfc.index()); // wait if the file header write is outstanding + int rfh = _rrfc.fh(); + aio::prep_pread_2(_fhdr_aio_cb_ptr, rfh, _fhdr_buffer, _sblksize, 0); + if (aio::submit(_ioctx, 1, &_fhdr_aio_cb_ptr) < 0) + throw jexception(jerrno::JERR__AIO, "rmgr", "init_file_header_read"); + _aio_evt_rem++; + _rrfc.add_subm_cnt_dblks(JRNL_SBLK_SIZE); + _fhdr_rd_outstanding = true; +} + +/* TODO (sometime in the future) +const iores +rmgr::get(const u_int64_t& rid, const std::size_t& dsize, const std::size_t& dsize_avail, + const void** const data, bool auto_discard) +{ + return RHM_IORES_SUCCESS; +} + +const iores +rmgr::discard(data_tok* dtokp) +{ + return RHM_IORES_SUCCESS; +} +*/ + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/rmgr.h b/cpp/src/qpid/legacystore/jrnl/rmgr.h new file mode 100644 index 0000000000..ae4b5f56c8 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/rmgr.h @@ -0,0 +1,114 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file rmgr.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::rmgr (read manager). See + * class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_RMGR_H +#define QPID_LEGACYSTORE_JRNL_RMGR_H + +namespace mrg +{ +namespace journal +{ +class rmgr; +} +} + +#include <cstring> +#include "jrnl/enums.h" +#include "jrnl/file_hdr.h" +#include "jrnl/pmgr.h" +#include "jrnl/rec_hdr.h" +#include "jrnl/rrfc.h" + +namespace mrg +{ +namespace journal +{ + + /** + * \brief Class for managing a read page cache of arbitrary size and number of pages. + * + * The read page cache works on the principle of filling as many pages as possible in advance of + * reading the data. This ensures that delays caused by AIO operations are minimized. 
+ */ + class rmgr : public pmgr + { + private: + rrfc& _rrfc; ///< Ref to read rotating file controller + rec_hdr _hdr; ///< Header used to determine record type + + void* _fhdr_buffer; ///< Buffer used for fhdr reads + aio_cb* _fhdr_aio_cb_ptr; ///< iocb pointer for fhdr reads + file_hdr _fhdr; ///< file header instance for reading file headers + bool _fhdr_rd_outstanding; ///< true if a fhdr read is outstanding + + public: + rmgr(jcntl* jc, enq_map& emap, txn_map& tmap, rrfc& rrfc); + virtual ~rmgr(); + + using pmgr::initialize; + void initialize(aio_callback* const cbp); + iores read(void** const datapp, std::size_t& dsize, void** const xidpp, + std::size_t& xidsize, bool& transient, bool& external, data_tok* dtokp, + bool ignore_pending_txns); + int32_t get_events(page_state state, timespec* const timeout, bool flush = false); + void recover_complete(); + inline iores synchronize() { if (_rrfc.is_valid()) return RHM_IORES_SUCCESS; return aio_cycle(); } + void invalidate(); + bool wait_for_validity(timespec* const timeout, const bool throw_on_timeout = false); + + /* TODO (if required) + const iores get(const u_int64_t& rid, const std::size_t& dsize, const std::size_t& dsize_avail, + const void** const data, bool auto_discard); + const iores discard(data_tok* dtok); + */ + + private: + void clean(); + void flush(timespec* timeout); + iores pre_read_check(data_tok* dtokp); + iores read_enq(rec_hdr& h, void* rptr, data_tok* dtokp); + void consume_xid_rec(rec_hdr& h, void* rptr, data_tok* dtokp); + void consume_filler(); + iores skip(data_tok* dtokp); + iores aio_cycle(); + iores init_aio_reads(const int16_t first_uninit, const u_int16_t num_uninit); + void rotate_page(); + u_int32_t dblks_rem() const; + void set_params_null(void** const datapp, std::size_t& dsize, void** const xidpp, + std::size_t& xidsize); + void init_file_header_read(); + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_RMGR_H diff --git 
a/cpp/src/qpid/legacystore/jrnl/rrfc.cpp b/cpp/src/qpid/legacystore/jrnl/rrfc.cpp new file mode 100644 index 0000000000..fc6f5d427f --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/rrfc.cpp @@ -0,0 +1,125 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file rrfc.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::rrfc (rotating + * file controller). See comments in file rrfc.h for details. 
+ * + * \author Kim van der Riet + */ + + +#include "qpid/legacystore/jrnl/rrfc.h" + +#include <cerrno> +#include <fcntl.h> +#include <unistd.h> +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jexception.h" + +namespace mrg +{ +namespace journal +{ + +rrfc::rrfc(const lpmgr* lpmp): rfc(lpmp), _fh(-1), _valid(false) +{} + +rrfc::~rrfc() +{ + close_fh(); +} + +void +rrfc::finalize() +{ + unset_findex(); + rfc::finalize(); +} + +void +rrfc::set_findex(const u_int16_t fc_index) +{ + rfc::set_findex(fc_index); + open_fh(_curr_fc->fname()); +} + +void +rrfc::unset_findex() +{ + set_invalid(); + close_fh(); + rfc::unset_findex(); +} + +iores +rrfc::rotate() +{ + if (!_lpmp->num_jfiles()) + throw jexception(jerrno::JERR__NINIT, "rrfc", "rotate"); + u_int16_t next_fc_index = _fc_index + 1; + if (next_fc_index == _lpmp->num_jfiles()) + next_fc_index = 0; + set_findex(next_fc_index); + return RHM_IORES_SUCCESS; +} + +std::string +rrfc::status_str() const +{ + std::ostringstream oss; + oss << "rrfc: " << rfc::status_str(); + if (is_active()) + oss << " fcntl[" << _fc_index << "]: " << _curr_fc->status_str(); + return oss.str(); +} + +// === protected functions === + +void +rrfc::open_fh(const std::string& fn) +{ + close_fh(); + _fh = ::open(fn.c_str(), O_RDONLY | O_DIRECT); + if (_fh < 0) + { + std::ostringstream oss; + oss << "file=\"" << fn << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_RRFC_OPENRD, oss.str(), "rrfc", "open_fh"); + } +} + +void +rrfc::close_fh() +{ + if (_fh >= 0) + { + ::close(_fh); + _fh = -1; + } +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/rrfc.h b/cpp/src/qpid/legacystore/jrnl/rrfc.h new file mode 100644 index 0000000000..5066d6048a --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/rrfc.h @@ -0,0 +1,179 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file rrfc.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::rrfc (rotating + * file controller). See class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_RRFC_H +#define QPID_LEGACYSTORE_JRNL_RRFC_H + +namespace mrg +{ +namespace journal +{ +class rrfc; +} +} + +#include "qpid/legacystore/jrnl/fcntl.h" +#include "qpid/legacystore/jrnl/rfc.h" + +namespace mrg +{ +namespace journal +{ + + /** + * \class rrfc + * \brief Read Rotating File Controller (rrfc) - Subclassed from pure virtual class rfc. Used to control the read + * pipeline in a rotating file buffer or journal. See class rfc for further details. + * + * The states that exist in this class are identical to class rfc from which it inherits, but in addition, the value + * of the read file handle _fh is also considered. The calls to set_findex also opens the file handle _fh to the + * active file for reading. Similarly, unset_findex() closes this file handle. 
+ * + * <pre> + * is_init() is_active() + * +===+ _lpmp.is_init() == false + * +---------->| | Uninitialized: _curr_fc == 0 F F + * | +-->+===+ --+ _fh == -1 + * | | | + * | | | + * | finalize() initialize() + * | | | + * | | | + * | +-- +===+<--+ _lpmp.is_init() == true + * finalize() | | Inactive: _curr_fc == 0 T F + * | +-->+===+ --+ _fh == -1 + * | | | + * | | | + * | unset_findex() set_findex() + * | | | + * | | | + * | +-- +===+<--+ _lpmp.is_init() == true + * +---------- | | Active: _curr_fc != 0 T T + * +===+ _fh >= 0 + * </pre> + * + * In addition to the states above, class rrfc contains a validity flag. This is operated independently of the state + * machine. This flag (_valid) indicates when the read buffers are valid for reading. This is not strictly a state, + * but simply a flag used to keep track of the status, and is set/unset with calls to set_valid() and set_invalid() + * respectively. + */ + class rrfc : public rfc + { + protected: + int _fh; ///< Read file handle + bool _valid; ///< Flag is true when read pages contain valid data + + public: + rrfc(const lpmgr* lpmp); + virtual ~rrfc(); + + /** + * \brief Initialize the controller, moving from state Uninitialized to Initialized. The main function of + * initialize() is to set the number of files and the pointer to the fcntl array. + */ + inline void initialize() { rfc::initialize(); _valid = false; } + + /** + * \brief Reset the controller to Uninitialized state, usually called when the journal is stopped. Once called, + * initialize() must be called to reuse an instance. + */ + void finalize(); + + /** + * \brief Opens the file handle for reading a particular fid. Moves to state open. + */ + void set_findex(const u_int16_t fc_index); + + /** + * \brief Closes the read file handle and nulls the active fcntl pointer. Moves to state closed. + */ + void unset_findex(); + + /** + * \brief Check the state: true = open; false = closed. 
+ */ + inline bool is_active() const { return _curr_fc != 0 && _fh >= 0; } + + /** + * \brief Resets the validity flag, indicating that the read buffers are no longer synchronized and cannot + * be read without resynchronization. + */ + inline void set_invalid() { _valid = false; } + + /** + * \brief Sets the validity flag, indicating that the read buffers contain valid data for reading. + */ + inline void set_valid() { _valid = true; } + + /** + * \brief Checks the read buffer validity status: true = valid, can be read; false = invalid, synchronization + * required. + */ + inline bool is_valid() const { return _valid; } + + /** + * \brief Rotate active file controller to next file in rotating file group. + * \exception jerrno::JERR__NINIT if called before calling initialize(). + */ + iores rotate(); + + inline int fh() const { return _fh; } + + inline u_int32_t subm_cnt_dblks() const { return _curr_fc->rd_subm_cnt_dblks(); } + inline std::size_t subm_offs() const { return _curr_fc->rd_subm_offs(); } + inline u_int32_t add_subm_cnt_dblks(u_int32_t a) { return _curr_fc->add_rd_subm_cnt_dblks(a); } + + inline u_int32_t cmpl_cnt_dblks() const { return _curr_fc->rd_cmpl_cnt_dblks(); } + inline std::size_t cmpl_offs() const { return _curr_fc->rd_cmpl_offs(); } + inline u_int32_t add_cmpl_cnt_dblks(u_int32_t a) { return _curr_fc->add_rd_cmpl_cnt_dblks(a); } + + inline bool is_void() const { return _curr_fc->rd_void(); } + inline bool is_empty() const { return _curr_fc->rd_empty(); } + inline u_int32_t remaining_dblks() const { return _curr_fc->rd_remaining_dblks(); } + inline bool is_full() const { return _curr_fc->is_rd_full(); } + inline bool is_compl() const { return _curr_fc->is_rd_compl(); } + inline u_int32_t aio_outstanding_dblks() const { return _curr_fc->rd_aio_outstanding_dblks(); } + inline bool file_rotate() const { return _curr_fc->rd_file_rotate(); } + inline bool is_wr_aio_outstanding() const { return _curr_fc->wr_aio_outstanding_dblks() > 0; } + + 
// Debug aid + std::string status_str() const; + + protected: + void open_fh(const std::string& fn); + void close_fh(); + }; // class rrfc + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_RRFC_H diff --git a/cpp/src/qpid/legacystore/jrnl/slock.cpp b/cpp/src/qpid/legacystore/jrnl/slock.cpp new file mode 100644 index 0000000000..8f26d349ef --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/slock.cpp @@ -0,0 +1,33 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file slock.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::slock (scoped lock). See + * comments in file slock.h for details. + * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/slock.h" diff --git a/cpp/src/qpid/legacystore/jrnl/slock.h b/cpp/src/qpid/legacystore/jrnl/slock.h new file mode 100644 index 0000000000..c05b5cf336 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/slock.h @@ -0,0 +1,85 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file slock.h + * + * Qpid asynchronous store plugin library + * + * Messaging journal scoped lock class mrg::journal::slock and scoped try-lock + * class mrg::journal::stlock. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_SLOCK_H +#define QPID_LEGACYSTORE_JRNL_SLOCK_H + +#include "qpid/legacystore/jrnl/jexception.h" +#include "qpid/legacystore/jrnl/smutex.h" +#include <pthread.h> + +namespace mrg +{ +namespace journal +{ + + // Ultra-simple scoped lock class, auto-releases mutex when it goes out-of-scope + class slock + { + protected: + const smutex& _sm; + public: + inline slock(const smutex& sm) : _sm(sm) + { + PTHREAD_CHK(::pthread_mutex_lock(_sm.get()), "::pthread_mutex_lock", "slock", "slock"); + } + inline ~slock() + { + PTHREAD_CHK(::pthread_mutex_unlock(_sm.get()), "::pthread_mutex_unlock", "slock", "~slock"); + } + }; + + // Ultra-simple scoped try-lock class, auto-releases mutex when it goes out-of-scope + class stlock + { + protected: + const smutex& _sm; + bool _locked; + public: + inline stlock(const smutex& sm) : _sm(sm), _locked(false) + { + int ret = ::pthread_mutex_trylock(_sm.get()); + _locked = (ret == 0); // check if lock obtained + if (!_locked && ret != EBUSY) PTHREAD_CHK(ret, "::pthread_mutex_trylock", 
"stlock", "stlock"); + } + inline ~stlock() + { + if (_locked) + PTHREAD_CHK(::pthread_mutex_unlock(_sm.get()), "::pthread_mutex_unlock", "stlock", "~stlock"); + } + inline bool locked() const { return _locked; } + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_SLOCK_H diff --git a/cpp/src/qpid/legacystore/jrnl/smutex.cpp b/cpp/src/qpid/legacystore/jrnl/smutex.cpp new file mode 100644 index 0000000000..6f8991ca5b --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/smutex.cpp @@ -0,0 +1,33 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file smutex.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::smutex (scoped mutex). See + * comments in file smutex.h for details. + * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/smutex.h" diff --git a/cpp/src/qpid/legacystore/jrnl/smutex.h b/cpp/src/qpid/legacystore/jrnl/smutex.h new file mode 100644 index 0000000000..def0fb70f6 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/smutex.h @@ -0,0 +1,64 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file smutex.h + * + * Qpid asynchronous store plugin library + * + * Messaging journal scoped mutex class mrg::journal::smutex. + * + * \author Kim van der Riet + */ + + +#ifndef QPID_LEGACYSTORE_JRNL_SMUTEX_H +#define QPID_LEGACYSTORE_JRNL_SMUTEX_H + +#include "qpid/legacystore/jrnl/jexception.h" +#include <pthread.h> + +namespace mrg +{ +namespace journal +{ + + // Ultra-simple scoped mutex class that allows a posix mutex to be initialized and destroyed with error checks + class smutex + { + protected: + mutable pthread_mutex_t _m; + public: + inline smutex() + { + PTHREAD_CHK(::pthread_mutex_init(&_m, 0), "::pthread_mutex_init", "smutex", "smutex"); + } + inline virtual ~smutex() + { + PTHREAD_CHK(::pthread_mutex_destroy(&_m), "::pthread_mutex_destroy", "smutex", "~smutex"); + } + inline pthread_mutex_t* get() const { return &_m; } + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_SMUTEX_H diff --git a/cpp/src/qpid/legacystore/jrnl/time_ns.cpp b/cpp/src/qpid/legacystore/jrnl/time_ns.cpp new file mode 100644 index 0000000000..976068ef68 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/time_ns.cpp @@ -0,0 +1,55 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file time_ns.cpp + * + * Qpid asynchronous store plugin library + * + * Messaging journal time struct mrg::journal::time_ns, derived from + * the timespec struct and provided with helper functions. + * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/time_ns.h" + +#include <sstream> + +namespace mrg +{ +namespace journal +{ + +const std::string +time_ns::str(int precision) const +{ + const double t = tv_sec + (tv_nsec/1e9); + std::ostringstream oss; + oss.setf(std::ios::fixed, std::ios::floatfield); + oss.precision(precision); + oss << t; + return oss.str(); +} + + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/time_ns.h b/cpp/src/qpid/legacystore/jrnl/time_ns.h new file mode 100644 index 0000000000..a9f69e2631 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/time_ns.h @@ -0,0 +1,105 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file time_ns.h + * + * Qpid asynchronous store plugin library + * + * Messaging journal time struct mrg::journal::time_ns, derived from + * the timespec struct and provided with helper functions. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_TIME_NS_H +#define QPID_LEGACYSTORE_JRNL_TIME_NS_H + +#include <cerrno> +#include <ctime> +#include <string> + +namespace mrg +{ +namespace journal +{ + +struct time_ns : public timespec +{ + inline time_ns() { tv_sec = 0; tv_nsec = 0; } + inline time_ns(const std::time_t sec, const long nsec = 0) { tv_sec = sec; tv_nsec = nsec; } + inline time_ns(const time_ns& t) { tv_sec = t.tv_sec; tv_nsec = t.tv_nsec; } + + inline void set_zero() { tv_sec = 0; tv_nsec = 0; } + inline bool is_zero() const { return tv_sec == 0 && tv_nsec == 0; } + inline int now() { if(::clock_gettime(CLOCK_REALTIME, this)) return errno; return 0; } + const std::string str(int precision = 6) const; + + inline time_ns& operator=(const time_ns& rhs) + { tv_sec = rhs.tv_sec; tv_nsec = rhs.tv_nsec; return *this; } + inline time_ns& operator+=(const time_ns& rhs) + { + tv_nsec += rhs.tv_nsec; + if (tv_nsec >= 1000000000L) { tv_sec++; tv_nsec -= 1000000000L; } + tv_sec += rhs.tv_sec; + return *this; + } + inline time_ns& operator+=(const long ns) + { + tv_nsec += ns; + if (tv_nsec >= 1000000000L) { tv_sec++; tv_nsec -= 1000000000L; } + return *this; + } + inline time_ns& operator-=(const long ns) + { + tv_nsec -= ns; + if (tv_nsec < 0) { tv_sec--; tv_nsec += 1000000000L; } + return *this; + } + inline 
time_ns& operator-=(const time_ns& rhs) + { + tv_nsec -= rhs.tv_nsec; + if (tv_nsec < 0) { tv_sec--; tv_nsec += 1000000000L; } + tv_sec -= rhs.tv_sec; + return *this; + } + inline const time_ns operator+(const time_ns& rhs) + { time_ns t(*this); t += rhs; return t; } + inline const time_ns operator-(const time_ns& rhs) + { time_ns t(*this); t -= rhs; return t; } + inline bool operator==(const time_ns& rhs) + { return tv_sec == rhs.tv_sec && tv_nsec == rhs.tv_nsec; } + inline bool operator!=(const time_ns& rhs) + { return tv_sec != rhs.tv_sec || tv_nsec != rhs.tv_nsec; } + inline bool operator>(const time_ns& rhs) + { if(tv_sec == rhs.tv_sec) return tv_nsec > rhs.tv_nsec; return tv_sec > rhs.tv_sec; } + inline bool operator>=(const time_ns& rhs) + { if(tv_sec == rhs.tv_sec) return tv_nsec >= rhs.tv_nsec; return tv_sec >= rhs.tv_sec; } + inline bool operator<(const time_ns& rhs) + { if(tv_sec == rhs.tv_sec) return tv_nsec < rhs.tv_nsec; return tv_sec < rhs.tv_sec; } + inline bool operator<=(const time_ns& rhs) + { if(tv_sec == rhs.tv_sec) return tv_nsec <= rhs.tv_nsec; return tv_sec <= rhs.tv_sec; } +}; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_TIME_NS_H diff --git a/cpp/src/qpid/legacystore/jrnl/txn_hdr.h b/cpp/src/qpid/legacystore/jrnl/txn_hdr.h new file mode 100644 index 0000000000..94b812ccec --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/txn_hdr.h @@ -0,0 +1,125 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file txn_hdr.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::txn_hdr (transaction + * record header), used to start a transaction (commit or abort) record. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_TXN_HDR_H +#define QPID_LEGACYSTORE_JRNL_TXN_HDR_H + +#include <cstddef> +#include "qpid/legacystore/jrnl/rec_hdr.h" + +namespace mrg +{ +namespace journal +{ + +#pragma pack(1) + + /** + * \brief Struct for transaction commit and abort records. + * + * Struct for DTX commit and abort records. Only the magic distinguishes between them. Since + * this record must be used in the context of a valid XID, the xidsize field must not be zero. + * Immediately following this record is the XID itself which is xidsize bytes long, followed by + * a rec_tail. + * + * Note that this record had its own rid distinct from the rids of the record(s) making up the + * transaction it is committing or aborting. 
+ * + * Record header info in binary format (24 bytes): + * <pre> + * 0 7 + * +---+---+---+---+---+---+---+---+ -+ + * | magic | v | e | flags | | + * +---+---+---+---+---+---+---+---+ | struct hdr + * | rid | | + * +---+---+---+---+---+---+---+---+ -+ + * | xidsize | + * +---+---+---+---+---+---+---+---+ + * v = file version (If the format or encoding of this file changes, then this + * number should be incremented) + * e = endian flag, false (0x00) for little endian, true (0x01) for big endian + * </pre> + * + * Note that journal files should be transferable between 32- and 64-bit + * hardware of the same endianness, but not between hardware of opposite + * endianness without some sort of binary conversion utility. Thus buffering + * will be needed for types that change size between 32- and 64-bit compiles. + */ + struct txn_hdr : rec_hdr + { +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + u_int32_t _filler0; ///< Big-endian filler for 32-bit size_t +#endif + std::size_t _xidsize; ///< XID size +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + u_int32_t _filler0; ///< Little-endian filler for 32-bit size_t +#endif + + /** + * \brief Default constructor, which sets all values to 0. + */ + txn_hdr(): rec_hdr(), +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + _filler0(0), +#endif + _xidsize(0) +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + , _filler0(0) +#endif + {} + + /** + * \brief Convenience constructor which initializes values during construction. + */ + txn_hdr(const u_int32_t magic, const u_int8_t version, const u_int64_t rid, + const std::size_t xidsize, const bool owi): rec_hdr(magic, version, rid, owi), +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + _filler0(0), +#endif + _xidsize(xidsize) +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + , _filler0(0) +#endif + {} + + /** + * \brief Returns the size of the header in bytes. 
+ */ + inline static std::size_t size() { return sizeof(txn_hdr); } + }; + +#pragma pack() + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_TXN_HDR_H diff --git a/cpp/src/qpid/legacystore/jrnl/txn_map.cpp b/cpp/src/qpid/legacystore/jrnl/txn_map.cpp new file mode 100644 index 0000000000..c514670601 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/txn_map.cpp @@ -0,0 +1,256 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file txn_map.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::txn_map (transaction map). See + * comments in file txn_map.h for details. 
+ * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/txn_map.h" + +#include <iomanip> +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jexception.h" +#include "qpid/legacystore/jrnl/slock.h" +#include <sstream> + +namespace mrg +{ +namespace journal +{ + +// return/error codes +int16_t txn_map::TMAP_RID_NOT_FOUND = -2; +int16_t txn_map::TMAP_XID_NOT_FOUND = -1; +int16_t txn_map::TMAP_OK = 0; +int16_t txn_map::TMAP_NOT_SYNCED = 0; +int16_t txn_map::TMAP_SYNCED = 1; + +txn_data_struct::txn_data_struct(const u_int64_t rid, const u_int64_t drid, const u_int16_t pfid, + const bool enq_flag, const bool commit_flag): + _rid(rid), + _drid(drid), + _pfid(pfid), + _enq_flag(enq_flag), + _commit_flag(commit_flag), + _aio_compl(false) +{} + +txn_map::txn_map(): + _map(), + _pfid_txn_cnt() +{} + +txn_map::~txn_map() {} + +void +txn_map::set_num_jfiles(const u_int16_t num_jfiles) +{ + _pfid_txn_cnt.resize(num_jfiles, 0); +} + +u_int32_t +txn_map::get_txn_pfid_cnt(const u_int16_t pfid) const +{ + return _pfid_txn_cnt.at(pfid); +} + +bool +txn_map::insert_txn_data(const std::string& xid, const txn_data& td) +{ + bool ok = true; + slock s(_mutex); + xmap_itr itr = _map.find(xid); + if (itr == _map.end()) // not found in map + { + txn_data_list list; + list.push_back(td); + std::pair<xmap_itr, bool> ret = _map.insert(xmap_param(xid, list)); + if (!ret.second) // duplicate + ok = false; + } + else + itr->second.push_back(td); + _pfid_txn_cnt.at(td._pfid)++; + return ok; +} + +const txn_data_list +txn_map::get_tdata_list(const std::string& xid) +{ + slock s(_mutex); + return get_tdata_list_nolock(xid); +} + +const txn_data_list +txn_map::get_tdata_list_nolock(const std::string& xid) +{ + xmap_itr itr = _map.find(xid); + if (itr == _map.end()) // not found in map + return _empty_data_list; + return itr->second; +} + +const txn_data_list +txn_map::get_remove_tdata_list(const std::string& xid) +{ + slock s(_mutex); + xmap_itr itr = 
_map.find(xid); + if (itr == _map.end()) // not found in map + return _empty_data_list; + txn_data_list list = itr->second; + _map.erase(itr); + for (tdl_itr i=list.begin(); i!=list.end(); i++) + _pfid_txn_cnt.at(i->_pfid)--; + return list; +} + +bool +txn_map::in_map(const std::string& xid) +{ + slock s(_mutex); + xmap_itr itr= _map.find(xid); + return itr != _map.end(); +} + +u_int32_t +txn_map::enq_cnt() +{ + return cnt(true); +} + +u_int32_t +txn_map::deq_cnt() +{ + return cnt(true); +} + +u_int32_t +txn_map::cnt(const bool enq_flag) +{ + slock s(_mutex); + u_int32_t c = 0; + for (xmap_itr i = _map.begin(); i != _map.end(); i++) + { + for (tdl_itr j = i->second.begin(); j < i->second.end(); j++) + { + if (j->_enq_flag == enq_flag) + c++; + } + } + return c; +} + +int16_t +txn_map::is_txn_synced(const std::string& xid) +{ + slock s(_mutex); + xmap_itr itr = _map.find(xid); + if (itr == _map.end()) // not found in map + return TMAP_XID_NOT_FOUND; + bool is_synced = true; + for (tdl_itr litr = itr->second.begin(); litr < itr->second.end(); litr++) + { + if (!litr->_aio_compl) + { + is_synced = false; + break; + } + } + return is_synced ? 
TMAP_SYNCED : TMAP_NOT_SYNCED; +} + +int16_t +txn_map::set_aio_compl(const std::string& xid, const u_int64_t rid) +{ + slock s(_mutex); + xmap_itr itr = _map.find(xid); + if (itr == _map.end()) // xid not found in map + return TMAP_XID_NOT_FOUND; + for (tdl_itr litr = itr->second.begin(); litr < itr->second.end(); litr++) + { + if (litr->_rid == rid) + { + litr->_aio_compl = true; + return TMAP_OK; // rid found + } + } + // xid present, but rid not found + return TMAP_RID_NOT_FOUND; +} + +bool +txn_map::data_exists(const std::string& xid, const u_int64_t rid) +{ + bool found = false; + { + slock s(_mutex); + txn_data_list tdl = get_tdata_list_nolock(xid); + tdl_itr itr = tdl.begin(); + while (itr != tdl.end() && !found) + { + found = itr->_rid == rid; + itr++; + } + } + return found; +} + +bool +txn_map::is_enq(const u_int64_t rid) +{ + bool found = false; + { + slock s(_mutex); + for (xmap_itr i = _map.begin(); i != _map.end() && !found; i++) + { + txn_data_list list = i->second; + for (tdl_itr j = list.begin(); j < list.end() && !found; j++) + { + if (j->_enq_flag) + found = j->_rid == rid; + else + found = j->_drid == rid; + } + } + } + return found; +} + +void +txn_map::xid_list(std::vector<std::string>& xv) +{ + xv.clear(); + { + slock s(_mutex); + for (xmap_itr itr = _map.begin(); itr != _map.end(); itr++) + xv.push_back(itr->first); + } +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/txn_map.h b/cpp/src/qpid/legacystore/jrnl/txn_map.h new file mode 100644 index 0000000000..6b38564e53 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/txn_map.h @@ -0,0 +1,159 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file txn_map.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::txn_map (transaction map). + * See class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_TXN_MAP_H +#define QPID_LEGACYSTORE_JRNL_TXN_MAP_H + +namespace mrg +{ +namespace journal +{ + class txn_map; +} +} + +#include "qpid/legacystore/jrnl/smutex.h" +#include <map> +#include <pthread.h> +#include <string> +#include <sys/types.h> +#include <vector> + +namespace mrg +{ +namespace journal +{ + + /** + * \struct txn_data_struct + * \brief Struct encapsulating transaction data necessary for processing a transaction + * in the journal once it is closed with either a commit or abort. 
+ */ + struct txn_data_struct + { + u_int64_t _rid; ///< Record id for this operation + u_int64_t _drid; ///< Dequeue record id for this operation + u_int16_t _pfid; ///< Physical file id, to be used when transferring to emap on commit + bool _enq_flag; ///< If true, enq op, otherwise deq op + bool _commit_flag; ///< (2PC transactions) Records 2PC complete c/a mode + bool _aio_compl; ///< Initially false, set to true when record AIO returns + txn_data_struct(const u_int64_t rid, const u_int64_t drid, const u_int16_t pfid, + const bool enq_flag, const bool commit_flag = false); + }; + typedef txn_data_struct txn_data; + typedef std::vector<txn_data> txn_data_list; + typedef txn_data_list::iterator tdl_itr; + + /** + * \class txn_map + * \brief Class for storing transaction data for each open (i.e. not committed or aborted) + * xid in the store. If aborted, records are discarded; if committed, they are + * transferred to the enqueue map. + * + * The data is encapsulated by struct txn_data_struct. A vector containing the information + * for each operation included as part of the same transaction is mapped against the + * xid. + * + * The aio_compl flag is set true as each AIO write operation for the enqueue or dequeue + * returns. Checking that all of these flags are true for a given xid is the mechanism + * used to determine if the transaction is synchronized (through method is_txn_synced()). + * + * On transaction commit, each operation is handled as follows: + * + * If an enqueue (_enq_flag is true), then the rid and pfid are transferred to the enq_map. + * If a dequeue (_enq_flag is false), then the rid stored in the drid field is used to + * remove the corresponding record from the enq_map. + * + * On transaction abort, each operation is handled as follows: + * + * If an enqueue (_enq_flag is true), then the data is simply discarded. 
+ * If a dequeue (_enq_flag is false), then the lock for the corresponding enqueue in enq_map + * (if not a part of the same transaction) is removed, and the data discarded. + * + * <pre> + * key data + * + * xid1 --- vector< [ rid, drid, pfid, enq_flag, commit_flag, aio_compl ] > + * xid2 --- vector< [ rid, drid, pfid, enq_flag, commit_flag, aio_compl ] > + * xid3 --- vector< [ rid, drid, pfid, enq_flag, commit_flag, aio_compl ] > + * ... + * </pre> + */ + class txn_map + { + public: + // return/error codes + static int16_t TMAP_RID_NOT_FOUND; + static int16_t TMAP_XID_NOT_FOUND; + static int16_t TMAP_OK; + static int16_t TMAP_NOT_SYNCED; + static int16_t TMAP_SYNCED; + + private: + typedef std::pair<std::string, txn_data_list> xmap_param; + typedef std::map<std::string, txn_data_list> xmap; + typedef xmap::iterator xmap_itr; + + xmap _map; ///< xid -> list of operations recorded for that transaction + smutex _mutex; ///< Taken via slock by the methods in txn_map.cpp that search or modify _map + std::vector<u_int32_t> _pfid_txn_cnt; ///< Per-pfid count of entries currently held; sized by set_num_jfiles() + const txn_data_list _empty_data_list; ///< Returned (by copy) when an xid is not in the map + + public: + txn_map(); + virtual ~txn_map(); + + void set_num_jfiles(const u_int16_t num_jfiles); + u_int32_t get_txn_pfid_cnt(const u_int16_t pfid) const; + bool insert_txn_data(const std::string& xid, const txn_data& td); + const txn_data_list get_tdata_list(const std::string& xid); + const txn_data_list get_remove_tdata_list(const std::string& xid); + bool in_map(const std::string& xid); + u_int32_t enq_cnt(); + u_int32_t deq_cnt(); + int16_t is_txn_synced(const std::string& xid); // -1=xid not found; 0=not synced; 1=synced + int16_t set_aio_compl(const std::string& xid, const u_int64_t rid); // -2=rid not found; -1=xid not found; 0=done + bool data_exists(const std::string& xid, const u_int64_t rid); + bool is_enq(const u_int64_t rid); + // NOTE(review): the three inline accessors below touch _map without taking _mutex, and + // clear() leaves _pfid_txn_cnt untouched - confirm callers rely on this + inline void clear() { _map.clear(); } + inline bool empty() const { return _map.empty(); } + inline size_t size() const { return _map.size(); } + void xid_list(std::vector<std::string>& xv); + private: + u_int32_t cnt(const bool enq_flag); + const txn_data_list get_tdata_list_nolock(const 
std::string& xid); + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_TXN_MAP_H diff --git a/cpp/src/qpid/legacystore/jrnl/txn_rec.cpp b/cpp/src/qpid/legacystore/jrnl/txn_rec.cpp new file mode 100644 index 0000000000..918a6ce902 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/txn_rec.cpp @@ -0,0 +1,447 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file txn_rec.cpp + * + * Qpid asynchronous store plugin library + * + * This file contains the code for the mrg::journal::txn_rec (journal transaction + * record) class. See comments in file txn_rec.h for details. 
+ * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/txn_rec.h" + +#include <cassert> +#include <cerrno> +#include <cstdlib> +#include <cstring> +#include <iomanip> +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jexception.h" +#include <sstream> + +namespace mrg +{ +namespace journal +{ + +txn_rec::txn_rec(): + _txn_hdr(), + _xidp(0), + _buff(0), + _txn_tail() +{ + _txn_hdr._version = RHM_JDAT_VERSION; +} + +txn_rec::txn_rec(const u_int32_t magic, const u_int64_t rid, const void* const xidp, + const std::size_t xidlen, const bool owi): + _txn_hdr(magic, RHM_JDAT_VERSION, rid, xidlen, owi), + _xidp(xidp), + _buff(0), + _txn_tail(_txn_hdr) +{} + +txn_rec::~txn_rec() +{ + clean(); +} + +/* Prepare for a read: store magic in the header and its complement in the tail, zero rid and xidsize, and null _xidp and _buff (a buffer left by an earlier decode is not freed here). */ +void +txn_rec::reset(const u_int32_t magic) +{ + _txn_hdr._magic = magic; + _txn_hdr._rid = 0; + _txn_hdr._xidsize = 0; + _xidp = 0; + _buff = 0; + _txn_tail._xmagic = ~magic; + _txn_tail._rid = 0; +} + +/* Prepare for a write: fill header and tail from the arguments; _xidp is pointed at the caller's xid (the bytes are not copied) and _buff is nulled. */ +void +txn_rec::reset(const u_int32_t magic, const u_int64_t rid, const void* const xidp, + const std::size_t xidlen, const bool owi) +{ + _txn_hdr._magic = magic; + _txn_hdr._rid = rid; + _txn_hdr.set_owi(owi); + _txn_hdr._xidsize = xidlen; + _xidp = xidp; + _buff = 0; + _txn_tail._xmagic = ~magic; + _txn_tail._rid = rid; +} + +/* Copy header, xid and tail into wptr, resuming rec_offs_dblks dblks into the record and limited to max_size_dblks dblks; returns size_dblks() of the bytes written. */ +u_int32_t +txn_rec::encode(void* wptr, u_int32_t rec_offs_dblks, u_int32_t max_size_dblks) +{ + assert(wptr != 0); + assert(max_size_dblks > 0); + assert(_xidp != 0 && _txn_hdr._xidsize > 0); + + std::size_t rec_offs = rec_offs_dblks * JRNL_DBLK_SIZE; + std::size_t rem = max_size_dblks * JRNL_DBLK_SIZE; + std::size_t wr_cnt = 0; + if (rec_offs_dblks) // Continuation of split transaction record (over 2 or more pages) + { + if (size_dblks(rec_size()) - rec_offs_dblks > max_size_dblks) // Further split required + { + rec_offs -= sizeof(_txn_hdr); + std::size_t wsize = _txn_hdr._xidsize > rec_offs ? 
_txn_hdr._xidsize - rec_offs : 0; + std::size_t wsize2 = wsize; + if (wsize) + { + if (wsize > rem) + wsize = rem; + std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize); + wr_cnt += wsize; + rem -= wsize; + } + rec_offs -= _txn_hdr._xidsize - wsize2; + if (rem) + { + wsize = sizeof(_txn_tail) > rec_offs ? sizeof(_txn_tail) - rec_offs : 0; + wsize2 = wsize; + if (wsize) + { + if (wsize > rem) + wsize = rem; + std::memcpy((char*)wptr + wr_cnt, (char*)&_txn_tail + rec_offs, wsize); + wr_cnt += wsize; + rem -= wsize; + } + rec_offs -= sizeof(_txn_tail) - wsize2; + } + assert(rem == 0); + assert(rec_offs == 0); + } + else // No further split required + { + rec_offs -= sizeof(_txn_hdr); + std::size_t wsize = _txn_hdr._xidsize > rec_offs ? _txn_hdr._xidsize - rec_offs : 0; + if (wsize) + { + std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize); + wr_cnt += wsize; + } + rec_offs -= _txn_hdr._xidsize - wsize; + wsize = sizeof(_txn_tail) > rec_offs ? sizeof(_txn_tail) - rec_offs : 0; + if (wsize) + { + std::memcpy((char*)wptr + wr_cnt, (char*)&_txn_tail + rec_offs, wsize); + wr_cnt += wsize; +#ifdef RHM_CLEAN + // NOTE: this rec_offs shadows the outer rec_offs until the closing brace of this if-block + std::size_t rec_offs = rec_offs_dblks * JRNL_DBLK_SIZE; + std::size_t dblk_rec_size = size_dblks(rec_size() - rec_offs) * JRNL_DBLK_SIZE; + std::memset((char*)wptr + wr_cnt, RHM_CLEAN_CHAR, dblk_rec_size - wr_cnt); +#endif + } + rec_offs -= sizeof(_txn_tail) - wsize; + assert(rec_offs == 0); + } + } + else // Start at beginning of record + { + // Assumption: the header will always fit into the first dblk + std::memcpy(wptr, (void*)&_txn_hdr, sizeof(_txn_hdr)); + wr_cnt = sizeof(_txn_hdr); + if (size_dblks(rec_size()) > max_size_dblks) // Split required + { + std::size_t wsize; + rem -= sizeof(_txn_hdr); + if (rem) + { + wsize = rem >= _txn_hdr._xidsize ? _txn_hdr._xidsize : rem; + std::memcpy((char*)wptr + wr_cnt, _xidp, wsize); + wr_cnt += wsize; + rem -= wsize; + } + if (rem) + { + wsize = rem >= sizeof(_txn_tail) ? 
sizeof(_txn_tail) : rem; + std::memcpy((char*)wptr + wr_cnt, (void*)&_txn_tail, wsize); + wr_cnt += wsize; + rem -= wsize; + } + assert(rem == 0); + } + else // No split required + { + std::memcpy((char*)wptr + wr_cnt, _xidp, _txn_hdr._xidsize); + wr_cnt += _txn_hdr._xidsize; + std::memcpy((char*)wptr + wr_cnt, (void*)&_txn_tail, sizeof(_txn_tail)); + wr_cnt += sizeof(_txn_tail); +#ifdef RHM_CLEAN + std::size_t dblk_rec_size = size_dblks(rec_size()) * JRNL_DBLK_SIZE; + std::memset((char*)wptr + wr_cnt, RHM_CLEAN_CHAR, dblk_rec_size - wr_cnt); +#endif + } + } + return size_dblks(wr_cnt); +} + +// Read a record from rptr. When rec_offs_dblks is 0 the header is taken from h and a buffer of +// _xidsize bytes is malloc'd into _buff for the xid; otherwise copying resumes rec_offs_dblks +// dblks into the record. The tail is checked once it has been read in full. Returns +// size_dblks() of the bytes consumed from rptr. +u_int32_t +txn_rec::decode(rec_hdr& h, void* rptr, u_int32_t rec_offs_dblks, u_int32_t max_size_dblks) +{ + assert(rptr != 0); + assert(max_size_dblks > 0); + + std::size_t rd_cnt = 0; + if (rec_offs_dblks) // Continuation of record on new page + { + const u_int32_t hdr_xid_dblks = size_dblks(txn_hdr::size() + _txn_hdr._xidsize); + const u_int32_t hdr_xid_tail_dblks = size_dblks(txn_hdr::size() + _txn_hdr._xidsize + + rec_tail::size()); + const std::size_t rec_offs = rec_offs_dblks * JRNL_DBLK_SIZE; + + if (hdr_xid_tail_dblks - rec_offs_dblks <= max_size_dblks) + { + // Remainder of xid fits within this page + if (rec_offs - txn_hdr::size() < _txn_hdr._xidsize) + { + // Part of xid still outstanding, copy remainder of xid and tail + const std::size_t xid_offs = rec_offs - txn_hdr::size(); + const std::size_t xid_rem = _txn_hdr._xidsize - xid_offs; + std::memcpy((char*)_buff + xid_offs, rptr, xid_rem); + rd_cnt = xid_rem; + std::memcpy((void*)&_txn_tail, ((char*)rptr + rd_cnt), sizeof(_txn_tail)); + chk_tail(); + rd_cnt += sizeof(_txn_tail); + } + else + { + // Tail or part of tail only outstanding, complete tail + const std::size_t tail_offs = rec_offs - txn_hdr::size() - _txn_hdr._xidsize; + const std::size_t tail_rem = rec_tail::size() - tail_offs; + std::memcpy((char*)&_txn_tail + tail_offs, rptr, tail_rem); + chk_tail(); + rd_cnt = tail_rem; + } + } + else if 
(hdr_xid_dblks - rec_offs_dblks <= max_size_dblks) + { + // Remainder of xid fits within this page, tail split + const std::size_t xid_offs = rec_offs - txn_hdr::size(); + const std::size_t xid_rem = _txn_hdr._xidsize - xid_offs; + std::memcpy((char*)_buff + xid_offs, rptr, xid_rem); + rd_cnt += xid_rem; + const std::size_t tail_rem = (max_size_dblks * JRNL_DBLK_SIZE) - rd_cnt; + if (tail_rem) + { + std::memcpy((void*)&_txn_tail, ((char*)rptr + xid_rem), tail_rem); + rd_cnt += tail_rem; + } + } + else + { + // Remainder of xid split + const std::size_t xid_cp_size = (max_size_dblks * JRNL_DBLK_SIZE); + std::memcpy((char*)_buff + rec_offs - txn_hdr::size(), rptr, xid_cp_size); + rd_cnt += xid_cp_size; + } + } + else // Start of record + { + // Get and check header + _txn_hdr.hdr_copy(h); + rd_cnt = sizeof(rec_hdr); +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + rd_cnt += sizeof(u_int32_t); // Filler 0 +#endif + _txn_hdr._xidsize = *(std::size_t*)((char*)rptr + rd_cnt); + rd_cnt = _txn_hdr.size(); + chk_hdr(); + _buff = std::malloc(_txn_hdr._xidsize); + MALLOC_CHK(_buff, "_buff", "txn_rec", "decode"); + const u_int32_t hdr_xid_dblks = size_dblks(txn_hdr::size() + _txn_hdr._xidsize); + const u_int32_t hdr_xid_tail_dblks = size_dblks(txn_hdr::size() + _txn_hdr._xidsize + + rec_tail::size()); + + // Check if record (header + xid + tail) fits within this page, we can check the + // tail before the expense of copying data to memory + if (hdr_xid_tail_dblks <= max_size_dblks) + { + // Entire header, xid and tail fits within this page + std::memcpy(_buff, (char*)rptr + rd_cnt, _txn_hdr._xidsize); + rd_cnt += _txn_hdr._xidsize; + std::memcpy((void*)&_txn_tail, (char*)rptr + rd_cnt, sizeof(_txn_tail)); + rd_cnt += sizeof(_txn_tail); + chk_tail(); + } + else if (hdr_xid_dblks <= max_size_dblks) + { + // Entire header and xid fit within this page, tail split + std::memcpy(_buff, (char*)rptr + rd_cnt, _txn_hdr._xidsize); + rd_cnt += _txn_hdr._xidsize; + const 
std::size_t tail_rem = (max_size_dblks * JRNL_DBLK_SIZE) - rd_cnt; + if (tail_rem) + { + std::memcpy((void*)&_txn_tail, (char*)rptr + rd_cnt, tail_rem); + rd_cnt += tail_rem; + } + } + else + { + // Header fits within this page, xid split + const std::size_t xid_cp_size = (max_size_dblks * JRNL_DBLK_SIZE) - rd_cnt; + std::memcpy(_buff, (char*)rptr + rd_cnt, xid_cp_size); + rd_cnt += xid_cp_size; + } + } + return size_dblks(rd_cnt); +} + +// Decode used during recovery, reading from a stream instead of a page buffer. rec_offs is +// advanced by the bytes read. Returns false if the stream runs out before the xid or tail is +// complete (the fail bit is cleared and rec_offs records the progress made); returns true once +// the tail has been read, the padding up to the dblk boundary skipped and the tail checked. +bool +txn_rec::rcv_decode(rec_hdr h, std::ifstream* ifsp, std::size_t& rec_offs) +{ + if (rec_offs == 0) + { + // Read header, allocate for xid + _txn_hdr.hdr_copy(h); +#if defined(JRNL_BIG_ENDIAN) && defined(JRNL_32_BIT) + ifsp->ignore(sizeof(u_int32_t)); // _filler0 +#endif + ifsp->read((char*)&_txn_hdr._xidsize, sizeof(std::size_t)); +#if defined(JRNL_LITTLE_ENDIAN) && defined(JRNL_32_BIT) + ifsp->ignore(sizeof(u_int32_t)); // _filler0 +#endif + rec_offs = sizeof(_txn_hdr); + _buff = std::malloc(_txn_hdr._xidsize); + MALLOC_CHK(_buff, "_buff", "txn_rec", "rcv_decode"); + } + if (rec_offs < sizeof(_txn_hdr) + _txn_hdr._xidsize) + { + // Read xid (or continue reading xid) + std::size_t offs = rec_offs - sizeof(_txn_hdr); + ifsp->read((char*)_buff + offs, _txn_hdr._xidsize - offs); + std::size_t size_read = ifsp->gcount(); + rec_offs += size_read; + if (size_read < _txn_hdr._xidsize - offs) + { + assert(ifsp->eof()); + // As we may have read past eof, turn off fail bit + ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit)); + assert(!ifsp->fail() && !ifsp->bad()); + return false; + } + } + if (rec_offs < sizeof(_txn_hdr) + _txn_hdr._xidsize + sizeof(rec_tail)) + { + // Read tail (or continue reading tail) + std::size_t offs = rec_offs - sizeof(_txn_hdr) - _txn_hdr._xidsize; + ifsp->read((char*)&_txn_tail + offs, sizeof(rec_tail) - offs); + std::size_t size_read = ifsp->gcount(); + rec_offs += size_read; + if (size_read < sizeof(rec_tail) - offs) + { + assert(ifsp->eof()); + // As we may have read past eof, turn off fail bit + 
ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit)); + assert(!ifsp->fail() && !ifsp->bad()); + return false; + } + } + ifsp->ignore(rec_size_dblks() * JRNL_DBLK_SIZE - rec_size()); + chk_tail(); // Throws if tail invalid or record incomplete + assert(!ifsp->fail() && !ifsp->bad()); + return true; +} + +// Points *xidpp at the buffer filled by decode()/rcv_decode() and returns the xid size from +// the header; yields a null pointer and 0 while _buff is null. +std::size_t +txn_rec::get_xid(void** const xidpp) +{ + if (!_buff) + { + *xidpp = 0; + return 0; + } + *xidpp = _buff; + return _txn_hdr._xidsize; +} + +// Appends a one-line description of this record to str and returns str. +// NOTE(review): _xidp is a const void*, so the xid field below streams the pointer value, +// not the xid bytes - confirm that is what is wanted in the output. +std::string& +txn_rec::str(std::string& str) const +{ + std::ostringstream oss; + if (_txn_hdr._magic == RHM_JDAT_TXA_MAGIC) + oss << "dtxa_rec: m=" << _txn_hdr._magic; + else + oss << "dtxc_rec: m=" << _txn_hdr._magic; + oss << " v=" << (int)_txn_hdr._version; + oss << " rid=" << _txn_hdr._rid; + oss << " xid=\"" << _xidp << "\""; + str.append(oss.str()); + return str; +} + +std::size_t +txn_rec::xid_size() const +{ + return _txn_hdr._xidsize; +} + +// Unpadded record size in bytes: header + xid + tail. +std::size_t +txn_rec::rec_size() const +{ + return txn_hdr::size() + _txn_hdr._xidsize + rec_tail::size(); +} + +// Runs the generic jrec header check, then throws JERR_JREC_BADRECHDR unless the magic is +// one of the two transaction magics (TXA or TXC). +void +txn_rec::chk_hdr() const +{ + jrec::chk_hdr(_txn_hdr); + if (_txn_hdr._magic != RHM_JDAT_TXA_MAGIC && _txn_hdr._magic != RHM_JDAT_TXC_MAGIC) + { + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + oss << "dtx magic: rid=0x" << std::setw(16) << _txn_hdr._rid; + oss << ": expected=(0x" << std::setw(8) << RHM_JDAT_TXA_MAGIC; + oss << " or 0x" << RHM_JDAT_TXC_MAGIC; + oss << ") read=0x" << std::setw(2) << (int)_txn_hdr._magic; + throw jexception(jerrno::JERR_JREC_BADRECHDR, oss.str(), "txn_rec", "chk_hdr"); + } +} + +void +txn_rec::chk_hdr(u_int64_t rid) const +{ + chk_hdr(); + jrec::chk_rid(_txn_hdr, rid); +} + +void +txn_rec::chk_tail() const +{ + jrec::chk_tail(_txn_tail, _txn_hdr); +} + +void +txn_rec::clean() +{ + // clean up allocated memory here + // NOTE(review): nothing is released although decode()/rcv_decode() malloc _buff and + // reset() only nulls the pointer - confirm which component frees that buffer +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/txn_rec.h b/cpp/src/qpid/legacystore/jrnl/txn_rec.h new file mode 100644 index 
0000000000..1a49df1c96 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/txn_rec.h @@ -0,0 +1,101 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file txn_rec.h + * + * Qpid asynchronous store plugin library + * + * This file contains the code for the mrg::journal::txn_rec (journal transaction + * record) class. See class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_TXN_REC_H +#define QPID_LEGACYSTORE_JRNL_TXN_REC_H + +namespace mrg +{ +namespace journal +{ +class txn_rec; +} +} + +#include <cstddef> +#include "qpid/legacystore/jrnl/jrec.h" +#include "qpid/legacystore/jrnl/txn_hdr.h" + +namespace mrg +{ +namespace journal +{ + + /** + * \class txn_rec + * \brief Class to handle a single journal DTX commit or abort record. 
+ */ + class txn_rec : public jrec + { + private: + txn_hdr _txn_hdr; ///< transaction header + const void* _xidp; ///< xid pointer for encoding (writing to disk) + void* _buff; ///< Pointer to buffer to receive data read from disk + rec_tail _txn_tail; ///< Record tail + + public: + // constructor used for read operations; the xid buffer is allocated later by decode()/rcv_decode() + txn_rec(); + // constructor used for write operations, where xid already exists + txn_rec(const u_int32_t magic, const u_int64_t rid, const void* const xidp, + const std::size_t xidlen, const bool owi); + virtual ~txn_rec(); + + // Prepare instance for use in reading data from journal + void reset(const u_int32_t magic); + // Prepare instance for use in writing data to journal + void reset(const u_int32_t magic, const u_int64_t rid, const void* const xidp, + const std::size_t xidlen, const bool owi); + // Write the record into wptr / read it from rptr, resuming rec_offs_dblks dblks into the + // record and limited to max_size_dblks dblks; both return a count in dblks + u_int32_t encode(void* wptr, u_int32_t rec_offs_dblks, u_int32_t max_size_dblks); + u_int32_t decode(rec_hdr& h, void* rptr, u_int32_t rec_offs_dblks, + u_int32_t max_size_dblks); + // Decode used for recovery (reads from a file stream); returns false if the stream ends + // before the record is complete + bool rcv_decode(rec_hdr h, std::ifstream* ifsp, std::size_t& rec_offs); + + // Points *xidpp at the decoded xid buffer and returns its size (null and 0 if none) + std::size_t get_xid(void** const xidpp); + std::string& str(std::string& str) const; + inline std::size_t data_size() const { return 0; } // This record never carries data + std::size_t xid_size() const; + std::size_t rec_size() const; + inline u_int64_t rid() const { return _txn_hdr._rid; } + + private: + void chk_hdr() const; + void chk_hdr(u_int64_t rid) const; + void chk_tail() const; + virtual void clean(); + }; // class txn_rec + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_TXN_REC_H diff --git a/cpp/src/qpid/legacystore/jrnl/wmgr.cpp b/cpp/src/qpid/legacystore/jrnl/wmgr.cpp new file mode 100644 index 0000000000..4353fcfbca --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/wmgr.cpp @@ -0,0 +1,1051 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file wmgr.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::wmgr (write manager). See + * comments in file wmgr.h for details. + * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/wmgr.h" + +#include <cassert> +#include <cerrno> +#include <cstdlib> +#include <cstring> +#include "qpid/legacystore/jrnl/file_hdr.h" +#include "qpid/legacystore/jrnl/jcntl.h" +#include "qpid/legacystore/jrnl/jerrno.h" +#include <sstream> + +namespace mrg +{ +namespace journal +{ + +wmgr::wmgr(jcntl* jc, enq_map& emap, txn_map& tmap, wrfc& wrfc): + pmgr(jc, emap, tmap), + _wrfc(wrfc), + _max_dtokpp(0), + _max_io_wait_us(0), + _fhdr_base_ptr(0), + _fhdr_ptr_arr(0), + _fhdr_aio_cb_arr(0), + _cached_offset_dblks(0), + _jfsize_dblks(0), + _jfsize_pgs(0), + _num_jfiles(0), + _enq_busy(false), + _deq_busy(false), + _abort_busy(false), + _commit_busy(false), + _txn_pending_set() +{} + +wmgr::wmgr(jcntl* jc, enq_map& emap, txn_map& tmap, wrfc& wrfc, + const u_int32_t max_dtokpp, const u_int32_t max_iowait_us): + pmgr(jc, emap, tmap /* , dtoklp */), + _wrfc(wrfc), + _max_dtokpp(max_dtokpp), + _max_io_wait_us(max_iowait_us), + _fhdr_base_ptr(0), + _fhdr_ptr_arr(0), + 
_fhdr_aio_cb_arr(0), + _cached_offset_dblks(0), + _jfsize_dblks(0), + _jfsize_pgs(0), + _num_jfiles(0), + _enq_busy(false), + _deq_busy(false), + _abort_busy(false), + _commit_busy(false), + _txn_pending_set() +{} + +wmgr::~wmgr() +{ + wmgr::clean(); +} + +void +wmgr::initialize(aio_callback* const cbp, const u_int32_t wcache_pgsize_sblks, + const u_int16_t wcache_num_pages, const u_int32_t max_dtokpp, const u_int32_t max_iowait_us, + std::size_t eo) +{ + _enq_busy = false; + _deq_busy = false; + _abort_busy = false; + _commit_busy = false; + _max_dtokpp = max_dtokpp; + _max_io_wait_us = max_iowait_us; + + initialize(cbp, wcache_pgsize_sblks, wcache_num_pages); + + _jfsize_dblks = _jc->jfsize_sblks() * JRNL_SBLK_SIZE; + _jfsize_pgs = _jc->jfsize_sblks() / _cache_pgsize_sblks; + assert(_jc->jfsize_sblks() % JRNL_RMGR_PAGE_SIZE == 0); + + if (eo) + { + const u_int32_t wr_pg_size_dblks = _cache_pgsize_sblks * JRNL_SBLK_SIZE; + u_int32_t data_dblks = (eo / JRNL_DBLK_SIZE) - 4; // 4 dblks for file hdr + _pg_cntr = data_dblks / wr_pg_size_dblks; + _pg_offset_dblks = data_dblks - (_pg_cntr * wr_pg_size_dblks); + } +} + +iores +wmgr::enqueue(const void* const data_buff, const std::size_t tot_data_len, + const std::size_t this_data_len, data_tok* dtokp, const void* const xid_ptr, + const std::size_t xid_len, const bool transient, const bool external) +{ + if (xid_len) + assert(xid_ptr != 0); + + if (_deq_busy || _abort_busy || _commit_busy) + return RHM_IORES_BUSY; + + if (this_data_len != tot_data_len && !external) + return RHM_IORES_NOTIMPL; + + iores res = pre_write_check(WMGR_ENQUEUE, dtokp, xid_len, tot_data_len, external); + if (res != RHM_IORES_SUCCESS) + return res; + + bool cont = false; + if (_enq_busy) // If enqueue() exited last time with RHM_IORES_FULL or RHM_IORES_PAGE_AIOWAIT + { + if (dtokp->wstate() == data_tok::ENQ_PART) + cont = true; + else + { + std::ostringstream oss; + oss << "This data_tok: id=" << dtokp->id() << " state=" << dtokp->wstate_str(); + 
throw jexception(jerrno::JERR_WMGR_ENQDISCONT, oss.str(), "wmgr", "enqueue"); + } + } + + u_int64_t rid = (dtokp->external_rid() | cont) ? dtokp->rid() : _wrfc.get_incr_rid(); + _enq_rec.reset(rid, data_buff, tot_data_len, xid_ptr, xid_len, _wrfc.owi(), transient, + external); + if (!cont) + { + dtokp->set_rid(rid); + dtokp->set_dequeue_rid(0); + if (xid_len) + dtokp->set_xid(xid_ptr, xid_len); + else + dtokp->clear_xid(); + _enq_busy = true; + } + bool done = false; + while (!done) + { + assert(_pg_offset_dblks < _cache_pgsize_sblks * JRNL_SBLK_SIZE); + void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * JRNL_DBLK_SIZE); + u_int32_t data_offs_dblks = dtokp->dblocks_written(); + u_int32_t ret = _enq_rec.encode(wptr, data_offs_dblks, + (_cache_pgsize_sblks * JRNL_SBLK_SIZE) - _pg_offset_dblks); + + // Remember fid which contains the record header in case record is split over several files + if (data_offs_dblks == 0) + dtokp->set_fid(_wrfc.index()); + _pg_offset_dblks += ret; + _cached_offset_dblks += ret; + dtokp->incr_dblocks_written(ret); + dtokp->incr_pg_cnt(); + _page_cb_arr[_pg_index]._pdtokl->push_back(dtokp); + + // Is the encoding of this record complete? + if (dtokp->dblocks_written() >= _enq_rec.rec_size_dblks()) + { + // TODO: Incorrect - must set state to ENQ_CACHED; ENQ_SUBM is set when AIO returns. + dtokp->set_wstate(data_tok::ENQ_SUBM); + dtokp->set_dsize(tot_data_len); + // Only add this data token to page token list when submit is complete, this way + // long multi-page messages have their token on the page containing the END of the + // message. AIO callbacks will then only process this token when entire message is + // enqueued. 
+ _wrfc.incr_enqcnt(dtokp->fid()); + + if (xid_len) // If part of transaction, add to transaction map + { + std::string xid((const char*)xid_ptr, xid_len); + _tmap.insert_txn_data(xid, txn_data(rid, 0, dtokp->fid(), true)); + } + else + { + if (_emap.insert_pfid(rid, dtokp->fid()) < enq_map::EMAP_OK) // fail + { + // The only error code emap::insert_pfid() returns is enq_map::EMAP_DUP_RID. + std::ostringstream oss; + oss << std::hex << "rid=0x" << rid << " _pfid=0x" << dtokp->fid(); + throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "wmgr", "enqueue"); + } + } + + done = true; + } + else + dtokp->set_wstate(data_tok::ENQ_PART); + + file_header_check(rid, cont, _enq_rec.rec_size_dblks() - data_offs_dblks); + flush_check(res, cont, done); + } + if (dtokp->wstate() >= data_tok::ENQ_SUBM) + _enq_busy = false; + return res; +} + +/* dequeue(): encodes a dequeue record for dtokp into the write page cache, possibly over several pages and several calls. Returns RHM_IORES_BUSY while an enqueue, abort or commit is in progress; otherwise returns the result of pre_write_check(), as later modified by flush_check() inside the encode loop. Throws jexception on a discontinued dequeue or on an enqueue map lookup failure. */ iores +wmgr::dequeue(data_tok* dtokp, const void* const xid_ptr, const std::size_t xid_len, const bool txn_coml_commit) +{ + if (xid_len) + assert(xid_ptr != 0); + + if (_enq_busy || _abort_busy || _commit_busy) + return RHM_IORES_BUSY; + + iores res = pre_write_check(WMGR_DEQUEUE, dtokp); + if (res != RHM_IORES_SUCCESS) + return res; + + bool cont = false; + if (_deq_busy) // If dequeue() exited last time with RHM_IORES_FULL or RHM_IORES_PAGE_AIOWAIT + { + if (dtokp->wstate() == data_tok::DEQ_PART) + cont = true; + else + { + std::ostringstream oss; + oss << "This data_tok: id=" << dtokp->id() << " state=" << dtokp->wstate_str(); + throw jexception(jerrno::JERR_WMGR_DEQDISCONT, oss.str(), "wmgr", "dequeue"); + } + } + + const bool ext_rid = dtokp->external_rid(); + /* A new rid is drawn from _wrfc only for a fresh (non-continued) dequeue whose token does not carry an external rid; in that case the token's current rid becomes the rid being dequeued. */ u_int64_t rid = (ext_rid | cont) ? dtokp->rid() : _wrfc.get_incr_rid(); + u_int64_t dequeue_rid = (ext_rid | cont) ?
dtokp->dequeue_rid() : dtokp->rid(); + _deq_rec.reset(rid, dequeue_rid, xid_ptr, xid_len, _wrfc.owi(), txn_coml_commit); + if (!cont) + { + if (!ext_rid) + { + dtokp->set_rid(rid); + dtokp->set_dequeue_rid(dequeue_rid); + } + if (xid_len) + dtokp->set_xid(xid_ptr, xid_len); + else + dtokp->clear_xid(); + dequeue_check(dtokp->xid(), dequeue_rid); + dtokp->set_dblocks_written(0); // Reset dblks_written from previous op + _deq_busy = true; + } + bool done = false; + while (!done) + { + assert(_pg_offset_dblks < _cache_pgsize_sblks * JRNL_SBLK_SIZE); + /* wptr: start of the current cache page plus the current dblk offset converted to bytes */ void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * JRNL_DBLK_SIZE); + u_int32_t data_offs_dblks = dtokp->dblocks_written(); + u_int32_t ret = _deq_rec.encode(wptr, data_offs_dblks, + (_cache_pgsize_sblks * JRNL_SBLK_SIZE) - _pg_offset_dblks); + + // Remember fid which contains the record header in case record is split over several files + if (data_offs_dblks == 0) + dtokp->set_fid(_wrfc.index()); + _pg_offset_dblks += ret; + _cached_offset_dblks += ret; + dtokp->incr_dblocks_written(ret); + dtokp->incr_pg_cnt(); + _page_cb_arr[_pg_index]._pdtokl->push_back(dtokp); + + // Is the encoding of this record complete? + if (dtokp->dblocks_written() >= _deq_rec.rec_size_dblks()) + { + // TODO: Incorrect - the state is set to DEQ_SUBM here although the record has only been cached; the page is submitted later, in write_flush(). (This note was copied from enqueue(), where it names ENQ_CACHED/ENQ_SUBM.)
+ dtokp->set_wstate(data_tok::DEQ_SUBM); + + if (xid_len) // If part of transaction, add to transaction map + { + // If the enqueue is part of a pending txn, it will not yet be in emap + _emap.lock(dequeue_rid); // ignore rid not found error + std::string xid((const char*)xid_ptr, xid_len); + _tmap.insert_txn_data(xid, txn_data(rid, dequeue_rid, dtokp->fid(), false)); + } + else + { + int16_t fid = _emap.get_remove_pfid(dtokp->dequeue_rid()); + if (fid < enq_map::EMAP_OK) // fail + { + if (fid == enq_map::EMAP_RID_NOT_FOUND) + { + std::ostringstream oss; + oss << std::hex << "rid=0x" << rid; + throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "wmgr", "dequeue"); + } + if (fid == enq_map::EMAP_LOCKED) + { + std::ostringstream oss; + oss << std::hex << "rid=0x" << rid; + throw jexception(jerrno::JERR_MAP_LOCKED, oss.str(), "wmgr", "dequeue"); + } + } + /* NOTE(review): any other negative fid would fall through to here - confirm get_remove_pfid() returns only the two error codes handled above */ _wrfc.decr_enqcnt(fid); + } + + done = true; + } + else + dtokp->set_wstate(data_tok::DEQ_PART); + + file_header_check(rid, cont, _deq_rec.rec_size_dblks() - data_offs_dblks); + flush_check(res, cont, done); + } + if (dtokp->wstate() >= data_tok::DEQ_SUBM) + _deq_busy = false; + return res; +} + +/* abort(): encodes a transaction abort record for xid into the write page cache. Returns RHM_IORES_BUSY while an enqueue, dequeue or commit is in progress. */ iores +wmgr::abort(data_tok* dtokp, const void* const xid_ptr, const std::size_t xid_len) +{ + // commit and abort MUST have a valid xid + assert(xid_ptr != 0 && xid_len > 0); + + if (_enq_busy || _deq_busy || _commit_busy) + return RHM_IORES_BUSY; + + iores res = pre_write_check(WMGR_ABORT, dtokp); + if (res != RHM_IORES_SUCCESS) + return res; + + bool cont = false; + if (_abort_busy) // If abort() exited last time with RHM_IORES_FULL or RHM_IORES_PAGE_AIOWAIT + { + if (dtokp->wstate() == data_tok::ABORT_PART) + cont = true; + else + { + std::ostringstream oss; + oss << "This data_tok: id=" << dtokp->id() << " state=" << dtokp->wstate_str(); + /* NOTE(review): an abort discontinuity is reported with the dequeue error code JERR_WMGR_DEQDISCONT - confirm this is intended */ throw jexception(jerrno::JERR_WMGR_DEQDISCONT, oss.str(), "wmgr", "abort"); + } + } + + u_int64_t rid = (dtokp->external_rid() | cont) ?
dtokp->rid() : _wrfc.get_incr_rid(); + /* The magic tells the two users of _txn_rec apart: abort() passes RHM_JDAT_TXA_MAGIC, commit() passes RHM_JDAT_TXC_MAGIC */ _txn_rec.reset(RHM_JDAT_TXA_MAGIC, rid, xid_ptr, xid_len, _wrfc.owi()); + if (!cont) + { + dtokp->set_rid(rid); + dtokp->set_dequeue_rid(0); + dtokp->set_xid(xid_ptr, xid_len); + dtokp->set_dblocks_written(0); // Reset dblks_written from previous op + _abort_busy = true; + } + bool done = false; + while (!done) + { + assert(_pg_offset_dblks < _cache_pgsize_sblks * JRNL_SBLK_SIZE); + /* wptr: start of the current cache page plus the current dblk offset converted to bytes */ void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * JRNL_DBLK_SIZE); + u_int32_t data_offs_dblks = dtokp->dblocks_written(); + u_int32_t ret = _txn_rec.encode(wptr, data_offs_dblks, + (_cache_pgsize_sblks * JRNL_SBLK_SIZE) - _pg_offset_dblks); + + // Remember fid which contains the record header in case record is split over several files + if (data_offs_dblks == 0) + dtokp->set_fid(_wrfc.index()); + _pg_offset_dblks += ret; + _cached_offset_dblks += ret; + dtokp->incr_dblocks_written(ret); + dtokp->incr_pg_cnt(); + _page_cb_arr[_pg_index]._pdtokl->push_back(dtokp); + + // Is the encoding of this record complete?
+ if (dtokp->dblocks_written() >= _txn_rec.rec_size_dblks()) + { + dtokp->set_wstate(data_tok::ABORT_SUBM); + + // Delete this txn from tmap, unlock any locked records in emap + std::string xid((const char*)xid_ptr, xid_len); + txn_data_list tdl = _tmap.get_remove_tdata_list(xid); // tdl will be empty if xid not found + for (tdl_itr itr = tdl.begin(); itr != tdl.end(); itr++) + { + if (!itr->_enq_flag) + _emap.unlock(itr->_drid); // ignore rid not found error + /* An aborted transactional enqueue was already counted by enqueue() via incr_enqcnt(); undo that count for its file */ if (itr->_enq_flag) + _wrfc.decr_enqcnt(itr->_pfid); + } + /* NOTE: this 'res' is a new local scoped to the enclosing if-block; it shadows the iores 'res' that is returned at the end of the function */ std::pair<std::set<std::string>::iterator, bool> res = _txn_pending_set.insert(xid); + if (!res.second) + { + std::ostringstream oss; + oss << std::hex << "_txn_pending_set: xid=\"" << xid << "\""; + throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "wmgr", "abort"); + } + + done = true; + } + else + dtokp->set_wstate(data_tok::ABORT_PART); + + file_header_check(rid, cont, _txn_rec.rec_size_dblks() - data_offs_dblks); + flush_check(res, cont, done); + } + if (dtokp->wstate() >= data_tok::ABORT_SUBM) + _abort_busy = false; + return res; +} + +/* commit(): encodes a transaction commit record for xid into the write page cache. Returns RHM_IORES_BUSY while an enqueue, dequeue or abort is in progress. */ iores +wmgr::commit(data_tok* dtokp, const void* const xid_ptr, const std::size_t xid_len) +{ + // commit and abort MUST have a valid xid + assert(xid_ptr != 0 && xid_len > 0); + + if (_enq_busy || _deq_busy || _abort_busy) + return RHM_IORES_BUSY; + + iores res = pre_write_check(WMGR_COMMIT, dtokp); + if (res != RHM_IORES_SUCCESS) + return res; + + bool cont = false; + if (_commit_busy) // If commit() exited last time with RHM_IORES_FULL or RHM_IORES_PAGE_AIOWAIT + { + if (dtokp->wstate() == data_tok::COMMIT_PART) + cont = true; + else + { + std::ostringstream oss; + oss << "This data_tok: id=" << dtokp->id() << " state=" << dtokp->wstate_str(); + /* NOTE(review): a commit discontinuity is reported with the dequeue error code JERR_WMGR_DEQDISCONT - confirm this is intended */ throw jexception(jerrno::JERR_WMGR_DEQDISCONT, oss.str(), "wmgr", "commit"); + } + } + + u_int64_t rid = (dtokp->external_rid() | cont) ?
dtokp->rid() : _wrfc.get_incr_rid(); + _txn_rec.reset(RHM_JDAT_TXC_MAGIC, rid, xid_ptr, xid_len, _wrfc.owi()); + if (!cont) + { + dtokp->set_rid(rid); + dtokp->set_dequeue_rid(0); + dtokp->set_xid(xid_ptr, xid_len); + dtokp->set_dblocks_written(0); // Reset dblks_written from previous op + _commit_busy = true; + } + bool done = false; + while (!done) + { + assert(_pg_offset_dblks < _cache_pgsize_sblks * JRNL_SBLK_SIZE); + void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * JRNL_DBLK_SIZE); + u_int32_t data_offs_dblks = dtokp->dblocks_written(); + u_int32_t ret = _txn_rec.encode(wptr, data_offs_dblks, + (_cache_pgsize_sblks * JRNL_SBLK_SIZE) - _pg_offset_dblks); + + // Remember fid which contains the record header in case record is split over several files + if (data_offs_dblks == 0) + dtokp->set_fid(_wrfc.index()); + _pg_offset_dblks += ret; + _cached_offset_dblks += ret; + dtokp->incr_dblocks_written(ret); + dtokp->incr_pg_cnt(); + _page_cb_arr[_pg_index]._pdtokl->push_back(dtokp); + + // Is the encoding of this record complete? + if (dtokp->dblocks_written() >= _txn_rec.rec_size_dblks()) + { + dtokp->set_wstate(data_tok::COMMIT_SUBM); + + // Delete this txn from tmap, process records into emap + std::string xid((const char*)xid_ptr, xid_len); + txn_data_list tdl = _tmap.get_remove_tdata_list(xid); // tdl will be empty if xid not found + for (tdl_itr itr = tdl.begin(); itr != tdl.end(); itr++) + { + if (itr->_enq_flag) // txn enqueue + { + if (_emap.insert_pfid(itr->_rid, itr->_pfid) < enq_map::EMAP_OK) // fail + { + // The only error code emap::insert_pfid() returns is enq_map::EMAP_DUP_RID. 
+ std::ostringstream oss; + oss << std::hex << "rid=0x" << itr->_rid << " _pfid=0x" << itr->_pfid; + throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "wmgr", "commit"); + } + } + else // txn dequeue + { + int16_t fid = _emap.get_remove_pfid(itr->_drid, true); + if (fid < enq_map::EMAP_OK) // fail + { + if (fid == enq_map::EMAP_RID_NOT_FOUND) + { + std::ostringstream oss; + oss << std::hex << "rid=0x" << rid; + throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "wmgr", "dequeue"); + } + if (fid == enq_map::EMAP_LOCKED) + { + std::ostringstream oss; + oss << std::hex << "rid=0x" << rid; + throw jexception(jerrno::JERR_MAP_LOCKED, oss.str(), "wmgr", "dequeue"); + } + } + _wrfc.decr_enqcnt(fid); + } + } + std::pair<std::set<std::string>::iterator, bool> res = _txn_pending_set.insert(xid); + if (!res.second) + { + std::ostringstream oss; + oss << std::hex << "_txn_pending_set: xid=\"" << xid << "\""; + throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "wmgr", "commit"); + } + + done = true; + } + else + dtokp->set_wstate(data_tok::COMMIT_PART); + + file_header_check(rid, cont, _txn_rec.rec_size_dblks() - data_offs_dblks); + flush_check(res, cont, done); + } + if (dtokp->wstate() >= data_tok::COMMIT_SUBM) + _commit_busy = false; + return res; +} + +void +wmgr::file_header_check(const u_int64_t rid, const bool cont, const u_int32_t rec_dblks_rem) +{ + // Has the file header been written (i.e. write pointers still at 0)? 
+ if (_wrfc.is_void()) + { + bool file_fit = rec_dblks_rem <= _jfsize_dblks; + bool file_full = rec_dblks_rem == _jfsize_dblks; + /* fro is handed to write_fhdr() (presumably the "first record offset" stored in the file header - confirm): one sblk in bytes when a new record starts in this file; for a continued record that fits with room to spare, the remaining record size plus one sblk, in bytes; otherwise 0. */ std::size_t fro = 0; + if (cont) + { + if (file_fit && !file_full) + fro = (rec_dblks_rem + JRNL_SBLK_SIZE) * JRNL_DBLK_SIZE; + } + else + fro = JRNL_SBLK_SIZE * JRNL_DBLK_SIZE; + write_fhdr(rid, _wrfc.index(), _wrfc.index(), fro); + } +} + +/* flush_check(): called at the end of each encode-loop pass. When the current page is full it submits the page via write_flush() and, if the file's page count is used up, rotates to the next file. res, cont and done are updated in place for the caller's loop. */ void +wmgr::flush_check(iores& res, bool& cont, bool& done) +{ + // If page is full, flush + if (_pg_offset_dblks >= _cache_pgsize_sblks * JRNL_SBLK_SIZE) + { + res = write_flush(); + /* NOTE(review): write_flush() can also return RHM_IORES_PAGE_AIOWAIT (its AIO_PENDING branch) - confirm that cannot occur on this path */ assert(res == RHM_IORES_SUCCESS); + + if (_page_cb_arr[_pg_index]._state == AIO_PENDING && !done) + { + res = RHM_IORES_PAGE_AIOWAIT; + done = true; + } + + // If file is full, rotate to next file + if (_pg_cntr >= _jfsize_pgs) + { + iores rfres = rotate_file(); + if (rfres != RHM_IORES_SUCCESS) + res = rfres; + if (!done) + { + if (rfres == RHM_IORES_SUCCESS) + cont = true; + else + done = true; + } + } + } +} + +/* flush(): writes out the current page via write_flush(), then rotates the file if its page count is used up; a failed rotation replaces the flush result. */ iores +wmgr::flush() +{ + iores res = write_flush(); + if (_pg_cntr >= _jfsize_pgs) + { + iores rfres = rotate_file(); + if (rfres != RHM_IORES_SUCCESS) + res = rfres; + } + return res; +} + +/* write_flush(): submits the cached, not yet submitted part of the current page as a single AIO write, then polls for completed AIO events. */ iores +wmgr::write_flush() +{ + iores res = RHM_IORES_SUCCESS; + // Don't bother flushing an empty page or one that is still in state AIO_PENDING + if (_cached_offset_dblks) + { + if (_page_cb_arr[_pg_index]._state == AIO_PENDING) + res = RHM_IORES_PAGE_AIOWAIT; + else + { + if (_page_cb_arr[_pg_index]._state != IN_USE) + { + std::ostringstream oss; + oss << "pg_index=" << _pg_index << " state=" << _page_cb_arr[_pg_index].state_str(); + throw jexception(jerrno::JERR_WMGR_BADPGSTATE, oss.str(), "wmgr", + "write_flush"); + } + + // Send current page using AIO + + // In manual flushes, dblks may not coincide with sblks, add filler records ("RHMx") + // if necessary.
+ dblk_roundup(); + + /* pg_offs: byte offset, within the current page, of the first dblk that has not been submitted yet */ std::size_t pg_offs = (_pg_offset_dblks - _cached_offset_dblks) * JRNL_DBLK_SIZE; + aio_cb* aiocbp = &_aio_cb_arr[_pg_index]; + aio::prep_pwrite_2(aiocbp, _wrfc.fh(), + (char*)_page_ptr_arr[_pg_index] + pg_offs, _cached_offset_dblks * JRNL_DBLK_SIZE, + _wrfc.subm_offs()); + page_cb* pcbp = (page_cb*)(aiocbp->data); // This page control block (pcb) + /* Record the dblk count and the file controller at submit time; get_events() reads both back from the pcb when the write completes */ pcbp->_wdblks = _cached_offset_dblks; + pcbp->_wfh = _wrfc.file_controller(); + if (aio::submit(_ioctx, 1, &aiocbp) < 0) + throw jexception(jerrno::JERR__AIO, "wmgr", "write_flush"); + _wrfc.add_subm_cnt_dblks(_cached_offset_dblks); + _wrfc.incr_aio_cnt(); + _aio_evt_rem++; + _cached_offset_dblks = 0; + _jc->instr_incr_outstanding_aio_cnt(); + + rotate_page(); // increments _pg_index, resets _pg_offset_dblks if req'd + if (_page_cb_arr[_pg_index]._state == UNUSED) + _page_cb_arr[_pg_index]._state = IN_USE; + } + } + /* Poll for completions; pages whose write has completed are set to UNUSED by get_events(), so the current page may have become available again */ get_events(UNUSED, 0); + if (_page_cb_arr[_pg_index]._state == UNUSED) + _page_cb_arr[_pg_index]._state = IN_USE; + return res; +} + +/* rotate_file(): resets the per-file page counter and moves _wrfc to the next journal file; returns the result of wrfc::rotate(). */ iores +wmgr::rotate_file() +{ + _pg_cntr = 0; + iores res = _wrfc.rotate(); + _jc->chk_wr_frot(); + return res; +} + +/* get_events(): collects completed AIO write events and updates data tokens, pages and file controllers accordingly. 'state' is the page state assigned to every page whose write completed. 'flush' selects the minimum event count passed to aio::getevents(): all outstanding events if true, otherwise 1 (presumably the min_nr argument of io_getevents - confirm). Returns the number of data tokens whose operation completed, 0 if there was nothing to do or the call was interrupted, or jerrno::AIO_TIMEOUT. */ int32_t +wmgr::get_events(page_state state, timespec* const timeout, bool flush) +{ + if (_aio_evt_rem == 0) // no events to get + return 0; + + int ret = 0; + if ((ret = aio::getevents(_ioctx, flush ?
_aio_evt_rem : 1, _aio_evt_rem/*_cache_num_pages + _jc->num_jfiles()*/, _aio_event_arr, timeout)) < 0) + { + if (ret == -EINTR) // Interrupted by signal + return 0; + std::ostringstream oss; + oss << "io_getevents() failed: " << std::strerror(-ret) << " (" << ret << ")"; + throw jexception(jerrno::JERR__AIO, oss.str(), "wmgr", "get_events"); + } + + if (ret == 0 && timeout) + return jerrno::AIO_TIMEOUT; + + /* tot_data_toks is accumulated over all events handled by this call and is the return value */ int32_t tot_data_toks = 0; + for (int i=0; i<ret; i++) // Index of returned AIOs + { + if (_aio_evt_rem == 0) + { + std::ostringstream oss; + oss << "_aio_evt_rem; evt " << (i + 1) << " of " << ret; + throw jexception(jerrno::JERR__UNDERFLOW, oss.str(), "wmgr", "get_events"); + } + _aio_evt_rem--; + aio_cb* aiocbp = _aio_event_arr[i].obj; // This I/O control block (iocb) + page_cb* pcbp = (page_cb*)(aiocbp->data); // This page control block (pcb) + long aioret = (long)_aio_event_arr[i].res; + if (aioret < 0) + { + std::ostringstream oss; + oss << "AIO write operation failed: " << std::strerror(-aioret) << " (" << aioret << ") ["; + if (pcbp) + oss << "pg=" << pcbp->_index; + else + { + file_hdr* fhp = (file_hdr*)aiocbp->u.c.buf; + oss << "fid=" << fhp->_pfid; + } + oss << " size=" << aiocbp->u.c.nbytes; + oss << " offset=" << aiocbp->u.c.offset << " fh=" << aiocbp->aio_fildes << "]"; + throw jexception(jerrno::JERR__AIO, oss.str(), "wmgr", "get_events"); + } + if (pcbp) // Page writes have pcb + { + u_int32_t s = pcbp->_pdtokl->size(); + /* dtokl collects the tokens completed by this page write; it is what the AIO callback receives */ std::vector<data_tok*> dtokl; + dtokl.reserve(s); + for (u_int32_t k=0; k<s; k++) + { + data_tok* dtokp = pcbp->_pdtokl->at(k); + /* The encode loops call incr_pg_cnt() once per page a token was written to, so a token is only processed once the count drops back to 0 */ if (dtokp->decr_pg_cnt() == 0) + { + std::set<std::string>::iterator it; + switch (dtokp->wstate()) + { + case data_tok::ENQ_SUBM: + dtokl.push_back(dtokp); + tot_data_toks++; + dtokp->set_wstate(data_tok::ENQ); + if (dtokp->has_xid()) + // Ignoring return value here. A non-zero return can signify that the transaction + // has committed or aborted, which was completed prior to the aio returning.
+ _tmap.set_aio_compl(dtokp->xid(), dtokp->rid()); + break; + case data_tok::DEQ_SUBM: + dtokl.push_back(dtokp); + tot_data_toks++; + dtokp->set_wstate(data_tok::DEQ); + if (dtokp->has_xid()) + // Ignoring return value - see note above. + _tmap.set_aio_compl(dtokp->xid(), dtokp->rid()); + break; + case data_tok::ABORT_SUBM: + dtokl.push_back(dtokp); + tot_data_toks++; + dtokp->set_wstate(data_tok::ABORTED); + /* abort() inserted this xid into _txn_pending_set when it finished encoding; the write has now completed, so remove it */ it = _txn_pending_set.find(dtokp->xid()); + if (it == _txn_pending_set.end()) + { + std::ostringstream oss; + oss << std::hex << "_txn_pending_set: abort xid=\""; + oss << dtokp->xid() << "\""; + throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "wmgr", + "get_events"); + } + _txn_pending_set.erase(it); + break; + case data_tok::COMMIT_SUBM: + dtokl.push_back(dtokp); + tot_data_toks++; + dtokp->set_wstate(data_tok::COMMITTED); + /* commit() inserted this xid into _txn_pending_set when it finished encoding; the write has now completed, so remove it */ it = _txn_pending_set.find(dtokp->xid()); + if (it == _txn_pending_set.end()) + { + std::ostringstream oss; + oss << std::hex << "_txn_pending_set: commit xid=\""; + oss << dtokp->xid() << "\""; + throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "wmgr", + "get_events"); + } + _txn_pending_set.erase(it); + break; + case data_tok::ENQ_PART: + case data_tok::DEQ_PART: + case data_tok::ABORT_PART: + case data_tok::COMMIT_PART: + // ignore these + break; + default: + // throw for anything else + std::ostringstream oss; + oss << "dtok_id=" << dtokp->id() << " dtok_state=" << dtokp->wstate_str(); + throw jexception(jerrno::JERR_WMGR_BADDTOKSTATE, oss.str(), "wmgr", + "get_events"); + } // switch + } // if + } // for + + // Increment the completed write offset + // NOTE: We cannot use _wrfc here, as it may have rotated since submitting count. + // Use stored pointer to fcntl in the pcb instead.
+ pcbp->_wfh->add_wr_cmpl_cnt_dblks(pcbp->_wdblks); + pcbp->_wfh->decr_aio_cnt(); + _jc->instr_decr_outstanding_aio_cnt(); + + // Clean up this pcb's data_tok list + pcbp->_pdtokl->clear(); + pcbp->_state = state; + + // Perform AIO return callback + if (_cbp && tot_data_toks) + _cbp->wr_aio_cb(dtokl); + } + else // File header writes have no pcb + { + // get lfid from original file header record, update info for that lfid + file_hdr* fhp = (file_hdr*)aiocbp->u.c.buf; + u_int32_t lfid = fhp->_lfid; + fcntl* fcntlp = _jc->get_fcntlp(lfid); + fcntlp->add_wr_cmpl_cnt_dblks(JRNL_SBLK_SIZE); + fcntlp->decr_aio_cnt(); + fcntlp->set_wr_fhdr_aio_outstanding(false); + } + } + + return tot_data_toks; +} + +bool +wmgr::is_txn_synced(const std::string& xid) +{ + // Ignore xid not found error here + if (_tmap.is_txn_synced(xid) == txn_map::TMAP_NOT_SYNCED) + return false; + // Check for outstanding commit/aborts + std::set<std::string>::iterator it = _txn_pending_set.find(xid); + return it == _txn_pending_set.end(); +} + +void +wmgr::initialize(aio_callback* const cbp, const u_int32_t wcache_pgsize_sblks, const u_int16_t wcache_num_pages) +{ + pmgr::initialize(cbp, wcache_pgsize_sblks, wcache_num_pages); + wmgr::clean(); + _num_jfiles = _jc->num_jfiles(); + if (::posix_memalign(&_fhdr_base_ptr, _sblksize, _sblksize * _num_jfiles)) + { + wmgr::clean(); + std::ostringstream oss; + oss << "posix_memalign(): blksize=" << _sblksize << " size=" << _sblksize; + oss << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR__MALLOC, oss.str(), "wmgr", "initialize"); + } + _fhdr_ptr_arr = (void**)std::malloc(_num_jfiles * sizeof(void*)); + MALLOC_CHK(_fhdr_ptr_arr, "_fhdr_ptr_arr", "wmgr", "initialize"); + _fhdr_aio_cb_arr = (aio_cb**)std::malloc(sizeof(aio_cb*) * _num_jfiles); + MALLOC_CHK(_fhdr_aio_cb_arr, "_fhdr_aio_cb_arr", "wmgr", "initialize"); + std::memset(_fhdr_aio_cb_arr, 0, sizeof(aio_cb*) * _num_jfiles); + for (u_int16_t i=0; i<_num_jfiles; i++) + { + _fhdr_ptr_arr[i] = 
(void*)((char*)_fhdr_base_ptr + _sblksize * i); + _fhdr_aio_cb_arr[i] = new aio_cb; + } + _page_cb_arr[0]._state = IN_USE; + _ddtokl.clear(); + _cached_offset_dblks = 0; + /* NOTE(review): only _enq_busy is reset here; _deq_busy, _abort_busy and _commit_busy are not touched by this function - presumably they are cleared by the constructor, confirm for re-initialization */ _enq_busy = false; +} + +/* pre_write_check(): common precondition check run by the write operations before encoding. Returns RHM_IORES_FULL if the current file cannot be reset for writing, RHM_IORES_PAGE_AIOWAIT if the current page is still AIO_PENDING, RHM_IORES_ENQCAPTHRESH if a new enqueue would cross the enqueue threshold, otherwise RHM_IORES_SUCCESS. Throws jexception for an unexpected page state or a data token in the wrong state for op. xidsize, dsize and external are only used for WMGR_ENQUEUE. */ iores +wmgr::pre_write_check(const _op_type op, const data_tok* const dtokp, + const std::size_t xidsize, const std::size_t dsize, const bool external + ) const +{ + // Check status of current file + if (!_wrfc.is_wr_reset()) + { + if (!_wrfc.wr_reset()) + return RHM_IORES_FULL; + } + + // Check status of current page is ok for writing + if (_page_cb_arr[_pg_index]._state != IN_USE) + { + /* Although this method is const, it moves an UNUSED page to IN_USE through _page_cb_arr */ if (_page_cb_arr[_pg_index]._state == UNUSED) + _page_cb_arr[_pg_index]._state = IN_USE; + else if (_page_cb_arr[_pg_index]._state == AIO_PENDING) + return RHM_IORES_PAGE_AIOWAIT; + else + { + std::ostringstream oss; + oss << "jrnl=" << _jc->id() << " op=" << _op_str[op]; + oss << " index=" << _pg_index << " pg_state=" << _page_cb_arr[_pg_index].state_str(); + throw jexception(jerrno::JERR_WMGR_BADPGSTATE, oss.str(), "wmgr", "pre_write_check"); + } + } + + // operation-specific checks + switch (op) + { + case WMGR_ENQUEUE: + { + // Check for enqueue reaching cutoff threshold + u_int32_t size_dblks = jrec::size_dblks(enq_rec::rec_size(xidsize, dsize, + external)); + /* The threshold is only tested for a new enqueue; an enqueue that is being continued (_enq_busy set) is allowed to finish */ if (!_enq_busy && _wrfc.enq_threshold(_cached_offset_dblks + size_dblks)) + return RHM_IORES_ENQCAPTHRESH; + if (!dtokp->is_writable()) + { + std::ostringstream oss; + oss << "jrnl=" << _jc->id() << " op=" << _op_str[op]; + oss << " dtok_id=" << dtokp->id() << " dtok_state=" << dtokp->wstate_str(); + throw jexception(jerrno::JERR_WMGR_BADDTOKSTATE, oss.str(), "wmgr", + "pre_write_check"); + } + } + break; + case WMGR_DEQUEUE: + if (!dtokp->is_dequeueable()) + { + std::ostringstream oss; + oss << "jrnl=" << _jc->id() << " op=" << _op_str[op]; + oss << " dtok_id=" << dtokp->id() << " dtok_state=" << dtokp->wstate_str(); + throw jexception(jerrno::JERR_WMGR_BADDTOKSTATE, oss.str(), "wmgr", + "pre_write_check"); + } + break; +
case WMGR_ABORT: + break; + case WMGR_COMMIT: + break; + } + + return RHM_IORES_SUCCESS; +} + +/* dequeue_check(): verifies that the record drid may be dequeued. It must be present in the enqueue map or, when xid is non-empty, in the transaction map under xid. Throws JERR_MAP_LOCKED if the enqueue map entry is locked and JERR_WMGR_DEQRIDNOTENQ if the record is found in neither map. */ void +wmgr::dequeue_check(const std::string& xid, const u_int64_t drid) +{ + // First check emap + bool found = false; + int16_t fid = _emap.get_pfid(drid); + if (fid < enq_map::EMAP_OK) // fail + { + if (fid == enq_map::EMAP_RID_NOT_FOUND) + { + if (xid.size()) + found = _tmap.data_exists(xid, drid); + } + else if (fid == enq_map::EMAP_LOCKED) + { + std::ostringstream oss; + oss << std::hex << "drid=0x" << drid; + throw jexception(jerrno::JERR_MAP_LOCKED, oss.str(), "wmgr", "dequeue_check"); + } + } + else + found = true; + if (!found) + { + std::ostringstream oss; + oss << "jrnl=" << _jc->id() << " drid=0x" << std::hex << drid; + throw jexception(jerrno::JERR_WMGR_DEQRIDNOTENQ, oss.str(), "wmgr", "dequeue_check"); + } +} + +/* dblk_roundup(): pads the cached data of the current page up to wdblks, one filler dblk at a time; each filler dblk starts with RHM_JDAT_EMPTY_MAGIC. wdblks is presumably the cached size rounded up to a whole number of sblks - confirm against jrec::size_blks(). */ void +wmgr::dblk_roundup() +{ + const u_int32_t xmagic = RHM_JDAT_EMPTY_MAGIC; + u_int32_t wdblks = jrec::size_blks(_cached_offset_dblks, JRNL_SBLK_SIZE) * JRNL_SBLK_SIZE; + while (_cached_offset_dblks < wdblks) + { + void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * JRNL_DBLK_SIZE); + std::memcpy(wptr, (const void*)&xmagic, sizeof(xmagic)); +#ifdef RHM_CLEAN + std::memset((char*)wptr + sizeof(xmagic), RHM_CLEAN_CHAR, JRNL_DBLK_SIZE - sizeof(xmagic)); +#endif + _pg_offset_dblks++; + _cached_offset_dblks++; + } +} + +/* write_fhdr(): builds a file header in the header buffer reserved for file fid and submits it as an AIO write of _sblksize bytes at offset 0 of the current write file. */ void +wmgr::write_fhdr(u_int64_t rid, u_int16_t fid, u_int16_t lid, std::size_t fro) +{ + file_hdr fhdr(RHM_JDAT_FILE_MAGIC, RHM_JDAT_VERSION, rid, fid, lid, fro, _wrfc.owi(), true); + std::memcpy(_fhdr_ptr_arr[fid], &fhdr, sizeof(fhdr)); +#ifdef RHM_CLEAN + std::memset((char*)_fhdr_ptr_arr[fid] + sizeof(fhdr), RHM_CLEAN_CHAR, _sblksize - sizeof(fhdr)); +#endif + aio_cb* aiocbp = _fhdr_aio_cb_arr[fid]; + aio::prep_pwrite(aiocbp, _wrfc.fh(), _fhdr_ptr_arr[fid], _sblksize, 0); + if (aio::submit(_ioctx, 1, &aiocbp) < 0) + throw jexception(jerrno::JERR__AIO, "wmgr", "write_fhdr"); + _aio_evt_rem++; +
_wrfc.add_subm_cnt_dblks(JRNL_SBLK_SIZE); + _wrfc.incr_aio_cnt(); + _wrfc.file_controller()->set_wr_fhdr_aio_outstanding(true); +} + +/* rotate_page(): marks the current page AIO_PENDING and advances _pg_index with wrap-around. The page offset is reset, and the per-file page counter incremented, only if the page was full. */ void +wmgr::rotate_page() +{ + _page_cb_arr[_pg_index]._state = AIO_PENDING; + if (_pg_offset_dblks >= _cache_pgsize_sblks * JRNL_SBLK_SIZE) + { + _pg_offset_dblks = 0; + _pg_cntr++; + } + if (++_pg_index >= _cache_num_pages) + _pg_index = 0; +} + +/* clean(): frees the file header buffer, the header pointer array and the per-file aio_cb objects, and zeroes the pointers. The delete loop runs over _num_jfiles entries, so _num_jfiles must still hold the count that was in force when the array was filled; initialize() calls clean() before it updates _num_jfiles. */ void +wmgr::clean() +{ + std::free(_fhdr_base_ptr); + _fhdr_base_ptr = 0; + + std::free(_fhdr_ptr_arr); + _fhdr_ptr_arr = 0; + + if (_fhdr_aio_cb_arr) + { + for (u_int32_t i=0; i<_num_jfiles; i++) + delete _fhdr_aio_cb_arr[i]; + std::free(_fhdr_aio_cb_arr); + _fhdr_aio_cb_arr = 0; + } +} + +/* status_str(): one-line debug summary. pi/pc/po are page index, page counter and page offset (dblks); aer is the number of outstanding AIO events; edac shows the enqueue/dequeue/abort/commit busy flags as T or F; ps lists one character per cache page (- unused, U in use, A AIO pending, * AIO complete). */ const std::string +wmgr::status_str() const +{ + std::ostringstream oss; + oss << "wmgr: pi=" << _pg_index << " pc=" << _pg_cntr; + oss << " po=" << _pg_offset_dblks << " aer=" << _aio_evt_rem; + oss << " edac:" << (_enq_busy?"T":"F") << (_deq_busy?"T":"F"); + oss << (_abort_busy?"T":"F") << (_commit_busy?"T":"F"); + oss << " ps=["; + for (int i=0; i<_cache_num_pages; i++) + { + switch (_page_cb_arr[i]._state) + { + case UNUSED: oss << "-"; break; + case IN_USE: oss << "U"; break; + case AIO_PENDING: oss << "A"; break; + case AIO_COMPLETE: oss << "*"; break; + default: oss << _page_cb_arr[i]._state; + } + } + oss << "] " << _wrfc.status_str(); + return oss.str(); +} + +// static + +/* Operation names indexed by _op_type; the order must match that enum (WMGR_ENQUEUE = 0, WMGR_DEQUEUE, WMGR_ABORT, WMGR_COMMIT) */ const char* wmgr::_op_str[] = {"enqueue", "dequeue", "abort", "commit"}; + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/wmgr.h b/cpp/src/qpid/legacystore/jrnl/wmgr.h new file mode 100644 index 0000000000..8347221b1d --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/wmgr.h @@ -0,0 +1,147 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file wmgr.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::wmgr (write manager). See + * class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_WMGR_H +#define QPID_LEGACYSTORE_JRNL_WMGR_H + +namespace mrg +{ +namespace journal +{ +class wmgr; +} +} + +#include <cstring> +#include "qpid/legacystore/jrnl/enums.h" +#include "qpid/legacystore/jrnl/pmgr.h" +#include "qpid/legacystore/jrnl/wrfc.h" +#include <set> + +namespace mrg +{ +namespace journal +{ + + /** + * \brief Class for managing a write page cache of arbitrary size and number of pages. + * + * The write page cache works on the principle of caching the write data within a page until + * that page is either full or flushed; this initiates a single AIO write operation to store + * the data on disk. + * + * The maximum disk throughput is achieved by keeping the write operations of uniform size. + * Waiting for a page cache to fill achieves this; and in high data volume/throughput situations + * achieves the optimal disk throughput. Calling flush() forces a write of the current page cache + * no matter how full it is, and disrupts the uniformity of the write operations. 
This should + * normally only be done if throughput drops and there is a danger of a page of unwritten data + * waiting around for excessive time. + * + * The usual tradeoff between data storage latency and throughput performance applies. + */ + class wmgr : public pmgr + { + private: + wrfc& _wrfc; ///< Ref to write rotating file controller + u_int32_t _max_dtokpp; ///< Max data writes per page + u_int32_t _max_io_wait_us; ///< Max wait in microseconds till submit + void* _fhdr_base_ptr; ///< Base pointer to file header memory + void** _fhdr_ptr_arr; ///< Array of pointers to file headers memory + aio_cb** _fhdr_aio_cb_arr; ///< Array of iocb pointers for file header writes + u_int32_t _cached_offset_dblks; ///< Amount of unwritten data in page (dblocks) + std::deque<data_tok*> _ddtokl; ///< Deferred dequeue data_tok list + u_int32_t _jfsize_dblks; ///< Journal file size in dblks (NOT sblks!) + u_int32_t _jfsize_pgs; ///< Journal file size in cache pages + u_int16_t _num_jfiles; ///< Number of files used in iocb mallocs + + // TODO: Convert _enq_busy etc into a proper threadsafe lock + // TODO: Convert to enum? Are these encodes mutually exclusive? /* NOTE: dequeue(), abort() and commit() each return RHM_IORES_BUSY while any of the other three flags is set */
+ bool _enq_busy; ///< Flag true if enqueue is in progress + bool _deq_busy; ///< Flag true if dequeue is in progress + bool _abort_busy; ///< Flag true if abort is in progress + bool _commit_busy; ///< Flag true if commit is in progress + + enum _op_type { WMGR_ENQUEUE = 0, WMGR_DEQUEUE, WMGR_ABORT, WMGR_COMMIT }; /**< Operation kinds; pre_write_check() uses the value as index into _op_str[] */ + static const char* _op_str[]; /**< Operation names, in _op_type order */ + + enq_rec _enq_rec; ///< Enqueue record used for encoding/decoding + deq_rec _deq_rec; ///< Dequeue record used for encoding/decoding + txn_rec _txn_rec; ///< Transaction record used for encoding/decoding + std::set<std::string> _txn_pending_set; ///< Set containing xids of pending commits/aborts + + public: + wmgr(jcntl* jc, enq_map& emap, txn_map& tmap, wrfc& wrfc); + wmgr(jcntl* jc, enq_map& emap, txn_map& tmap, wrfc& wrfc, const u_int32_t max_dtokpp, + const u_int32_t max_iowait_us); + virtual ~wmgr(); + + void initialize(aio_callback* const cbp, const u_int32_t wcache_pgsize_sblks, + const u_int16_t wcache_num_pages, const u_int32_t max_dtokpp, + const u_int32_t max_iowait_us, std::size_t eo = 0); + iores enqueue(const void* const data_buff, const std::size_t tot_data_len, + const std::size_t this_data_len, data_tok* dtokp, const void* const xid_ptr, + const std::size_t xid_len, const bool transient, const bool external); + iores dequeue(data_tok* dtokp, const void* const xid_ptr, const std::size_t xid_len, + const bool txn_coml_commit); + iores abort(data_tok* dtokp, const void* const xid_ptr, const std::size_t xid_len); + iores commit(data_tok* dtokp, const void* const xid_ptr, const std::size_t xid_len); + iores flush(); /**< Write out the current page even if it is not full */ + int32_t get_events(page_state state, timespec* const timeout, bool flush = false); /**< Process completed AIO writes; returns the number of data tokens completed */ + bool is_txn_synced(const std::string& xid); /**< False while xid is not synced in the txn map or has a commit/abort pending */ + inline bool curr_pg_blocked() const { return _page_cb_arr[_pg_index]._state != UNUSED; } /**< True unless the current page is in state UNUSED */ + inline bool curr_file_blocked() const { return _wrfc.aio_cnt() > 0; } /**< True while the write file controller reports outstanding AIO */ + inline u_int32_t unflushed_dblks() { return _cached_offset_dblks; } /**< Data cached in the current page but not yet submitted (dblks) */ + + // Debug aid + const
std::string status_str() const; + + private: + void initialize(aio_callback* const cbp, const u_int32_t wcache_pgsize_sblks, + const u_int16_t wcache_num_pages); + iores pre_write_check(const _op_type op, const data_tok* const dtokp, + const std::size_t xidsize = 0, const std::size_t dsize = 0, const bool external = false) + const; + void dequeue_check(const std::string& xid, const u_int64_t drid); + void file_header_check(const u_int64_t rid, const bool cont, const u_int32_t rec_dblks_rem); + void flush_check(iores& res, bool& cont, bool& done); + iores write_flush(); + iores rotate_file(); + void dblk_roundup(); + void write_fhdr(u_int64_t rid, u_int16_t fid, u_int16_t lid, std::size_t fro); + void rotate_page(); + void clean(); + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_WMGR_H diff --git a/cpp/src/qpid/legacystore/jrnl/wrfc.cpp b/cpp/src/qpid/legacystore/jrnl/wrfc.cpp new file mode 100644 index 0000000000..43461b66a3 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/wrfc.cpp @@ -0,0 +1,162 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ * + */ + +/** + * \file wrfc.cpp + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::wrfc (write rotating + * file controller). See comments in file wrfc.h for details. + * + * \author Kim van der Riet + */ + +#include "qpid/legacystore/jrnl/wrfc.h" + +#include <cmath> +#include "qpid/legacystore/jrnl/jerrno.h" +#include "qpid/legacystore/jrnl/jexception.h" + +namespace mrg +{ +namespace journal +{ + +wrfc::wrfc(const lpmgr* lpmp): + rfc(lpmp), + _fsize_sblks(0), + _fsize_dblks(0), + _enq_cap_offs_dblks(0), + _rid(0), + _reset_ok(false), + _owi(false), + _frot(true) +{} + +wrfc::~wrfc() +{} + +/* initialize(): sets up the write controller. With recovery data (rdp != 0) the file index, next rid, overwrite indicator and first-rotation flag are taken from rdp and the current file is reset from it; without, the controller starts at file 0 with rid 0. In both cases the file sizes and the enqueue capacity offset are then computed. */ void +wrfc::initialize(const u_int32_t fsize_sblks, rcvdat* rdp) +{ + if (rdp) + { + _fc_index = rdp->_lfid; + _curr_fc = _lpmp->get_fcntlp(_fc_index); + _curr_fc->wr_reset(rdp); + _rid = rdp->_h_rid + 1; + _reset_ok = true; + _owi = rdp->_owi; + _frot = rdp->_frot; + /* The result of rotate() is not checked here */ if (rdp->_lffull) + rotate(); + } + else + { + rfc::initialize(); + rfc::set_findex(0); + _rid = 0ULL; + _reset_ok = false; + } + _fsize_sblks = fsize_sblks; + _fsize_dblks = fsize_sblks * JRNL_SBLK_SIZE; + /* (100 - JRNL_ENQ_THRESHOLD) percent of the total journal size in dblks, rounded up */ _enq_cap_offs_dblks = (u_int32_t)std::ceil(_fsize_dblks * _lpmp->num_jfiles() * (100.0 - JRNL_ENQ_THRESHOLD) / 100); + // Check the offset is at least one file; if not, make it so + if (_enq_cap_offs_dblks < _fsize_dblks) + _enq_cap_offs_dblks = _fsize_dblks; +} + +/* rotate(): advances to the next journal file, wrapping to file 0 after the last one; on wrap the overwrite indicator is toggled and the first-rotation flag cleared. Returns RHM_IORES_FILE_AIOWAIT if the new file still has AIO outstanding, RHM_IORES_FULL if it cannot be reset, otherwise RHM_IORES_SUCCESS. Throws JERR__NINIT if there are no journal files. */ iores wrfc::rotate() +{ + if (!_lpmp->num_jfiles()) + throw jexception(jerrno::JERR__NINIT, "wrfc", "rotate"); + _fc_index++; + if (_fc_index == _lpmp->num_jfiles()) + { + _fc_index = 0; + _owi = !_owi; + _frot = false; + } + _curr_fc = _lpmp->get_fcntlp(_fc_index); + if (_curr_fc->aio_cnt()) + return RHM_IORES_FILE_AIOWAIT; + if (!wr_reset()) //Checks if file is still in use (ie not fully dequeued yet) + return RHM_IORES_FULL; + return RHM_IORES_SUCCESS; +} + +/* earliest_index(): returns 0 during the first rotation, afterwards the index of the file following the current one (with wrap-around). */ u_int16_t wrfc::earliest_index() const +{ + if (_frot) + return 0; + u_int16_t next_index = _fc_index + 1; + if (next_index >=
_lpmp->num_jfiles()) + next_index = 0; + return next_index; +} + +bool +wrfc::enq_threshold(const u_int32_t enq_dsize_dblks) const +{ + u_int32_t subm_dblks = subm_cnt_dblks(); // includes file hdr if > 0 + // This compensates for new files which don't have their file headers written yet, + // as file header space cannot be included in this calculation. + if (subm_dblks != 0) + subm_dblks -= 4; + u_int32_t fwd_dblks = subm_dblks + enq_dsize_dblks + _enq_cap_offs_dblks; + u_int16_t findex = _fc_index; + fcntl* fcp = _curr_fc; + bool in_use = false; + while (fwd_dblks && !(findex != _fc_index && fcp->enqcnt())) + { + fwd_dblks -= fwd_dblks > _fsize_dblks ? _fsize_dblks : fwd_dblks; + if (fwd_dblks) + { + if (++findex == _lpmp->num_jfiles()) + findex = 0; + fcp = _lpmp->get_fcntlp(findex); + } + in_use |= fcp->enqcnt() > 0; + } + // Return true if threshold exceeded + return findex != _fc_index && in_use; +} + +bool wrfc::wr_reset() +{ + _reset_ok = _curr_fc->reset(); // returns false if full (ie file still contains enqueued recs) + return _reset_ok; +} + +// TODO: update this to reflect all status data +std::string +wrfc::status_str() const +{ + std::ostringstream oss; + oss << "wrfc: " << rfc::status_str(); + if (is_active()) + oss << " fcntl[" << _fc_index << "]: " << _curr_fc->status_str(); + return oss.str(); +} + +} // namespace journal +} // namespace mrg diff --git a/cpp/src/qpid/legacystore/jrnl/wrfc.h b/cpp/src/qpid/legacystore/jrnl/wrfc.h new file mode 100644 index 0000000000..f0e4e73151 --- /dev/null +++ b/cpp/src/qpid/legacystore/jrnl/wrfc.h @@ -0,0 +1,154 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +/** + * \file wrfc.h + * + * Qpid asynchronous store plugin library + * + * File containing code for class mrg::journal::wrfc (write rotating + * file controller). See class documentation for details. + * + * \author Kim van der Riet + */ + +#ifndef QPID_LEGACYSTORE_JRNL_WRFC_H +#define QPID_LEGACYSTORE_JRNL_WRFC_H + +namespace mrg +{ +namespace journal +{ +class wrfc; +} +} + +#include <cstddef> +#include "qpid/legacystore/jrnl/enums.h" +#include "qpid/legacystore/jrnl/rrfc.h" + +namespace mrg +{ +namespace journal +{ + + /** + * \class wrfc + * \brief Class to handle write management of a journal rotating file controller. + */ + class wrfc : public rfc + { + private: + u_int32_t _fsize_sblks; ///< Size of journal files in sblks + u_int32_t _fsize_dblks; ///< Size of journal files in dblks + u_int32_t _enq_cap_offs_dblks; ///< Enqueue capacity offset + u_int64_t _rid; ///< Master counter for record ID (rid) + bool _reset_ok; ///< Flag set when reset succeeds + bool _owi; ///< Overwrite indicator + bool _frot; ///< Flag is true for first rotation, false otherwise + + public: + wrfc(const lpmgr* lpmp); + virtual ~wrfc(); + + /** + * \brief Initialize the controller. + * \param fsize_sblks Size of each journal file in sblks. + * \param rdp Struct carrying restore information. Optional for non-restore use, defaults to 0 (NULL). + */ + using rfc::initialize; + void initialize(const u_int32_t fsize_sblks, rcvdat* rdp = 0); + + /** + * \brief Rotate active file controller to next file in rotating file group. 
+ * \exception jerrno::JERR__NINIT if called before calling initialize(). + */ + iores rotate(); + + /** + * \brief Returns the index of the earliest complete file within the rotating + * file group. Unwritten files are excluded. The currently active file is + * excluded unless it is the only written file. + */ + u_int16_t earliest_index() const; + + /** + * \brief Determines if a proposed write would cause the enqueue threshold to be exceeded. + * + * The following routine finds whether the next write will take the write pointer to beyond the + * enqueue limit threshold. The following illustrates how this is achieved. + * <pre> + * Current file index: 4 +---+----------+ + * X's mark still-enqueued records |msg| 1-thresh | + * msg = current msg size + unwritten cache +---+----------+ + * thresh = JRNL_ENQ_THRESHOLD as a fraction ^ V + * +-------+-------+-------+-------+--+----+-------+-+-----+-------+ + * file num ->| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * enq recs ->| X XX |XX XXX |XX XXXX|XXXXXXX|XX | | | X | + * +-------+-------+-------+-------+--+----+-------+-+-----+-------+ + * ^ ^ ^ + * subm_dblks --+ | | + * These files must be free of enqueues + * If not, return true. 
+ * </pre> + * \param enq_dsize_dblks Proposed size of write in dblocks + */ + bool enq_threshold(const u_int32_t enq_dsize_dblks) const; + + inline u_int64_t rid() const { return _rid; } + inline u_int64_t get_incr_rid() { return _rid++; } + bool wr_reset(); + inline bool is_wr_reset() const { return _reset_ok; } + inline bool owi() const { return _owi; } + inline bool frot() const { return _frot; } + + // Convenience access methods to current file controller + + inline int fh() const { return _curr_fc->wr_fh(); } + + inline u_int32_t subm_cnt_dblks() const { return _curr_fc->wr_subm_cnt_dblks(); } + inline std::size_t subm_offs() const { return _curr_fc->wr_subm_offs(); } + inline u_int32_t add_subm_cnt_dblks(u_int32_t a) { return _curr_fc->add_wr_subm_cnt_dblks(a); } + + inline u_int32_t cmpl_cnt_dblks() const { return _curr_fc->wr_cmpl_cnt_dblks(); } + inline std::size_t cmpl_offs() const { return _curr_fc->wr_cmpl_offs(); } + inline u_int32_t add_cmpl_cnt_dblks(u_int32_t a) { return _curr_fc->add_wr_cmpl_cnt_dblks(a); } + + inline u_int16_t aio_cnt() const { return _curr_fc->aio_cnt(); } + inline u_int16_t incr_aio_cnt() { return _curr_fc->incr_aio_cnt(); } + inline u_int16_t decr_aio_cnt() { return _curr_fc->decr_aio_cnt(); } + + inline bool is_void() const { return _curr_fc->wr_void(); } + inline bool is_empty() const { return _curr_fc->wr_empty(); } + inline u_int32_t remaining_dblks() const { return _curr_fc->wr_remaining_dblks(); } + inline bool is_full() const { return _curr_fc->is_wr_full(); }; + inline bool is_compl() const { return _curr_fc->is_wr_compl(); }; + inline u_int32_t aio_outstanding_dblks() const { return _curr_fc->wr_aio_outstanding_dblks(); } + inline bool file_rotate() const { return _curr_fc->wr_file_rotate(); } + + // Debug aid + std::string status_str() const; + }; + +} // namespace journal +} // namespace mrg + +#endif // ifndef QPID_LEGACYSTORE_JRNL_WRFC_H diff --git a/cpp/src/qpid/legacystore/management-schema.xml 
b/cpp/src/qpid/legacystore/management-schema.xml new file mode 100644 index 0000000000..65969f0fb2 --- /dev/null +++ b/cpp/src/qpid/legacystore/management-schema.xml @@ -0,0 +1,99 @@
<schema package="org.apache.qpid.legacystore">

<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements. See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership. The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing,
  software distributed under the License is distributed on an
  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  KIND, either express or implied. See the License for the
  specific language governing permissions and limitations
  under the License.
-->

<!--
  Store: store-wide defaults, plus the configuration and statistics of the
  transaction prepared list journal (the "tpl" prefixed entries).
  NOTE(review): tplCurrentFileCount is uint32 while the other file-count
  properties in this schema are uint16; confirm this is intended.
-->
  <class name="Store">
    <property name="brokerRef" type="objId" access="RO" references="qpid.Broker" index="y" parentRef="y"/>
    <property name="location" type="sstr" access="RO" desc="Logical directory on disk"/>
    <property name="defaultInitialFileCount" type="uint16" access="RO" unit="file" desc="Default number of files initially allocated to each journal"/>
    <property name="defaultDataFileSize" type="uint32" access="RO" unit="RdPg" desc="Default size of each journal data file"/>
    <property name="tplIsInitialized" type="bool" access="RO" desc="Transaction prepared list has been initialized by a transactional prepare"/>
    <property name="tplDirectory" type="sstr" access="RO" desc="Transaction prepared list directory"/>
    <property name="tplWritePageSize" type="uint32" access="RO" unit="byte" desc="Page size in transaction prepared list write-page-cache"/>
    <property name="tplWritePages" type="uint32" access="RO" unit="wpage" desc="Number of pages in transaction prepared list write-page-cache"/>
    <property name="tplInitialFileCount" type="uint16" access="RO" unit="file" desc="Number of files initially allocated to transaction prepared list journal"/>
    <property name="tplDataFileSize" type="uint32" access="RO" unit="byte" desc="Size of each journal data file in transaction prepared list journal"/>
    <property name="tplCurrentFileCount" type="uint32" access="RO" unit="file" desc="Number of files currently allocated to transaction prepared list journal"/>

    <statistic name="tplTransactionDepth" type="hilo32" unit="txn" desc="Number of currently enqueued prepared transactions"/>
    <statistic name="tplTxnPrepares" type="count64" unit="record" desc="Total transaction prepares on transaction prepared list"/>
    <statistic name="tplTxnCommits" type="count64" unit="record" desc="Total transaction commits on transaction prepared list"/>
    <statistic name="tplTxnAborts" type="count64" unit="record" desc="Total transaction aborts on transaction prepared list"/>
    <statistic name="tplOutstandingAIOs" type="hilo32" unit="aio_op" desc="Number of currently outstanding AIO requests in Async IO system"/>
  </class>

<!--
  Journal: configuration, counters and the "expand" method of a single
  journal. queueRef references a qpid.Queue object.
-->
  <class name="Journal">
    <property name="queueRef" type="objId" access="RO" references="qpid.Queue" isGeneralReference="y"/>
    <property name="name" type="sstr" access="RO" index="y"/>
    <property name="directory" type="sstr" access="RO" desc="Directory containing journal files"/>
    <property name="baseFileName" type="sstr" access="RO" desc="Base filename prefix for journal"/>
    <property name="writePageSize" type="uint32" access="RO" unit="byte" desc="Page size in write-page-cache"/>
    <property name="writePages" type="uint32" access="RO" unit="wpage" desc="Number of pages in write-page-cache"/>
    <property name="readPageSize" type="uint32" access="RO" unit="byte" desc="Page size in read-page-cache"/>
    <property name="readPages" type="uint32" access="RO" unit="rpage" desc="Number of pages in read-page-cache"/>
    <property name="initialFileCount" type="uint16" access="RO" unit="file" desc="Number of files initially allocated to this journal"/>
    <property name="autoExpand" type="bool" access="RO" desc="Auto-expand enabled"/>
    <property name="currentFileCount" type="uint16" access="RO" unit="file" desc="Number of files currently allocated to this journal"/>
    <property name="maxFileCount" type="uint16" access="RO" unit="file" desc="Max number of files allowed for this journal"/>
    <property name="dataFileSize" type="uint32" access="RO" unit="byte" desc="Size of each journal data file"/>

    <statistic name="recordDepth" type="hilo32" unit="record" desc="Number of currently enqueued records (durable messages)"/>
    <statistic name="enqueues" type="count64" unit="record" desc="Total enqueued records on journal"/>
    <statistic name="dequeues" type="count64" unit="record" desc="Total dequeued records on journal"/>
    <statistic name="txn" type="count32" unit="record" desc="Total open transactions (xids) on journal"/>
    <statistic name="txnEnqueues" type="count64" unit="record" desc="Total transactional enqueued records on journal"/>
    <statistic name="txnDequeues" type="count64" unit="record" desc="Total transactional dequeued records on journal"/>
    <statistic name="txnCommits" type="count64" unit="record" desc="Total transactional commit records on journal"/>
    <statistic name="txnAborts" type="count64" unit="record" desc="Total transactional abort records on journal"/>
    <statistic name="outstandingAIOs" type="hilo32" unit="aio_op" desc="Number of currently outstanding AIO requests in Async IO system"/>

<!--
    The following are not yet "wired up" in JournalImpl.cpp
-->
    <statistic name="freeFileCount" type="hilo32" unit="file" desc="Number of files free on this journal. Includes free files trapped in holes."/>
    <statistic name="availableFileCount" type="hilo32" unit="file" desc="Number of files available to be written. Excluding holes"/>
    <statistic name="writeWaitFailures" type="count64" unit="record" desc="AIO Wait failures on write"/>
    <statistic name="writeBusyFailures" type="count64" unit="record" desc="AIO Busy failures on write"/>
    <statistic name="readRecordCount" type="count64" unit="record" desc="Records read from the journal"/>
    <statistic name="readBusyFailures" type="count64" unit="record" desc="AIO Busy failures on read"/>
    <statistic name="writePageCacheDepth" type="hilo32" unit="wpage" desc="Current depth of write-page-cache"/>
    <statistic name="readPageCacheDepth" type="hilo32" unit="rpage" desc="Current depth of read-page-cache"/>

    <method name="expand" desc="Increase number of files allocated for this journal">
      <arg name="by" type="uint32" dir="I" desc="Number of files to increase journal size by"/>
    </method>
  </class>

<!--
  Arguments that the events declared after this list refer to by name in
  their "args" attribute.
-->
  <eventArguments>
    <arg name="autoExpand" type="bool" desc="Journal auto-expand enabled"/>
    <arg name="fileSize" type="uint32" desc="Journal file size in bytes"/>
    <arg name="jrnlId" type="sstr" desc="Journal Id"/>
    <arg name="numEnq" type="uint32" desc="Number of recovered enqueues"/>
    <arg name="numFiles" type="uint16" desc="Number of journal files"/>
    <arg name="numTxn" type="uint32" desc="Number of recovered transactions"/>
    <arg name="numTxnDeq" type="uint32" desc="Number of recovered transactional dequeues"/>
    <arg name="numTxnEnq" type="uint32" desc="Number of recovered transactional enqueues"/>
    <arg name="what" type="sstr" desc="Description of event"/>
  </eventArguments>
<!-- Events, each with its severity and the arguments it carries. -->
  <event name="enqThresholdExceeded" sev="warn" args="jrnlId, what"/>
  <event name="created" sev="notice" args="jrnlId, fileSize, numFiles"/>
  <event name="full" sev="error" args="jrnlId, what"/>
  <event name="recovered" sev="notice" args="jrnlId, fileSize, numFiles, numEnq, numTxn, numTxnEnq, numTxnDeq"/>
</schema> |