summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJosh Durgin <josh.durgin@inktank.com>2013-04-12 18:33:57 -0700
committerJosh Durgin <josh.durgin@inktank.com>2013-04-12 18:33:57 -0700
commit06a95a6e3856c6c919bc9e3ff9f49b1ac661492d (patch)
tree4b55d2c99fffb36be8a456439cd7f8fbc503b756
parent98e4c86474fc63a2184cfd7088f9637a2b65f428 (diff)
parent98de67d424fd4ea972130ac737062aa8c093cbff (diff)
downloadceph-06a95a6e3856c6c919bc9e3ff9f49b1ac661492d.tar.gz
Merge branch 'next'
-rw-r--r--ceph.spec.in8
-rwxr-xr-xqa/workunits/rbd/image_read.sh608
-rwxr-xr-xqa/workunits/rbd/qemu-iotests.sh22
-rw-r--r--src/Makefile.am1
-rw-r--r--src/client/Client.cc24
-rw-r--r--src/init-radosgw.sysv91
-rw-r--r--src/mds/Capability.h1
-rw-r--r--src/mds/MDCache.cc45
-rw-r--r--src/mds/MDCache.h1
-rw-r--r--src/mds/MDS.cc1
-rw-r--r--src/mds/MDSTable.cc2
-rw-r--r--src/mds/Server.cc15
-rw-r--r--src/mds/mdstypes.cc4
-rw-r--r--src/os/FileJournal.cc22
-rw-r--r--src/os/FileJournal.h2
-rw-r--r--src/os/Journal.h2
-rw-r--r--src/os/JournalingObjectStore.cc5
-rw-r--r--src/osd/OSD.cc9
-rw-r--r--src/test/test_stress_watch.cc6
19 files changed, 815 insertions, 54 deletions
diff --git a/ceph.spec.in b/ceph.spec.in
index fc4d5466db7..1e5d7f5b818 100644
--- a/ceph.spec.in
+++ b/ceph.spec.in
@@ -289,10 +289,10 @@ make DESTDIR=$RPM_BUILD_ROOT install
find $RPM_BUILD_ROOT -type f -name "*.la" -exec rm -f {} ';'
find $RPM_BUILD_ROOT -type f -name "*.a" -exec rm -f {} ';'
install -D src/init-ceph $RPM_BUILD_ROOT%{_initrddir}/ceph
-install -D src/init-radosgw $RPM_BUILD_ROOT%{_initrddir}/ceph-radosgw
-mkdir -p $RPM_BUILD_ROOT%{_sbindir}
-ln -sf ../../etc/init.d/ceph %{buildroot}/%{_sbindir}/rcceph
-ln -sf ../../etc/init.d/ceph-radosgw %{buildroot}/%{_sbindir}/rcceph-radosgw
+install -D src/init-radosgw.sysv $RPM_BUILD_ROOT%{_initrddir}/ceph-radosgw
+mkdir -p $RPM_BUILD_ROOT/usr/sbin
+ln -sf ../../etc/init.d/ceph %{buildroot}/usr/sbin/rcceph
+ln -sf ../../etc/init.d/ceph-radosgw %{buildroot}/usr/sbin/rcceph-radosgw
install -m 0644 -D src/logrotate.conf $RPM_BUILD_ROOT%{_sysconfdir}/logrotate.d/ceph
install -m 0644 -D src/rgw/logrotate.conf $RPM_BUILD_ROOT%{_sysconfdir}/logrotate.d/radosgw
chmod 0644 $RPM_BUILD_ROOT%{_docdir}/ceph/sample.ceph.conf
diff --git a/qa/workunits/rbd/image_read.sh b/qa/workunits/rbd/image_read.sh
new file mode 100755
index 00000000000..84691f0a89d
--- /dev/null
+++ b/qa/workunits/rbd/image_read.sh
@@ -0,0 +1,608 @@
+#!/bin/bash -e
+
+# Copyright (C) 2013 Inktank Storage, Inc.
+#
+# This is free software; see the source for copying conditions.
+# There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.
+#
+# This is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as
+# published by the Free Software Foundation version 2.
+
+# Alex Elder <elder@inktank.com>
+# April 10, 2013
+
+################################################################
+
+# The purpose of this test is to validate that data read from a
+# mapped rbd image is what it's expected to be.
+#
+# By default it creates an image and fills it with some data. It
+# then reads back the data at a series of offsets known to cover
+# various situations (such as reading the beginning, end, or the
+# entirety of an object, or doing a read that spans multiple
+# objects), and stashes the results in a set of local files.
+#
+# It also creates and maps a snapshot of the original image after
+# it's been filled, and reads back the same ranges of data from the
+# snapshot. It then compares the data read back with what was read
+# back from the original image, verifying they match.
+#
+# You can optionally test clone functionality as well, in which case
+# a clone is made of the snapshot, and the same ranges of data are
+# again read and compared with the original.
+
+################################################################
+
+# Default parameter values. Environment variables, if set, will
+# supersede these defaults.  Such variables have names that begin
+# with "IMAGE_READ_", e.g. use IMAGE_READ_PAGE_SIZE=65536
+# to use 65536 as the page size.
+
+DEFAULT_LOCAL_FILES=false
+DEFAULT_VERBOSE=true # Change parseargs if you switch this to false
+DEFAULT_TEST_CLONES=false
+DEFAULT_FORMAT=1
+DEFAULT_PAGE_SIZE=4096
+DEFAULT_OBJECT_ORDER=22
+MIN_OBJECT_ORDER=9
+MAX_OBJECT_ORDER=32
+
+PROGNAME=$(basename $0)
+
+[ $(id -u) -eq 0 ] && SUSER=true
+
+ORIGINAL=original-$$
+SNAP1=snap1-$$
+CLONE1=clone1-$$
+
+function err() {
+ if [ $# -gt 0 ]; then
+ echo "${PROGNAME}: $@" >&2
+ fi
+ exit 2
+}
+
+function usage() {
+ if [ $# -gt 0 ]; then
+ echo "" >&2
+ echo "${PROGNAME}: $@" >&2
+ fi
+ echo "" >&2
+ echo "Usage: ${PROGNAME} [<options>]" >&2
+ echo "" >&2
+ echo "options are:" >&2
+ echo " -o object_order" >&2
+ echo " must be ${MIN_OBJECT_ORDER}..${MAX_OBJECT_ORDER}" >&2
+ echo " -p page_size (in bytes)" >&2
+ echo " note: there must be at least 4 pages per object" >&2
+ echo " -1" >&2
+ echo " test using format 1 rbd images (default)" >&2
+ echo " -2" >&2
+ echo " test using format 2 rbd images" >&2
+ echo " -c" >&2
+ echo " also test rbd clone images (implies format 2)" >&2
+ echo " -l" >&2
+ echo " use local files rather than rbd images" >&2
+ echo " -v" >&2
+ echo " disable reporting of what's going on" >&2
+ echo "" >&2
+ exit 1
+}
+
+function verbose() {
+ [ "${VERBOSE}" = true ] && echo "$@"
+ true # Don't let the verbose test spoil our return value
+}
+
+function quiet() {
+ "$@" 2> /dev/null
+}
+
+# NOTE(review): despite its name this does not toggle anything -- it is
+# an exact copy of verbose() above and is never called anywhere in this
+# script.  Looks like leftover dead code; TODO confirm and remove.
+function boolean_toggle() {
+ [ "${VERBOSE}" = true ] && echo "$@"
+
+}
+# Parse command-line options (and IMAGE_READ_* environment overrides),
+# validate them, and derive the sizing globals used by the rest of the
+# test: OBJECT_SIZE, OBJECT_PAGES, IMAGE_SIZE (MB), IMAGE_OBJECTS.
+# Calls usage() (which exits) on any invalid combination.
+function parseargs() {
+    local opts="o:p:12clv"
+    local lopts="order:,page_size:,local,clone,verbose"
+    local parsed
+
+    # use values from environment if available
+    LOCAL_FILES="${IMAGE_READ_LOCAL_FILES:-${DEFAULT_LOCAL_FILES}}"
+    VERBOSE="${IMAGE_READ_VERBOSE:-${DEFAULT_VERBOSE}}"
+    # Fix: TEST_CLONES was previously never seeded from the environment
+    # or its default, so IMAGE_READ_TEST_CLONES had no effect and
+    # DEFAULT_TEST_CLONES was unused.  (Unset and "false" behave the
+    # same in the "= true" tests below, so this is backward-compatible.)
+    TEST_CLONES="${IMAGE_READ_TEST_CLONES:-${DEFAULT_TEST_CLONES}}"
+    FORMAT="${IMAGE_READ_FORMAT:-${DEFAULT_FORMAT}}"
+    PAGE_SIZE="${IMAGE_READ_PAGE_SIZE:-${DEFAULT_PAGE_SIZE}}"
+    OBJECT_ORDER="${IMAGE_READ_OBJECT_ORDER:-${DEFAULT_OBJECT_ORDER}}"
+
+    parsed=$(getopt -o "${opts}" -l "${lopts}" -n "${PROGNAME}" -- "$@") ||
+        usage
+    eval set -- "${parsed}"
+    while true; do
+        case "$1" in
+            -v|--verbose) VERBOSE=false; shift;; # default true
+            -l|--local) LOCAL_FILES=true; shift;;
+            -1|-2) FORMAT="${1:1}"; shift;;
+            -c|--clone) TEST_CLONES=true; shift;;
+            -o|--order) OBJECT_ORDER="$2"; shift 2;;
+            -p|--page_size) PAGE_SIZE="$2"; shift 2;;
+            --) shift ; break ;;
+            *) err "getopt internal error"
+        esac
+    done
+    [ $# -gt 0 ] && usage "excess arguments ($*)"
+
+    [ "${OBJECT_ORDER}" -lt "${MIN_OBJECT_ORDER}" ] &&
+        usage "object order (${OBJECT_ORDER}) must be" \
+            "at least ${MIN_OBJECT_ORDER}"
+    [ "${OBJECT_ORDER}" -gt "${MAX_OBJECT_ORDER}" ] &&
+        usage "object order (${OBJECT_ORDER}) must be" \
+            "at most ${MAX_OBJECT_ORDER}"
+
+    # testing clones implies format 2 images
+    [ "${TEST_CLONES}" != true ] || FORMAT=2
+
+    OBJECT_SIZE=$(echo "2 ^ ${OBJECT_ORDER}" | bc)
+    OBJECT_PAGES=$(echo "${OBJECT_SIZE} / ${PAGE_SIZE}" | bc)
+    # image holds two groups of 16 objects (data + hole), expressed in MB
+    IMAGE_SIZE=$((2 * 16 * OBJECT_SIZE / (1024 * 1024)))
+    [ "${IMAGE_SIZE}" -lt 1 ] && IMAGE_SIZE=1
+    IMAGE_OBJECTS=$((IMAGE_SIZE * (1024 * 1024) / OBJECT_SIZE))
+
+    [ "${OBJECT_PAGES}" -lt 4 ] &&
+        usage "object size (${OBJECT_SIZE}) must be" \
+            "at least 4 * page size (${PAGE_SIZE})"
+
+    verbose "parameters for this run:"
+    verbose " format ${FORMAT} images will be tested"
+    verbose " object order is ${OBJECT_ORDER}, so" \
+        "objects are ${OBJECT_SIZE} bytes"
+    verbose " page size is ${PAGE_SIZE} bytes, so" \
+        "there are ${OBJECT_PAGES} pages in an object"
+    verbose " derived image size is ${IMAGE_SIZE} MB, so" \
+        "there are ${IMAGE_OBJECTS} objects in an image"
+    [ "${TEST_CLONES}" = true ] &&
+        verbose " clone functionality will be tested"
+    true # Don't let the clones test spoil our return value
+}
+
+function image_dev_path() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+
+ if [ "${LOCAL_FILES}" = true ]; then
+ echo "${TEMP}/${image_name}"
+ return
+ fi
+
+ echo "/dev/rbd/rbd/${image_name}"
+}
+
+function out_data_dir() {
+ [ $# -lt 2 ] || exit 99
+ local out_data="${TEMP}/data"
+ local image_name
+
+ if [ $# -eq 1 ]; then
+ image_name="$1"
+ echo "${out_data}/${image_name}"
+ else
+ echo "${out_data}"
+ fi
+}
+
+# Create the temp/output directories, create and map the original
+# image, fill it with data, then snapshot (and optionally clone) it.
+function setup() {
+ verbose "===== setting up ====="
+ TEMP=$(mktemp -d /tmp/rbd_image_read.XXXXX)
+ mkdir -p $(out_data_dir)
+
+ if [ "${LOCAL_FILES}" != true -a "${SUSER}" != true ]; then
+ # allow ubuntu user to map/unmap rbd devices
+ # NOTE(review): the "ubuntu" user is hardcoded here (and in
+ # map_image); presumably this only runs in the QA environment --
+ # confirm before using elsewhere.
+ sudo chown ubuntu /sys/bus/rbd/add
+ sudo chown ubuntu /sys/bus/rbd/remove
+ fi
+ create_image "${ORIGINAL}"
+ map_image "${ORIGINAL}"
+ fill_original
+ create_image_snap "${ORIGINAL}" "${SNAP1}"
+ map_image_snap "${ORIGINAL}" "${SNAP1}"
+ if [ "${TEST_CLONES}" = true ]; then
+ create_snap_clone "${ORIGINAL}" "${SNAP1}" "${CLONE1}"
+ map_image "${CLONE1}"
+ fi
+}
+
+function teardown() {
+ verbose "===== cleaning up ====="
+ if [ "${TEST_CLONES}" = true ]; then
+ unmap_image "${CLONE1}" || true
+ destroy_snap_clone "${ORIGINAL}" "${SNAP1}" "${CLONE1}" || true
+ fi
+ unmap_image_snap "${ORIGINAL}" "${SNAP1}" || true
+ destroy_image_snap "${ORIGINAL}" "${SNAP1}" || true
+ unmap_image "${ORIGINAL}" || true
+ destroy_image "${ORIGINAL}" || true
+ if [ "${LOCAL_FILES}" != true -a "${SUSER}" != true ]; then
+ sudo chown root /sys/bus/rbd/add
+ sudo chown root /sys/bus/rbd/remove
+ fi
+
+ rm -rf $(out_data_dir)
+ rmdir "${TEMP}"
+}
+
+function create_image() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+ local image_path
+
+ verbose "creating image \"${image_name}\""
+ if [ "${LOCAL_FILES}" = true ]; then
+ image_path=$(image_dev_path "${image_name}")
+ touch "${image_path}"
+ return
+ fi
+
+ rbd create "${image_name}" --image-format "${FORMAT}" \
+ --size "${IMAGE_SIZE}" --order "${OBJECT_ORDER}"
+}
+
+function destroy_image() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+ local image_path
+
+ verbose "destroying image \"${image_name}\""
+ if [ "${LOCAL_FILES}" = true ]; then
+ image_path=$(image_dev_path "${image_name}")
+ rm -f "${image_path}"
+ return
+ fi
+
+ rbd rm "${image_name}"
+}
+
+function map_image() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1" # can be image@snap too
+
+ if [ "${LOCAL_FILES}" = true ]; then
+ return
+ fi
+
+ rbd map "${image_name}"
+ udevadm settle
+ # allow ubuntu user to write to the device
+ [ "${SUSER}" = true ] ||
+ sudo chown ubuntu $(image_dev_path "${image_name}")
+ true # Don't let the suser test spoil our return value
+}
+
+# Undo map_image for the given image (or image@snap): return ownership
+# of the device node to root and unmap it.  No-op for local files or
+# when the device node does not exist.
+function unmap_image() {
+    [ $# -eq 1 ] || exit 99
+    local image_name="$1" # can be image@snap too
+    local image_path
+
+    if [ "${LOCAL_FILES}" = true ]; then
+        return
+    fi
+    image_path=$(image_dev_path "${image_name}")
+
+    # Fix: was '[ -e" ${image_path}" ]', which tests the non-empty
+    # literal string '-e <path>' and is therefore always true, so an
+    # unmapped device would still be chown'd and rbd-unmapped.
+    if [ -e "${image_path}" ]; then
+        [ "${SUSER}" = true ] || sudo chown root "${image_path}"
+        udevadm settle
+        rbd unmap "${image_path}"
+        udevadm settle
+    fi
+}
+
+function map_image_snap() {
+ [ $# -eq 2 ] || exit 99
+ local image_name="$1"
+ local snap_name="$2"
+ local image_snap
+
+ if [ "${LOCAL_FILES}" = true ]; then
+ return
+ fi
+
+ image_snap="${image_name}@${snap_name}"
+ map_image "${image_snap}"
+}
+
+function unmap_image_snap() {
+ [ $# -eq 2 ] || exit 99
+ local image_name="$1"
+ local snap_name="$2"
+ local image_snap
+
+ if [ "${LOCAL_FILES}" = true ]; then
+ return
+ fi
+
+ image_snap="${image_name}@${snap_name}"
+ unmap_image "${image_snap}"
+}
+
+function create_image_snap() {
+ [ $# -eq 2 ] || exit 99
+ local image_name="$1"
+ local snap_name="$2"
+ local image_snap="${image_name}@${snap_name}"
+ local image_path
+ local snap_path
+
+ verbose "creating snapshot \"${snap_name}\"" \
+ "of image \"${image_name}\""
+ if [ "${LOCAL_FILES}" = true ]; then
+ image_path=$(image_dev_path "${image_name}")
+ snap_path=$(image_dev_path "${image_snap}")
+
+ cp "${image_path}" "${snap_path}"
+ return
+ fi
+
+ rbd snap create "${image_snap}"
+}
+
+function destroy_image_snap() {
+ [ $# -eq 2 ] || exit 99
+ local image_name="$1"
+ local snap_name="$2"
+ local image_snap="${image_name}@${snap_name}"
+ local snap_path
+
+ verbose "destroying snapshot \"${snap_name}\"" \
+ "of image \"${image_name}\""
+ if [ "${LOCAL_FILES}" = true ]; then
+ snap_path=$(image_dev_path "${image_snap}")
+ rm -rf "${snap_path}"
+ return
+ fi
+
+ rbd snap rm "${image_snap}"
+}
+
+function create_snap_clone() {
+ [ $# -eq 3 ] || exit 99
+ local image_name="$1"
+ local snap_name="$2"
+ local clone_name="$3"
+ local image_snap="${image_name}@${snap_name}"
+ local snap_path
+ local clone_path
+
+ verbose "creating clone image \"${clone_name}\"" \
+ "of image snapshot \"${image_name}@${snap_name}\""
+ if [ "${LOCAL_FILES}" = true ]; then
+ snap_path=$(image_dev_path "${image_name}@${snap_name}")
+ clone_path=$(image_dev_path "${clone_name}")
+
+ cp "${snap_path}" "${clone_path}"
+ return
+ fi
+
+ rbd snap protect "${image_snap}"
+ rbd clone "${image_snap}" "${clone_name}"
+}
+
+function destroy_snap_clone() {
+ [ $# -eq 3 ] || exit 99
+ local image_name="$1"
+ local snap_name="$2"
+ local clone_name="$3"
+ local image_snap="${image_name}@${snap_name}"
+ local clone_path
+
+ verbose "destroying clone image \"${clone_name}\""
+ if [ "${LOCAL_FILES}" = true ]; then
+ clone_path=$(image_dev_path "${clone_name}")
+
+ rm -rf "${clone_path}"
+ return
+ fi
+
+ rbd rm "${clone_name}"
+ rbd snap unprotect "${image_snap}"
+}
+
+# function that produces "random" data with which to fill the image
+function source_data() {
+ while quiet dd if=/bin/bash skip=$(($$ % 199)) bs="${PAGE_SIZE}"; do
+ : # Just do the dd
+ done
+}
+
+function fill_original() {
+ local image_path=$(image_dev_path "${ORIGINAL}")
+ local bytes=$(echo "${IMAGE_SIZE} * 1024 * 1024 - 1" | bc)
+
+ verbose "filling original image"
+ # Fill 16 objects worth of "random" data
+ source_data |
+ quiet dd bs="${PAGE_SIZE}" count=$((16 * OBJECT_PAGES)) \
+ of="${image_path}"
+ if [ "${LOCAL_FILES}" = true ]; then
+ # Extend it another 16 objects, as a hole in the image
+ quiet dd if=/dev/zero bs=1 count=1 seek=${bytes} \
+ of="${image_path}"
+ fi
+}
+
+# Read ${length} pages starting at page ${offset} of the named image
+# into a file under that image's output data directory.  A fourth
+# argument (any value) selects the "extended" region: the read offset
+# is shifted up by 16 objects' worth of pages.
+function do_read() {
+ [ $# -eq 3 -o $# -eq 4 ] || exit 99
+ local image_name="$1"
+ local offset="$2"
+ local length="$3"
+ [ "${length}" -gt 0 ] || err "do_read: length must be non-zero"
+ local image_path=$(image_dev_path "${image_name}")
+ local out_data=$(out_data_dir "${image_name}")
+ # NOTE(review): ${range} (and hence the output file name below) is
+ # computed from the *unshifted* offset, so an extended pass writes to
+ # the same file names as a normal pass and overwrites its output.
+ # compare() matches files by name across images, so this may be
+ # intentional -- confirm before changing.
+ local range=$(printf "%06u~%04u" "${offset}" "${length}")
+ local out_file
+
+ [ $# -eq 4 ] && offset=$((offset + 16 * OBJECT_PAGES))
+
+ verbose "reading \"${image_name}\" pages ${range}"
+
+ out_file="${out_data}/pages_${range}"
+
+ quiet dd bs="${PAGE_SIZE}" skip="${offset}" count="${length}" \
+ if="${image_path}" of="${out_file}"
+}
+
+# Read a fixed series of (offset, length) page ranges from the given
+# image, chosen to cover the interesting cases: the start, interior,
+# end, and entirety of an object, plus reads spanning one and two
+# object boundaries.  With a second argument the same ranges are read
+# from the second group of 16 objects instead (see do_read()).
+function one_pass() {
+    [ $# -eq 1 -o $# -eq 2 ] || exit 99
+    local image_name="$1"
+    local extended
+    [ $# -eq 2 ] && extended="true"
+    local offset
+    local length
+
+    offset=0
+
+    # +-----------+-----------+---
+    # |X:X:X...X:X| : : ... : | :
+    # +-----------+-----------+---
+    length="${OBJECT_PAGES}"
+    do_read "${image_name}" "${offset}" "${length}" ${extended}
+    offset=$((offset + length))
+
+    # ---+-----------+---
+    # : |X: : ... : | :
+    # ---+-----------+---
+    length=1
+    do_read "${image_name}" "${offset}" "${length}" ${extended}
+    offset=$((offset + length))
+
+    # ---+-----------+---
+    # : | :X: ... : | :
+    # ---+-----------+---
+    length=1
+    do_read "${image_name}" "${offset}" "${length}" ${extended}
+    offset=$((offset + length))
+
+    # ---+-----------+---
+    # : | : :X...X: | :
+    # ---+-----------+---
+    length=$((OBJECT_PAGES - 3))
+    do_read "${image_name}" "${offset}" "${length}" ${extended}
+    offset=$((offset + length))
+
+    # ---+-----------+---
+    # : | : : ... :X| :
+    # ---+-----------+---
+    length=1
+    do_read "${image_name}" "${offset}" "${length}" ${extended}
+    offset=$((offset + length))
+
+    # ---+-----------+---
+    # : |X:X:X...X:X| :
+    # ---+-----------+---
+    length="${OBJECT_PAGES}"
+    do_read "${image_name}" "${offset}" "${length}" ${extended}
+    offset=$((offset + length))
+
+    offset=$((offset + 1)) # skip 1
+
+    # ---+-----------+---
+    # : | :X:X...X:X| :
+    # ---+-----------+---
+    length=$((OBJECT_PAGES - 1))
+    do_read "${image_name}" "${offset}" "${length}" ${extended}
+    offset=$((offset + length))
+
+    # ---+-----------+-----------+---
+    # : |X:X:X...X:X|X: : ... : | :
+    # ---+-----------+-----------+---
+    length=$((OBJECT_PAGES + 1))
+    do_read "${image_name}" "${offset}" "${length}" ${extended}
+    offset=$((offset + length))
+
+    # ---+-----------+-----------+---
+    # : | :X:X...X:X|X: : ... : | :
+    # ---+-----------+-----------+---
+    length="${OBJECT_PAGES}"
+    do_read "${image_name}" "${offset}" "${length}" ${extended}
+    offset=$((offset + length))
+
+    # ---+-----------+-----------+---
+    # : | :X:X...X:X|X:X: ... : | :
+    # ---+-----------+-----------+---
+    length=$((OBJECT_PAGES + 1))
+    do_read "${image_name}" "${offset}" "${length}" ${extended}
+    offset=$((offset + length))
+
+    # ---+-----------+-----------+---
+    # : | : :X...X:X|X:X:X...X:X| :
+    # ---+-----------+-----------+---
+    length=$((2 * OBJECT_PAGES + 2))
+    do_read "${image_name}" "${offset}" "${length}" ${extended}
+    offset=$((offset + length))
+
+    offset=$((offset + 1)) # skip 1
+
+    # ---+-----------+-----------+-----
+    # : | :X:X...X:X|X:X:X...X:X|X: :
+    # ---+-----------+-----------+-----
+    length=$((2 * OBJECT_PAGES))
+    do_read "${image_name}" "${offset}" "${length}" ${extended}
+    offset=$((offset + length))
+
+    # --+-----------+-----------+--------
+    # : | :X:X...X:X|X:X:X...X:X|X:X: :
+    # --+-----------+-----------+--------
+    # Fix: dropped a dead "length=2049" assignment that was immediately
+    # overwritten by the line below.
+    length=$((2 * OBJECT_PAGES + 1))
+    do_read "${image_name}" "${offset}" "${length}" ${extended}
+    # offset=$((offset + length))
+}
+
+function run_using() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+ local out_data=$(out_data_dir "${image_name}")
+
+ verbose "===== running using \"${image_name}\" ====="
+ mkdir -p "${out_data}"
+ one_pass "${image_name}"
+ one_pass "${image_name}" extended
+}
+
+function compare() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+ local out_data=$(out_data_dir "${image_name}")
+ local original=$(out_data_dir "${ORIGINAL}")
+
+ verbose "===== comparing \"${image_name}\" ====="
+ for i in $(ls "${original}"); do
+ verbose compare "\"${image_name}\" \"${i}\""
+ cmp "${original}/${i}" "${out_data}/${i}"
+ done
+ [ "${image_name}" = "${ORIGINAL}" ] || rm -rf "${out_data}"
+}
+
+function doit() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+
+ run_using "${image_name}"
+ compare "${image_name}"
+}
+
+########## Start
+
+parseargs "$@"
+
+trap teardown EXIT HUP INT
+setup
+
+run_using "${ORIGINAL}"
+doit "${ORIGINAL}@${SNAP1}"
+if [ "${TEST_CLONES}" = true ]; then
+ doit "${CLONE1}"
+fi
+rm -rf $(out_data_dir "${ORIGINAL}")
+
+echo "Success!"
+
+exit 0
diff --git a/qa/workunits/rbd/qemu-iotests.sh b/qa/workunits/rbd/qemu-iotests.sh
new file mode 100755
index 00000000000..9031b1db536
--- /dev/null
+++ b/qa/workunits/rbd/qemu-iotests.sh
@@ -0,0 +1,22 @@
+#!/bin/sh -ex
+
+# Run qemu-iotests against rbd. These are block-level tests that go
+# through qemu but do not involve running a full vm. Note that these
+# require the admin ceph user, as there's no way to pass the ceph user
+# to qemu-iotests currently.
+
+# This will only work with particular qemu versions, like 1.0. Later
+# versions of qemu include qemu-iotests directly in the qemu
+# repository.
+git clone git://repo.or.cz/qemu-iotests.git
+
+cd qemu-iotests
+mkdir bin
+# qemu-iotests expects a binary called just 'qemu' to be available
+ln -s `which qemu-system-x86_64` bin/qemu
+
+# TEST_DIR is the pool for rbd
+TEST_DIR=rbd PATH="$PATH:$PWD/bin" ./check -rbd
+
+cd ..
+rm -rf qemu-iotests
diff --git a/src/Makefile.am b/src/Makefile.am
index 5fe7da683eb..d528b78a1be 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1168,6 +1168,7 @@ EXTRA_DIST += \
$(srcdir)/verify-mds-journal.sh $(srcdir)/vstart.sh $(srcdir)/stop.sh \
ceph-run $(srcdir)/ceph_common.sh \
$(srcdir)/init-radosgw \
+ $(srcdir)/init-radosgw.sysv \
$(srcdir)/ceph-clsinfo $(srcdir)/make_version $(srcdir)/check_version \
$(srcdir)/.git_version \
$(srcdir)/ceph-rbdnamer \
diff --git a/src/client/Client.cc b/src/client/Client.cc
index 420c3ad00f2..aae22ffa980 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -1534,9 +1534,7 @@ void Client::handle_client_session(MClientSession *m)
case CEPH_SESSION_OPEN:
renew_caps(session);
session->state = MetaSession::STATE_OPEN;
- if (unmounting) {
- _close_mds_session(session);
- } else {
+ if (!unmounting) {
connect_mds_targets(from);
}
signal_cond_list(session->waiting_for_open);
@@ -1966,6 +1964,8 @@ void Client::send_reconnect(MetaSession *session)
resend_unsafe_requests(session);
messenger->send_message(m, session->con);
+
+ mount_cond.Signal();
}
@@ -3778,17 +3778,17 @@ void Client::unmount()
}
- // send session closes!
- for (map<int,MetaSession*>::iterator p = mds_sessions.begin();
- p != mds_sessions.end();
- ++p) {
- if (p->second->state != MetaSession::STATE_CLOSING) {
- _close_mds_session(p->second);
+ while (!mds_sessions.empty()) {
+ // send session closes!
+ for (map<int,MetaSession*>::iterator p = mds_sessions.begin();
+ p != mds_sessions.end();
+ ++p) {
+ if (p->second->state != MetaSession::STATE_CLOSING) {
+ _close_mds_session(p->second);
+ }
}
- }
- // wait for sessions to close
- while (!mds_sessions.empty()) {
+ // wait for sessions to close
ldout(cct, 2) << "waiting for " << mds_sessions.size() << " mds sessions to close" << dendl;
mount_cond.Wait(client_lock);
}
diff --git a/src/init-radosgw.sysv b/src/init-radosgw.sysv
new file mode 100644
index 00000000000..48a7b5b8f96
--- /dev/null
+++ b/src/init-radosgw.sysv
@@ -0,0 +1,91 @@
+#! /bin/bash -x
+### BEGIN INIT INFO
+# Provides: radosgw
+# Required-Start: $remote_fs $named $network $time
+# Required-Stop: $remote_fs $named $network $time
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Short-Description: radosgw RESTful rados gateway
+### END INIT INFO
+
+PATH=/sbin:/bin:/usr/bin
+
+#. /lib/lsb/init-functions
+. /etc/rc.d/init.d/functions
+
+# prefix for radosgw instances in ceph.conf
+PREFIX='client.radosgw.'
+
+# user to run radosgw as (if not specified in ceph.conf)
+#DEFAULT_USER='www-data'
+DEFAULT_USER='apache'
+
+# directory to write logs to
+LOGDIR='/var/log/radosgw'
+
+RADOSGW=`which radosgw`
+if [ ! -x "$RADOSGW" ]; then
+ exit 0
+fi
+
+# make sure log dir exists
+if [ ! -d "$LOGDIR" ]; then
+ mkdir -p $LOGDIR
+fi
+
+case "$1" in
+ start)
+ for name in `ceph-conf --list-sections $PREFIX`;
+ do
+ auto_start=`ceph-conf -n $name 'auto start'`
+ if [ "$auto_start" = "no" ] || [ "$auto_start" = "false" ] || [ "$auto_start" = "0" ]; then
+ continue
+ fi
+
+ # is the socket defined? if it's not, this instance shouldn't run as a daemon.
+ rgw_socket=`ceph-conf -n $name 'rgw socket path'`
+ if [ -z "$rgw_socket" ]; then
+ continue
+ fi
+
+ # mapped to this host?
+ host=`ceph-conf -n $name host`
+ if [ "$host" != `hostname` ]; then
+ continue
+ fi
+
+ user=`ceph-conf -n $name user`
+ if [ -z "$user" ]; then
+ user="$DEFAULT_USER"
+ fi
+
+ log_file=`ceph-conf -n $name log_file`
+ if [ -n "$log_file" ] && [ ! -e "$log_file" ]; then
+ touch "$log_file"
+ chown $user $log_file
+ fi
+
+ #start-stop-daemon --start -u $user -x $RADOSGW -- -n $name
+ daemon --user="$user" "$RADOSGW -n $name"
+ echo "Starting $name..."
+ done
+ ;;
+ reload)
+ #start-stop-daemon --signal HUP -x $RADOSGW --oknodo
+ killproc $RADOSGW -SIGHUP
+ echo "Reloading radosgw..."
+ ;;
+ restart|force-reload)
+ $0 stop
+ $0 start
+ ;;
+ stop)
+ #start-stop-daemon --stop -x $RADOSGW --oknodo
+ killproc $RADOSGW
+ echo "Stopping radosgw..."
+ ;;
+ *)
+ echo "Usage: $0 start|stop|restart" >&2
+ exit 3
+ ;;
+esac \ No newline at end of file
diff --git a/src/mds/Capability.h b/src/mds/Capability.h
index 946afdc02b9..54d2312daeb 100644
--- a/src/mds/Capability.h
+++ b/src/mds/Capability.h
@@ -272,6 +272,7 @@ public:
Export make_export() {
return Export(_wanted, issued(), pending(), client_follows, mseq+1, last_issue_stamp);
}
+ void rejoin_import() { mseq++; }
void merge(Export& other) {
// issued + pending
int newpending = other.pending | pending();
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index 3f090bb3238..3129ed7c267 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -4097,20 +4097,25 @@ bool MDCache::parallel_fetch_traverse_dir(inodeno_t ino, filepath& path,
frag_t fg = cur->pick_dirfrag(path[i]);
CDir *dir = cur->get_or_open_dirfrag(this, fg);
CDentry *dn = dir->lookup(path[i]);
- CDentry::linkage_t *dnl = dn->get_linkage();
- if (!dn || dnl->is_null()) {
- if (!dir->is_complete()) {
- // fetch dir
- fetch_queue.insert(dir);
- return false;
- } else {
+ CDentry::linkage_t *dnl = dn ? dn->get_linkage() : NULL;
+
+ if (!dnl || dnl->is_null()) {
+ if (!dir->is_auth()) {
+ dout(10) << " not dirfrag auth " << *dir << dendl;
+ return true;
+ }
+ if (dnl || dir->is_complete()) {
// probably because the client created it and held a cap but it never committed
// to the journal, and the op hasn't replayed yet.
dout(5) << " dne (not created yet?) " << ino << " at " << path << dendl;
missing.insert(ino);
return true;
}
+ // fetch dir
+ fetch_queue.insert(dir);
+ return false;
}
+
cur = dnl->get_inode();
if (!cur) {
assert(dnl->is_remote());
@@ -5041,8 +5046,32 @@ void MDCache::rejoin_import_cap(CInode *in, client_t client, ceph_mds_cap_reconn
Capability *cap = in->reconnect_cap(client, icr, session);
- if (frommds >= 0)
+ if (frommds >= 0) {
+ cap->rejoin_import();
do_cap_import(session, in, cap);
+ }
+}
+
+void MDCache::export_remaining_imported_caps()
+{
+ dout(10) << "export_remaining_imported_caps" << dendl;
+
+ for (map<inodeno_t,map<client_t,map<int,ceph_mds_cap_reconnect> > >::iterator p = cap_imports.begin();
+ p != cap_imports.end();
+ ++p) {
+ for (map<client_t,map<int,ceph_mds_cap_reconnect> >::iterator q = p->second.begin();
+ q != p->second.end();
+ ++q) {
+ Session *session = mds->sessionmap.get_session(entity_name_t::CLIENT(q->first.v));
+ if (session) {
+ // mark client caps stale.
+ MClientCaps *stale = new MClientCaps(CEPH_CAP_OP_EXPORT, p->first, 0, 0, 0);
+ mds->send_message_client_counted(stale, q->first);
+ }
+ }
+ }
+
+ cap_imports.clear();
}
void MDCache::try_reconnect_cap(CInode *in, Session *session)
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index 73780e26892..d837586a3ac 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -486,6 +486,7 @@ public:
void rejoin_import_cap(CInode *in, client_t client, ceph_mds_cap_reconnect& icr, int frommds);
void finish_snaprealm_reconnect(client_t client, SnapRealm *realm, snapid_t seq);
void try_reconnect_cap(CInode *in, Session *session);
+ void export_remaining_imported_caps();
// cap imports. delayed snap parent opens.
// realm inode -> client -> cap inodes needing to split to this realm
diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc
index 3b3b2d6dc2e..935fb0c417e 100644
--- a/src/mds/MDS.cc
+++ b/src/mds/MDS.cc
@@ -1504,6 +1504,7 @@ void MDS::active_start()
mdcache->clean_open_file_lists();
mdcache->scan_stray_dir();
+ mdcache->export_remaining_imported_caps();
finish_contexts(g_ceph_context, waiting_for_replay); // kick waiters
finish_contexts(g_ceph_context, waiting_for_active); // kick waiters
}
diff --git a/src/mds/MDSTable.cc b/src/mds/MDSTable.cc
index 6db2e9b071f..b90755c1854 100644
--- a/src/mds/MDSTable.cc
+++ b/src/mds/MDSTable.cc
@@ -146,7 +146,7 @@ void MDSTable::load_2(int r, bufferlist& bl, Context *onfinish)
decode_state(p);
}
else {
- dout(10) << "load_2 found no table" << dendl;
+ dout(10) << "load_2 could not read table; error: " << r << dendl;
assert(0); // this shouldn't happen if mkfs finished.
reset();
}
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index dc7ea23f763..11ab834d856 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -574,7 +574,7 @@ void Server::handle_client_reconnect(MClientReconnect *m)
// notify client of success with an OPEN
mds->messenger->send_message(new MClientSession(CEPH_SESSION_OPEN), m->get_connection());
-
+
if (session->is_closed()) {
dout(10) << " session is closed, will make best effort to reconnect "
<< m->get_source_inst() << dendl;
@@ -636,15 +636,12 @@ void Server::handle_client_reconnect(MClientReconnect *m)
}
filepath path(p->second.path, (uint64_t)p->second.capinfo.pathbase);
- if ((in && !in->is_auth()) ||
- !mds->mdcache->path_is_mine(path)) {
+ if (in && !in->is_auth()) {
// not mine.
dout(0) << "non-auth " << p->first << " " << path
<< ", will pass off to authority" << dendl;
// mark client caps stale.
- inode_t fake_inode;
- fake_inode.ino = p->first;
MClientCaps *stale = new MClientCaps(CEPH_CAP_OP_EXPORT, p->first, 0, 0, 0);
//stale->head.migrate_seq = 0; // FIXME ******
mds->send_message_client_counted(stale, session);
@@ -652,11 +649,11 @@ void Server::handle_client_reconnect(MClientReconnect *m)
// add to cap export list.
mdcache->rejoin_export_caps(p->first, from, p->second);
} else {
- // mine. fetch later.
+ // don't know if the inode is mine
dout(0) << "missing " << p->first << " " << path
- << " (mine), will load later" << dendl;
- mdcache->rejoin_recovered_caps(p->first, from, p->second,
- -1); // "from" me.
+ << " will load or export later" << dendl;
+ mdcache->rejoin_recovered_caps(p->first, from, p->second, -1);
+ mdcache->rejoin_export_caps(p->first, from, p->second);
}
}
diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc
index ad4a71acba5..b1ce640a539 100644
--- a/src/mds/mdstypes.cc
+++ b/src/mds/mdstypes.cc
@@ -541,9 +541,9 @@ void session_info_t::decode(bufferlist::iterator& p)
{
DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, p);
::decode(inst, p);
- if (struct_v == 2) {
+ if (struct_v <= 2) {
set<tid_t> s;
- ::decode(completed_requests, p);
+ ::decode(s, p);
while (!s.empty()) {
completed_requests[*s.begin()] = inodeno_t();
s.erase(s.begin());
diff --git a/src/os/FileJournal.cc b/src/os/FileJournal.cc
index 29139c12bca..d8a6f5a1a68 100644
--- a/src/os/FileJournal.cc
+++ b/src/os/FileJournal.cc
@@ -1480,7 +1480,7 @@ void FileJournal::pop_write()
writeq.pop_front();
}
-void FileJournal::commit_start()
+void FileJournal::commit_start(uint64_t seq)
{
dout(10) << "commit_start" << dendl;
@@ -1490,8 +1490,18 @@ void FileJournal::commit_start()
break; // all good
case FULL_FULL:
- dout(1) << " FULL_FULL -> FULL_WAIT. last commit epoch committed, waiting for a new one to start." << dendl;
- full_state = FULL_WAIT;
+ if (seq >= journaled_seq) {
+ dout(1) << " FULL_FULL -> FULL_WAIT. commit_start on seq "
+ << seq << " > journaled_seq " << journaled_seq
+ << ", moving to FULL_WAIT."
+ << dendl;
+ full_state = FULL_WAIT;
+ } else {
+ dout(1) << "FULL_FULL commit_start on seq "
+ << seq << " < journaled_seq " << journaled_seq
+ << ", remaining in FULL_FULL"
+ << dendl;
+ }
break;
case FULL_WAIT:
@@ -1525,10 +1535,10 @@ void FileJournal::committed_thru(uint64_t seq)
}
if (!journalq.empty()) {
header.start = journalq.front().second;
- header.start_seq = journalq.front().first + 1;
+ header.start_seq = journalq.front().first;
} else {
header.start = write_pos;
- header.start_seq = journaled_seq + 1;
+ header.start_seq = seq + 1;
}
must_write_header = true;
print_header();
@@ -1537,7 +1547,7 @@ void FileJournal::committed_thru(uint64_t seq)
Mutex::Locker locker(finisher_lock);
// completions!
queue_completions_thru(seq);
- if (plug_journal_completions) {
+ if (plug_journal_completions && seq >= header.start_seq) {
dout(10) << " removing completion plug, queuing completions thru journaled_seq " << journaled_seq << dendl;
plug_journal_completions = false;
queue_completions_thru(journaled_seq);
diff --git a/src/os/FileJournal.h b/src/os/FileJournal.h
index 0e826fb4940..38e32324dca 100644
--- a/src/os/FileJournal.h
+++ b/src/os/FileJournal.h
@@ -403,7 +403,7 @@ private:
void make_writeable();
// writes
- void commit_start();
+ void commit_start(uint64_t seq);
void committed_thru(uint64_t seq);
bool should_commit_now() {
return full_state != FULL_NOTFULL;
diff --git a/src/os/Journal.h b/src/os/Journal.h
index 8241edc783d..1d413bb4c53 100644
--- a/src/os/Journal.h
+++ b/src/os/Journal.h
@@ -60,7 +60,7 @@ public:
virtual void submit_entry(uint64_t seq, bufferlist& e, int alignment,
Context *oncommit,
TrackedOpRef osd_op = TrackedOpRef()) = 0;
- virtual void commit_start() = 0;
+ virtual void commit_start(uint64_t seq) = 0;
virtual void committed_thru(uint64_t seq) = 0;
/// Read next journal entry - asserts on invalid journal
diff --git a/src/os/JournalingObjectStore.cc b/src/os/JournalingObjectStore.cc
index e65f010443f..e662580ac42 100644
--- a/src/os/JournalingObjectStore.cc
+++ b/src/os/JournalingObjectStore.cc
@@ -177,6 +177,7 @@ bool JournalingObjectStore::ApplyManager::commit_start()
{
bool ret = false;
+ uint64_t _committing_seq = 0;
{
Mutex::Locker l(apply_lock);
dout(10) << "commit_start max_applied_seq " << max_applied_seq
@@ -198,7 +199,7 @@ bool JournalingObjectStore::ApplyManager::commit_start()
goto out;
}
- committing_seq = max_applied_seq;
+ _committing_seq = committing_seq = max_applied_seq;
dout(10) << "commit_start committing " << committing_seq
<< ", still blocked" << dendl;
@@ -208,7 +209,7 @@ bool JournalingObjectStore::ApplyManager::commit_start()
out:
if (journal)
- journal->commit_start(); // tell the journal too
+ journal->commit_start(_committing_seq); // tell the journal too
return ret;
}
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 60add150d5b..ba502e6112d 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -1606,6 +1606,7 @@ void OSD::load_pgs()
dout(10) << "load_pgs ignoring unrecognized " << *it << dendl;
}
+ bool has_upgraded = false;
for (map<pg_t, interval_set<snapid_t> >::iterator i = pgs.begin();
i != pgs.end();
++i) {
@@ -1639,8 +1640,12 @@ void OSD::load_pgs()
pg->read_state(store, bl);
if (pg->must_upgrade()) {
- derr << "PG " << pg->info.pgid
- << " must upgrade..." << dendl;
+ if (!has_upgraded) {
+ derr << "PGs are upgrading" << dendl;
+ has_upgraded = true;
+ }
+ dout(10) << "PG " << pg->info.pgid
+ << " must upgrade..." << dendl;
pg->upgrade(store, i->second);
} else {
assert(i->second.empty());
diff --git a/src/test/test_stress_watch.cc b/src/test/test_stress_watch.cc
index 2192815ab2e..d34e9ffb53f 100644
--- a/src/test/test_stress_watch.cc
+++ b/src/test/test_stress_watch.cc
@@ -72,16 +72,10 @@ TEST(WatchStress, Stress1) {
uint64_t handle;
WatchNotifyTestCtx ctx;
- utime_t duration = ceph_clock_now(NULL);
ASSERT_EQ(0, ioctx.watch("foo", 0, &handle, &ctx));
- duration = ceph_clock_now(NULL) - duration;
- ASSERT_LT(duration.sec(), 5);
bufferlist bl2;
- duration = ceph_clock_now(NULL);
ASSERT_EQ(0, ioctx.notify("foo", 0, bl2));
- duration = ceph_clock_now(NULL) - duration;
- ASSERT_LT(duration.sec(), 5);
TestAlarm alarm;
sem_wait(&sem);