diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-08-05 16:22:51 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-08-05 16:22:51 +0000 |
commit | cf46733632c7279a9fd0fe6ce26f9185a4ae82a9 (patch) | |
tree | da27775a2161723ef342e91af41a8b51fedef405 /subversion/svnfsfs/stats-cmd.c | |
parent | bb0ef45f7c46b0ae221b26265ef98a768c33f820 (diff) | |
download | subversion-tarball-master.tar.gz |
subversion-1.9.7HEADsubversion-1.9.7master
Diffstat (limited to 'subversion/svnfsfs/stats-cmd.c')
-rw-r--r-- | subversion/svnfsfs/stats-cmd.c | 509 |
1 files changed, 509 insertions, 0 deletions
diff --git a/subversion/svnfsfs/stats-cmd.c b/subversion/svnfsfs/stats-cmd.c new file mode 100644 index 0000000..6e820cb --- /dev/null +++ b/subversion/svnfsfs/stats-cmd.c @@ -0,0 +1,509 @@ +/* stats-cmd.c -- implements the size stats sub-command. + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include <assert.h> + +#include "svn_fs.h" +#include "svn_pools.h" +#include "svn_sorts.h" + +#include "private/svn_sorts_private.h" +#include "private/svn_string_private.h" +#include "private/svn_fs_fs_private.h" + +#include "svn_private_config.h" +#include "svnfsfs.h" + +/* Return the string, allocated in RESULT_POOL, describing the value 2**I. + */ +static const char * +print_two_power(int i, + apr_pool_t *result_pool) +{ + /* These are the SI prefixes for base-1000, the binary ones with base-1024 + are too clumsy and require appending B for "byte" to be intelligible, + e.g. "MiB". + + Therefore, we ignore the official standard and revert to the traditional + contextual use were the base-1000 prefixes are understood as base-1024 + when it came to data sizes. + */ + const char *si_prefixes = " kMGTPEZY"; + + int number = (i >= 0) ? (1 << (i % 10)) : 0; + int thousands = (i >= 0) ? (i / 10) : 0; + + char si_prefix = (thousands < strlen(si_prefixes)) + ? si_prefixes[thousands] + : '?'; + + if (si_prefix == ' ') + return apr_psprintf(result_pool, "%d", number); + + return apr_psprintf(result_pool, "%d%c", number, si_prefix); +} + +/* Print statistics for the given group of representations to console. + * Use POOL for allocations. + */ +static void +print_rep_stats(svn_fs_fs__representation_stats_t *stats, + apr_pool_t *pool) +{ + printf(_("%20s bytes in %12s reps\n" + "%20s bytes in %12s shared reps\n" + "%20s bytes expanded size\n" + "%20s bytes expanded shared size\n" + "%20s bytes with rep-sharing off\n" + "%20s shared references\n"), + svn__ui64toa_sep(stats->total.packed_size, ',', pool), + svn__ui64toa_sep(stats->total.count, ',', pool), + svn__ui64toa_sep(stats->shared.packed_size, ',', pool), + svn__ui64toa_sep(stats->shared.count, ',', pool), + svn__ui64toa_sep(stats->total.expanded_size, ',', pool), + svn__ui64toa_sep(stats->shared.expanded_size, ',', pool), + svn__ui64toa_sep(stats->expanded_size, ',', pool), + svn__ui64toa_sep(stats->references - stats->total.count, ',', pool)); +} + +/* Print the (used) contents of CHANGES. Use POOL for allocations. + */ +static void +print_largest_reps(svn_fs_fs__largest_changes_t *changes, + apr_pool_t *pool) +{ + apr_size_t i; + for (i = 0; i < changes->count && changes->changes[i]->size; ++i) + printf(_("%12s r%-8ld %s\n"), + svn__ui64toa_sep(changes->changes[i]->size, ',', pool), + changes->changes[i]->revision, + changes->changes[i]->path->data); +} + +/* Print the non-zero section of HISTOGRAM to console. + * Use POOL for allocations. + */ +static void +print_histogram(svn_fs_fs__histogram_t *histogram, + apr_pool_t *pool) +{ + int first = 0; + int last = 63; + int i; + + /* identify non-zero range */ + while (last > 0 && histogram->lines[last].count == 0) + --last; + + while (first <= last && histogram->lines[first].count == 0) + ++first; + + /* display histogram lines */ + for (i = last; i >= first; --i) + printf(_(" %4s .. < %-4s %19s (%2d%%) bytes in %12s (%2d%%) items\n"), + print_two_power(i-1, pool), print_two_power(i, pool), + svn__ui64toa_sep(histogram->lines[i].sum, ',', pool), + (int)(histogram->lines[i].sum * 100 / histogram->total.sum), + svn__ui64toa_sep(histogram->lines[i].count, ',', pool), + (int)(histogram->lines[i].count * 100 / histogram->total.count)); +} + +/* COMPARISON_FUNC for svn_sort__hash. + * Sort extension_info_t values by total count in descending order. + */ +static int +compare_count(const svn_sort__item_t *a, + const svn_sort__item_t *b) +{ + const svn_fs_fs__extension_info_t *lhs = a->value; + const svn_fs_fs__extension_info_t *rhs = b->value; + apr_int64_t diff = lhs->node_histogram.total.count + - rhs->node_histogram.total.count; + + return diff > 0 ? -1 : (diff < 0 ? 1 : 0); +} + +/* COMPARISON_FUNC for svn_sort__hash. + * Sort extension_info_t values by total uncompressed size in descending order. + */ +static int +compare_node_size(const svn_sort__item_t *a, + const svn_sort__item_t *b) +{ + const svn_fs_fs__extension_info_t *lhs = a->value; + const svn_fs_fs__extension_info_t *rhs = b->value; + apr_int64_t diff = lhs->node_histogram.total.sum + - rhs->node_histogram.total.sum; + + return diff > 0 ? -1 : (diff < 0 ? 1 : 0); +} + +/* COMPARISON_FUNC for svn_sort__hash. + * Sort extension_info_t values by total prep count in descending order. + */ +static int +compare_rep_size(const svn_sort__item_t *a, + const svn_sort__item_t *b) +{ + const svn_fs_fs__extension_info_t *lhs = a->value; + const svn_fs_fs__extension_info_t *rhs = b->value; + apr_int64_t diff = lhs->rep_histogram.total.sum + - rhs->rep_histogram.total.sum; + + return diff > 0 ? -1 : (diff < 0 ? 1 : 0); +} + +/* Return an array of extension_info_t* for the (up to) 16 most prominent + * extensions in STATS according to the sort criterion COMPARISON_FUNC. + * Allocate results in POOL. + */ +static apr_array_header_t * +get_by_extensions(svn_fs_fs__stats_t *stats, + int (*comparison_func)(const svn_sort__item_t *, + const svn_sort__item_t *), + apr_pool_t *pool) +{ + /* sort all data by extension */ + apr_array_header_t *sorted + = svn_sort__hash(stats->by_extension, comparison_func, pool); + + /* select the top (first) 16 entries */ + int count = MIN(sorted->nelts, 16); + apr_array_header_t *result + = apr_array_make(pool, count, sizeof(svn_fs_fs__extension_info_t*)); + int i; + + for (i = 0; i < count; ++i) + APR_ARRAY_PUSH(result, svn_fs_fs__extension_info_t*) + = APR_ARRAY_IDX(sorted, i, svn_sort__item_t).value; + + return result; +} + +/* Add all extension_info_t* entries of TO_ADD not already in TARGET to + * TARGET. + */ +static void +merge_by_extension(apr_array_header_t *target, + apr_array_header_t *to_add) +{ + int i, k, count; + + count = target->nelts; + for (i = 0; i < to_add->nelts; ++i) + { + svn_fs_fs__extension_info_t *info + = APR_ARRAY_IDX(to_add, i, svn_fs_fs__extension_info_t *); + for (k = 0; k < count; ++k) + if (info == APR_ARRAY_IDX(target, k, svn_fs_fs__extension_info_t *)) + break; + + if (k == count) + APR_ARRAY_PUSH(target, svn_fs_fs__extension_info_t*) = info; + } +} + +/* Print the (up to) 16 extensions in STATS with the most changes. + * Use POOL for allocations. + */ +static void +print_extensions_by_changes(svn_fs_fs__stats_t *stats, + apr_pool_t *pool) +{ + apr_array_header_t *data = get_by_extensions(stats, compare_count, pool); + apr_int64_t sum = 0; + int i; + + for (i = 0; i < data->nelts; ++i) + { + svn_fs_fs__extension_info_t *info + = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *); + + /* If there are elements, then their count cannot be 0. */ + assert(stats->file_histogram.total.count); + + sum += info->node_histogram.total.count; + printf(_("%11s %20s (%2d%%) representations\n"), + info->extension, + svn__ui64toa_sep(info->node_histogram.total.count, ',', pool), + (int)(info->node_histogram.total.count * 100 / + stats->file_histogram.total.count)); + } + + if (stats->file_histogram.total.count) + { + printf(_("%11s %20s (%2d%%) representations\n"), + "(others)", + svn__ui64toa_sep(stats->file_histogram.total.count - sum, ',', + pool), + (int)((stats->file_histogram.total.count - sum) * 100 / + stats->file_histogram.total.count)); + } +} + +/* Calculate a percentage, handling edge cases. */ +static int +get_percentage(apr_uint64_t part, + apr_uint64_t total) +{ + /* This include total == 0. */ + if (part >= total) + return 100; + + /* Standard case. */ + return (int)(part * 100.0 / total); +} + +/* Print the (up to) 16 extensions in STATS with the largest total size of + * changed file content. Use POOL for allocations. + */ +static void +print_extensions_by_nodes(svn_fs_fs__stats_t *stats, + apr_pool_t *pool) +{ + apr_array_header_t *data = get_by_extensions(stats, compare_node_size, pool); + apr_int64_t sum = 0; + int i; + + for (i = 0; i < data->nelts; ++i) + { + svn_fs_fs__extension_info_t *info + = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *); + sum += info->node_histogram.total.sum; + printf(_("%11s %20s (%2d%%) bytes\n"), + info->extension, + svn__ui64toa_sep(info->node_histogram.total.sum, ',', pool), + get_percentage(info->node_histogram.total.sum, + stats->file_histogram.total.sum)); + } + + if (stats->file_histogram.total.sum > sum) + { + /* Total sum can't be zero here. */ + printf(_("%11s %20s (%2d%%) bytes\n"), + "(others)", + svn__ui64toa_sep(stats->file_histogram.total.sum - sum, ',', + pool), + get_percentage(stats->file_histogram.total.sum - sum, + stats->file_histogram.total.sum)); + } +} + +/* Print the (up to) 16 extensions in STATS with the largest total size of + * changed file content. Use POOL for allocations. + */ +static void +print_extensions_by_reps(svn_fs_fs__stats_t *stats, + apr_pool_t *pool) +{ + apr_array_header_t *data = get_by_extensions(stats, compare_rep_size, pool); + apr_int64_t sum = 0; + int i; + + for (i = 0; i < data->nelts; ++i) + { + svn_fs_fs__extension_info_t *info + = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *); + sum += info->rep_histogram.total.sum; + printf(_("%11s %20s (%2d%%) bytes\n"), + info->extension, + svn__ui64toa_sep(info->rep_histogram.total.sum, ',', pool), + get_percentage(info->rep_histogram.total.sum, + stats->rep_size_histogram.total.sum)); + } + + if (stats->rep_size_histogram.total.sum > sum) + { + /* Total sum can't be zero here. */ + printf(_("%11s %20s (%2d%%) bytes\n"), + "(others)", + svn__ui64toa_sep(stats->rep_size_histogram.total.sum - sum, ',', + pool), + get_percentage(stats->rep_size_histogram.total.sum - sum, + stats->rep_size_histogram.total.sum)); + } +} + +/* Print per-extension histograms for the most frequent extensions in STATS. + * Use POOL for allocations. */ +static void +print_histograms_by_extension(svn_fs_fs__stats_t *stats, + apr_pool_t *pool) +{ + apr_array_header_t *data = get_by_extensions(stats, compare_count, pool); + int i; + + merge_by_extension(data, get_by_extensions(stats, compare_node_size, pool)); + merge_by_extension(data, get_by_extensions(stats, compare_rep_size, pool)); + + for (i = 0; i < data->nelts; ++i) + { + svn_fs_fs__extension_info_t *info + = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *); + printf("\nHistogram of '%s' file sizes:\n", info->extension); + print_histogram(&info->node_histogram, pool); + printf("\nHistogram of '%s' file representation sizes:\n", + info->extension); + print_histogram(&info->rep_histogram, pool); + } +} + +/* Print the contents of STATS to the console. + * Use POOL for allocations. + */ +static void +print_stats(svn_fs_fs__stats_t *stats, + apr_pool_t *pool) +{ + /* print results */ + printf("\nGlobal statistics:\n"); + printf(_("%20s bytes in %12s revisions\n" + "%20s bytes in %12s changes\n" + "%20s bytes in %12s node revision records\n" + "%20s bytes in %12s representations\n" + "%20s bytes expanded representation size\n" + "%20s bytes with rep-sharing off\n"), + svn__ui64toa_sep(stats->total_size, ',', pool), + svn__ui64toa_sep(stats->revision_count, ',', pool), + svn__ui64toa_sep(stats->change_len, ',', pool), + svn__ui64toa_sep(stats->change_count, ',', pool), + svn__ui64toa_sep(stats->total_node_stats.size, ',', pool), + svn__ui64toa_sep(stats->total_node_stats.count, ',', pool), + svn__ui64toa_sep(stats->total_rep_stats.total.packed_size, ',', + pool), + svn__ui64toa_sep(stats->total_rep_stats.total.count, ',', pool), + svn__ui64toa_sep(stats->total_rep_stats.total.expanded_size, ',', + pool), + svn__ui64toa_sep(stats->total_rep_stats.expanded_size, ',', pool)); + + printf("\nNoderev statistics:\n"); + printf(_("%20s bytes in %12s nodes total\n" + "%20s bytes in %12s directory noderevs\n" + "%20s bytes in %12s file noderevs\n"), + svn__ui64toa_sep(stats->total_node_stats.size, ',', pool), + svn__ui64toa_sep(stats->total_node_stats.count, ',', pool), + svn__ui64toa_sep(stats->dir_node_stats.size, ',', pool), + svn__ui64toa_sep(stats->dir_node_stats.count, ',', pool), + svn__ui64toa_sep(stats->file_node_stats.size, ',', pool), + svn__ui64toa_sep(stats->file_node_stats.count, ',', pool)); + + printf("\nRepresentation statistics:\n"); + printf(_("%20s bytes in %12s representations total\n" + "%20s bytes in %12s directory representations\n" + "%20s bytes in %12s file representations\n" + "%20s bytes in %12s representations of added file nodes\n" + "%20s bytes in %12s directory property representations\n" + "%20s bytes in %12s file property representations\n" + "%20s bytes in header & footer overhead\n"), + svn__ui64toa_sep(stats->total_rep_stats.total.packed_size, ',', + pool), + svn__ui64toa_sep(stats->total_rep_stats.total.count, ',', pool), + svn__ui64toa_sep(stats->dir_rep_stats.total.packed_size, ',', + pool), + svn__ui64toa_sep(stats->dir_rep_stats.total.count, ',', pool), + svn__ui64toa_sep(stats->file_rep_stats.total.packed_size, ',', + pool), + svn__ui64toa_sep(stats->file_rep_stats.total.count, ',', pool), + svn__ui64toa_sep(stats->added_rep_size_histogram.total.sum, ',', + pool), + svn__ui64toa_sep(stats->added_rep_size_histogram.total.count, ',', + pool), + svn__ui64toa_sep(stats->dir_prop_rep_stats.total.packed_size, ',', + pool), + svn__ui64toa_sep(stats->dir_prop_rep_stats.total.count, ',', pool), + svn__ui64toa_sep(stats->file_prop_rep_stats.total.packed_size, ',', + pool), + svn__ui64toa_sep(stats->file_prop_rep_stats.total.count, ',', pool), + svn__ui64toa_sep(stats->total_rep_stats.total.overhead_size, ',', + pool)); + + printf("\nDirectory representation statistics:\n"); + print_rep_stats(&stats->dir_rep_stats, pool); + printf("\nFile representation statistics:\n"); + print_rep_stats(&stats->file_rep_stats, pool); + printf("\nDirectory property representation statistics:\n"); + print_rep_stats(&stats->dir_prop_rep_stats, pool); + printf("\nFile property representation statistics:\n"); + print_rep_stats(&stats->file_prop_rep_stats, pool); + + printf("\nLargest representations:\n"); + print_largest_reps(stats->largest_changes, pool); + printf("\nExtensions by number of representations:\n"); + print_extensions_by_changes(stats, pool); + printf("\nExtensions by size of changed files:\n"); + print_extensions_by_nodes(stats, pool); + printf("\nExtensions by size of representations:\n"); + print_extensions_by_reps(stats, pool); + + printf("\nHistogram of expanded node sizes:\n"); + print_histogram(&stats->node_size_histogram, pool); + printf("\nHistogram of representation sizes:\n"); + print_histogram(&stats->rep_size_histogram, pool); + printf("\nHistogram of file sizes:\n"); + print_histogram(&stats->file_histogram, pool); + printf("\nHistogram of file representation sizes:\n"); + print_histogram(&stats->file_rep_histogram, pool); + printf("\nHistogram of file property sizes:\n"); + print_histogram(&stats->file_prop_histogram, pool); + printf("\nHistogram of file property representation sizes:\n"); + print_histogram(&stats->file_prop_rep_histogram, pool); + printf("\nHistogram of directory sizes:\n"); + print_histogram(&stats->dir_histogram, pool); + printf("\nHistogram of directory representation sizes:\n"); + print_histogram(&stats->dir_rep_histogram, pool); + printf("\nHistogram of directory property sizes:\n"); + print_histogram(&stats->dir_prop_histogram, pool); + printf("\nHistogram of directory property representation sizes:\n"); + print_histogram(&stats->dir_prop_rep_histogram, pool); + + print_histograms_by_extension(stats, pool); +} + +/* Our progress function simply prints the REVISION number and makes it + * appear immediately. + */ +static void +print_progress(svn_revnum_t revision, + void *baton, + apr_pool_t *pool) +{ + printf("%8ld", revision); + fflush(stdout); +} + +/* This implements `svn_opt_subcommand_t'. */ +svn_error_t * +subcommand__stats(apr_getopt_t *os, void *baton, apr_pool_t *pool) +{ + svnfsfs__opt_state *opt_state = baton; + svn_fs_fs__stats_t *stats; + svn_fs_t *fs; + + printf("Reading revisions\n"); + SVN_ERR(open_fs(&fs, opt_state->repository_path, pool)); + SVN_ERR(svn_fs_fs__get_stats(&stats, fs, print_progress, NULL, + check_cancel, NULL, pool, pool)); + + print_stats(stats, pool); + + return SVN_NO_ERROR; +} |