diff options
author | Oleksandr Byelkin <sanja@mariadb.com> | 2023-05-17 14:58:11 +0200 |
---|---|---|
committer | Oleksandr Byelkin <sanja@mariadb.com> | 2023-05-17 14:58:11 +0200 |
commit | 2543673dd22782f59299fd2e72179601892bd967 (patch) | |
tree | b73641bd88c9d1572203c75da618fce1937518e8 /sql/sql_statistics.cc | |
parent | 4e5b771e980edfdad5c5414aa62c81d409d585a4 (diff) | |
parent | ef911553f442cbb1baaac2af44c38b54fd058c41 (diff) | |
download | mariadb-git-bb-11.1-release.tar.gz |
Merge branch '11.0' into 11.1bb-11.1-release
Diffstat (limited to 'sql/sql_statistics.cc')
-rw-r--r-- | sql/sql_statistics.cc | 50 |
1 files changed, 8 insertions, 42 deletions
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index ecde847c8d4..ccf1ebb9ef9 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -4029,50 +4029,16 @@ double Histogram_binary::point_selectivity(Field *field, key_range *endpoint, } else { - /* + /* The value 'pos' fits within one single histogram bucket. - Histogram_binary buckets have the same numbers of rows, but they cover - different ranges of values. - - We assume that values are uniformly distributed across the [0..1] value - range. - */ - - /* - If all buckets covered value ranges of the same size, the width of - value range would be: + We also have avg_sel which is per-table average selectivity of col=const. + If there are popular values, this may be larger than one bucket, so + cap the returned number by the selectivity of one bucket. */ double avg_bucket_width= 1.0 / (get_width() + 1); - - /* - Let's see what is the width of value range that our bucket is covering. - (min==max currently. they are kept in the formula just in case we - will want to extend it to handle multi-bucket case) - */ - double inv_prec_factor= (double) 1.0 / prec_factor(); - double current_bucket_width= - (max + 1 == get_width() ? 1.0 : (get_value(max) * inv_prec_factor)) - - (min == 0 ? 0.0 : (get_value(min-1) * inv_prec_factor)); - - DBUG_ASSERT(current_bucket_width); /* We shouldn't get a one zero-width bucket */ - - /* - So: - - each bucket has the same #rows - - values are unformly distributed across the [min_value,max_value] domain. - If a bucket has value range that's N times bigger then average, than - each value will have to have N times fewer rows than average. - */ - sel= avg_sel * avg_bucket_width / current_bucket_width; - - /* - (Q: if we just follow this proportion we may end up in a situation - where number of different values we expect to find in this bucket - exceeds the number of rows that this histogram has in a bucket. Are - we ok with this or we would want to have certain caps?) - */ + sel= MY_MIN(avg_bucket_width, avg_sel); } return sel; } @@ -4147,7 +4113,7 @@ bool is_eits_usable(Field *field) if (!col_stats) return false; - DBUG_ASSERT(field->table->stats_is_read); + DBUG_ASSERT(field->orig_table->stats_is_read); /* (1): checks if we have EITS statistics for a particular column @@ -4160,8 +4126,8 @@ bool is_eits_usable(Field *field) return !col_stats->no_stat_values_provided() && //(1) field->type() != MYSQL_TYPE_GEOMETRY && //(2) #ifdef WITH_PARTITION_STORAGE_ENGINE - (!field->table->part_info || - !field->table->part_info->field_in_partition_expr(field)) && //(3) + (!field->orig_table->part_info || + !field->orig_table->part_info->field_in_partition_expr(field)) && //(3) #endif true; } |