summaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorPeter Eisentraut <peter@eisentraut.org>2019-03-22 12:09:32 +0100
committerPeter Eisentraut <peter@eisentraut.org>2019-03-22 12:12:43 +0100
commit5e1963fb764e9cc092e0f7b58b28985c311431d9 (patch)
tree544492f24e3d48d00bd2a19c11663f84f1e18ce4 /src/backend
parent2ab6d28d233af17987ea323e3235b2bda89b4f2e (diff)
downloadpostgresql-5e1963fb764e9cc092e0f7b58b28985c311431d9.tar.gz
Collations with nondeterministic comparison
This adds a flag "deterministic" to collations. If that is false, such a collation disables various optimizations that assume that strings are equal only if they are byte-wise equal. That then allows use cases such as case-insensitive or accent-insensitive comparisons or handling of strings with different Unicode normal forms. This functionality is only supported with the ICU provider. At least glibc doesn't appear to have any locales that work in a nondeterministic way, so it's not worth supporting this for the libc provider. The term "deterministic comparison" in this context is from Unicode Technical Standard #10 (https://unicode.org/reports/tr10/#Deterministic_Comparison). This patch makes changes in three areas: - CREATE COLLATION DDL changes and system catalog changes to support this new flag. - Many executor nodes and auxiliary code are extended to track collations. Previously, this code would just throw away collation information, because the eventually-called user-defined functions didn't use it since they only cared about equality, which didn't need collation information. - String data type functions that do equality comparisons and hashing are changed to take the (non-)deterministic flag into account. For comparison, this just means skipping various shortcuts and tie breakers that use byte-wise comparison. For hashing, we first need to convert the input string to a canonical "sort key" using the ICU analogue of strxfrm(). Reviewed-by: Daniel Verite <daniel@manitou-mail.org> Reviewed-by: Peter Geoghegan <pg@bowt.ie> Discussion: https://www.postgresql.org/message-id/flat/1ccc668f-4cbc-0bef-af67-450b47cdfee7@2ndquadrant.com
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/access/hash/hashfunc.c100
-rw-r--r--src/backend/access/spgist/spgtextproc.c3
-rw-r--r--src/backend/catalog/pg_collation.c2
-rw-r--r--src/backend/commands/collationcmds.c25
-rw-r--r--src/backend/commands/extension.c6
-rw-r--r--src/backend/executor/execExpr.c4
-rw-r--r--src/backend/executor/execGrouping.c14
-rw-r--r--src/backend/executor/execPartition.c1
-rw-r--r--src/backend/executor/execReplication.c5
-rw-r--r--src/backend/executor/nodeAgg.c9
-rw-r--r--src/backend/executor/nodeGroup.c1
-rw-r--r--src/backend/executor/nodeHash.c14
-rw-r--r--src/backend/executor/nodeHashjoin.c5
-rw-r--r--src/backend/executor/nodeRecursiveunion.c1
-rw-r--r--src/backend/executor/nodeSetOp.c2
-rw-r--r--src/backend/executor/nodeSubplan.c14
-rw-r--r--src/backend/executor/nodeUnique.c1
-rw-r--r--src/backend/executor/nodeWindowAgg.c2
-rw-r--r--src/backend/nodes/copyfuncs.c7
-rw-r--r--src/backend/nodes/outfuncs.c7
-rw-r--r--src/backend/nodes/readfuncs.c7
-rw-r--r--src/backend/optimizer/plan/createplan.c54
-rw-r--r--src/backend/optimizer/util/tlist.c25
-rw-r--r--src/backend/partitioning/partbounds.c4
-rw-r--r--src/backend/partitioning/partprune.c3
-rw-r--r--src/backend/regex/regc_pg_locale.c5
-rw-r--r--src/backend/utils/adt/arrayfuncs.c2
-rw-r--r--src/backend/utils/adt/like.c27
-rw-r--r--src/backend/utils/adt/like_support.c14
-rw-r--r--src/backend/utils/adt/name.c32
-rw-r--r--src/backend/utils/adt/orderedsetaggs.c3
-rw-r--r--src/backend/utils/adt/pg_locale.c1
-rw-r--r--src/backend/utils/adt/ri_triggers.c33
-rw-r--r--src/backend/utils/adt/varchar.c194
-rw-r--r--src/backend/utils/adt/varlena.c333
-rw-r--r--src/backend/utils/cache/catcache.c9
-rw-r--r--src/backend/utils/cache/lsyscache.c16
37 files changed, 772 insertions, 213 deletions
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index e5f3d42e04..0bf15ae723 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -27,8 +27,10 @@
#include "postgres.h"
#include "access/hash.h"
+#include "catalog/pg_collation.h"
#include "utils/builtins.h"
#include "utils/hashutils.h"
+#include "utils/pg_locale.h"
/*
* Datatype-specific hash functions.
@@ -243,15 +245,51 @@ Datum
hashtext(PG_FUNCTION_ARGS)
{
text *key = PG_GETARG_TEXT_PP(0);
+ Oid collid = PG_GET_COLLATION();
+ pg_locale_t mylocale = 0;
Datum result;
- /*
- * Note: this is currently identical in behavior to hashvarlena, but keep
- * it as a separate function in case we someday want to do something
- * different in non-C locales. (See also hashbpchar, if so.)
- */
- result = hash_any((unsigned char *) VARDATA_ANY(key),
- VARSIZE_ANY_EXHDR(key));
+ if (!collid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for string hashing"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+ if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (!mylocale || mylocale->deterministic)
+ {
+ result = hash_any((unsigned char *) VARDATA_ANY(key),
+ VARSIZE_ANY_EXHDR(key));
+ }
+ else
+ {
+#ifdef USE_ICU
+ if (mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t ulen = -1;
+ UChar *uchar = NULL;
+ Size bsize;
+ uint8_t *buf;
+
+ ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
+
+ bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, NULL, 0);
+ buf = palloc(bsize);
+ ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, buf, bsize);
+
+ result = hash_any(buf, bsize);
+
+ pfree(buf);
+ }
+ else
+#endif
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+ }
/* Avoid leaking memory for toasted inputs */
PG_FREE_IF_COPY(key, 0);
@@ -263,12 +301,52 @@ Datum
hashtextextended(PG_FUNCTION_ARGS)
{
text *key = PG_GETARG_TEXT_PP(0);
+ Oid collid = PG_GET_COLLATION();
+ pg_locale_t mylocale = 0;
Datum result;
- /* Same approach as hashtext */
- result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
- VARSIZE_ANY_EXHDR(key),
- PG_GETARG_INT64(1));
+ if (!collid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for string hashing"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+ if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (!mylocale || mylocale->deterministic)
+ {
+ result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
+ VARSIZE_ANY_EXHDR(key),
+ PG_GETARG_INT64(1));
+ }
+ else
+ {
+#ifdef USE_ICU
+ if (mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t ulen = -1;
+ UChar *uchar = NULL;
+ Size bsize;
+ uint8_t *buf;
+
+ ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
+
+ bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, NULL, 0);
+ buf = palloc(bsize);
+ ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, buf, bsize);
+
+ result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
+
+ pfree(buf);
+ }
+ else
+#endif
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+ }
PG_FREE_IF_COPY(key, 0);
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c
index 39cd391529..d22998c54b 100644
--- a/src/backend/access/spgist/spgtextproc.c
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -630,7 +630,8 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
* query (prefix) string, so we don't need to check it again.
*/
res = (level >= queryLen) ||
- DatumGetBool(DirectFunctionCall2(text_starts_with,
+ DatumGetBool(DirectFunctionCall2Coll(text_starts_with,
+ PG_GET_COLLATION(),
out->leafValue,
PointerGetDatum(query)));
diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c
index 74e1e82cb9..dd99d53547 100644
--- a/src/backend/catalog/pg_collation.c
+++ b/src/backend/catalog/pg_collation.c
@@ -46,6 +46,7 @@ Oid
CollationCreate(const char *collname, Oid collnamespace,
Oid collowner,
char collprovider,
+ bool collisdeterministic,
int32 collencoding,
const char *collcollate, const char *collctype,
const char *collversion,
@@ -160,6 +161,7 @@ CollationCreate(const char *collname, Oid collnamespace,
values[Anum_pg_collation_collnamespace - 1] = ObjectIdGetDatum(collnamespace);
values[Anum_pg_collation_collowner - 1] = ObjectIdGetDatum(collowner);
values[Anum_pg_collation_collprovider - 1] = CharGetDatum(collprovider);
+ values[Anum_pg_collation_collisdeterministic - 1] = BoolGetDatum(collisdeterministic);
values[Anum_pg_collation_collencoding - 1] = Int32GetDatum(collencoding);
namestrcpy(&name_collate, collcollate);
values[Anum_pg_collation_collcollate - 1] = NameGetDatum(&name_collate);
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index ed3f1c12e5..919e092483 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -59,10 +59,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
DefElem *lccollateEl = NULL;
DefElem *lcctypeEl = NULL;
DefElem *providerEl = NULL;
+ DefElem *deterministicEl = NULL;
DefElem *versionEl = NULL;
char *collcollate = NULL;
char *collctype = NULL;
char *collproviderstr = NULL;
+ bool collisdeterministic = true;
int collencoding = 0;
char collprovider = 0;
char *collversion = NULL;
@@ -91,6 +93,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
defelp = &lcctypeEl;
else if (strcmp(defel->defname, "provider") == 0)
defelp = &providerEl;
+ else if (strcmp(defel->defname, "deterministic") == 0)
+ defelp = &deterministicEl;
else if (strcmp(defel->defname, "version") == 0)
defelp = &versionEl;
else
@@ -125,6 +129,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype));
collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
+ collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic;
collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding;
ReleaseSysCache(tp);
@@ -157,6 +162,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
if (providerEl)
collproviderstr = defGetString(providerEl);
+ if (deterministicEl)
+ collisdeterministic = defGetBoolean(deterministicEl);
+
if (versionEl)
collversion = defGetString(versionEl);
@@ -185,6 +193,16 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("parameter \"lc_ctype\" must be specified")));
+ /*
+ * Nondeterministic collations are currently only supported with ICU
+ * because that's the only case where it can actually make a difference.
+ * So we can save writing the code for the other providers.
+ */
+ if (!collisdeterministic && collprovider != COLLPROVIDER_ICU)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations not supported with this provider")));
+
if (!fromEl)
{
if (collprovider == COLLPROVIDER_ICU)
@@ -203,6 +221,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
collNamespace,
GetUserId(),
collprovider,
+ collisdeterministic,
collencoding,
collcollate,
collctype,
@@ -586,7 +605,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
* about existing ones.
*/
collid = CollationCreate(localebuf, nspid, GetUserId(),
- COLLPROVIDER_LIBC, enc,
+ COLLPROVIDER_LIBC, true, enc,
localebuf, localebuf,
get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
true, true);
@@ -647,7 +666,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
int enc = aliases[i].enc;
collid = CollationCreate(alias, nspid, GetUserId(),
- COLLPROVIDER_LIBC, enc,
+ COLLPROVIDER_LIBC, true, enc,
locale, locale,
get_collation_actual_version(COLLPROVIDER_LIBC, locale),
true, true);
@@ -709,7 +728,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
collid = CollationCreate(psprintf("%s-x-icu", langtag),
nspid, GetUserId(),
- COLLPROVIDER_ICU, -1,
+ COLLPROVIDER_ICU, true, -1,
collcollate, collcollate,
get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
true, true);
diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c
index daf3f51636..d4723fced8 100644
--- a/src/backend/commands/extension.c
+++ b/src/backend/commands/extension.c
@@ -901,7 +901,8 @@ execute_extension_script(Oid extensionOid, ExtensionControlFile *control,
{
const char *qSchemaName = quote_identifier(schemaName);
- t_sql = DirectFunctionCall3(replace_text,
+ t_sql = DirectFunctionCall3Coll(replace_text,
+ C_COLLATION_OID,
t_sql,
CStringGetTextDatum("@extschema@"),
CStringGetTextDatum(qSchemaName));
@@ -913,7 +914,8 @@ execute_extension_script(Oid extensionOid, ExtensionControlFile *control,
*/
if (control->module_pathname)
{
- t_sql = DirectFunctionCall3(replace_text,
+ t_sql = DirectFunctionCall3Coll(replace_text,
+ C_COLLATION_OID,
t_sql,
CStringGetTextDatum("MODULE_PATHNAME"),
CStringGetTextDatum(control->module_pathname));
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 7cbf9d3bc1..0fb31f5c3d 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -3317,6 +3317,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
int numCols,
const AttrNumber *keyColIdx,
const Oid *eqfunctions,
+ const Oid *collations,
PlanState *parent)
{
ExprState *state = makeNode(ExprState);
@@ -3377,6 +3378,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
Form_pg_attribute latt = TupleDescAttr(ldesc, attno - 1);
Form_pg_attribute ratt = TupleDescAttr(rdesc, attno - 1);
Oid foid = eqfunctions[natt];
+ Oid collid = collations[natt];
FmgrInfo *finfo;
FunctionCallInfo fcinfo;
AclResult aclresult;
@@ -3394,7 +3396,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
fmgr_info(foid, finfo);
fmgr_info_set_expr(NULL, finfo);
InitFunctionCallInfoData(*fcinfo, finfo, 2,
- InvalidOid, NULL, NULL);
+ collid, NULL, NULL);
/* left arg */
scratch.opcode = EEOP_INNER_VAR;
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 417e971ec8..14ee8db3f9 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -60,6 +60,7 @@ execTuplesMatchPrepare(TupleDesc desc,
int numCols,
const AttrNumber *keyColIdx,
const Oid *eqOperators,
+ const Oid *collations,
PlanState *parent)
{
Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
@@ -75,7 +76,7 @@ execTuplesMatchPrepare(TupleDesc desc,
/* build actual expression */
expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
- numCols, keyColIdx, eqFunctions,
+ numCols, keyColIdx, eqFunctions, collations,
parent);
return expr;
@@ -155,6 +156,7 @@ BuildTupleHashTableExt(PlanState *parent,
int numCols, AttrNumber *keyColIdx,
const Oid *eqfuncoids,
FmgrInfo *hashfunctions,
+ Oid *collations,
long nbuckets, Size additionalsize,
MemoryContext metacxt,
MemoryContext tablecxt,
@@ -177,6 +179,7 @@ BuildTupleHashTableExt(PlanState *parent,
hashtable->numCols = numCols;
hashtable->keyColIdx = keyColIdx;
hashtable->tab_hash_funcs = hashfunctions;
+ hashtable->tab_collations = collations;
hashtable->tablecxt = tablecxt;
hashtable->tempcxt = tempcxt;
hashtable->entrysize = entrysize;
@@ -212,7 +215,7 @@ BuildTupleHashTableExt(PlanState *parent,
hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
&TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
numCols,
- keyColIdx, eqfuncoids,
+ keyColIdx, eqfuncoids, collations,
NULL);
/*
@@ -240,6 +243,7 @@ BuildTupleHashTable(PlanState *parent,
int numCols, AttrNumber *keyColIdx,
const Oid *eqfuncoids,
FmgrInfo *hashfunctions,
+ Oid *collations,
long nbuckets, Size additionalsize,
MemoryContext tablecxt,
MemoryContext tempcxt,
@@ -250,6 +254,7 @@ BuildTupleHashTable(PlanState *parent,
numCols, keyColIdx,
eqfuncoids,
hashfunctions,
+ collations,
nbuckets, additionalsize,
tablecxt,
tablecxt,
@@ -421,8 +426,9 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
{
uint32 hkey;
- hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],
- attr));
+ hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
+ hashtable->tab_collations[i],
+ attr));
hashkey ^= hkey;
}
}
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 37e96a6013..cfad8a38f0 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -1246,6 +1246,7 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
rowHash = compute_partition_hash_value(key->partnatts,
key->partsupfunc,
+ key->partcollation,
values, isnull);
part_index = boundinfo->indexes[rowHash % greatest_modulus];
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 95dfc4987d..c539bb5a3f 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -96,6 +96,8 @@ build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel,
regop,
searchslot->tts_values[mainattno - 1]);
+ skey[attoff].sk_collation = idxrel->rd_indcollation[attoff];
+
/* Check for null value. */
if (searchslot->tts_isnull[mainattno - 1])
{
@@ -262,7 +264,8 @@ tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2)
errmsg("could not identify an equality operator for type %s",
format_type_be(att->atttypid))));
- if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo,
+ if (!DatumGetBool(FunctionCall2Coll(&typentry->eq_opr_finfo,
+ att->attcollation,
slot1->tts_values[attrnum],
slot2->tts_values[attrnum])))
return false;
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index bae7989a42..47161afbd4 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -746,15 +746,14 @@ process_ordered_aggregate_single(AggState *aggstate,
/*
* If DISTINCT mode, and not distinct from prior, skip it.
- *
- * Note: we assume equality functions don't care about collation.
*/
if (isDistinct &&
haveOldVal &&
((oldIsNull && *isNull) ||
(!oldIsNull && !*isNull &&
oldAbbrevVal == newAbbrevVal &&
- DatumGetBool(FunctionCall2(&pertrans->equalfnOne,
+ DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne,
+ pertrans->aggCollation,
oldVal, *newVal)))))
{
/* equal to prior, so forget this one */
@@ -1287,6 +1286,7 @@ build_hash_table(AggState *aggstate)
perhash->hashGrpColIdxHash,
perhash->eqfuncoids,
perhash->hashfunctions,
+ perhash->aggnode->grpCollations,
perhash->aggnode->numGroups,
additionalsize,
aggstate->ss.ps.state->es_query_cxt,
@@ -2381,6 +2381,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
length,
aggnode->grpColIdx,
aggnode->grpOperators,
+ aggnode->grpCollations,
(PlanState *) aggstate);
}
@@ -2392,6 +2393,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggnode->numCols,
aggnode->grpColIdx,
aggnode->grpOperators,
+ aggnode->grpCollations,
(PlanState *) aggstate);
}
}
@@ -3155,6 +3157,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
numDistinctCols,
pertrans->sortColIdx,
ops,
+ pertrans->sortCollations,
&aggstate->ss.ps);
pfree(ops);
}
diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c
index 655084d7b5..05f1d33150 100644
--- a/src/backend/executor/nodeGroup.c
+++ b/src/backend/executor/nodeGroup.c
@@ -212,6 +212,7 @@ ExecInitGroup(Group *node, EState *estate, int eflags)
node->numCols,
node->grpColIdx,
node->grpOperators,
+ node->grpCollations,
&grpstate->ss.ps);
return grpstate;
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 856daf6a7f..64eec91f8b 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -425,7 +425,7 @@ ExecEndHash(HashState *node)
* ----------------------------------------------------------------
*/
HashJoinTable
-ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
+ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls)
{
Hash *node;
HashJoinTable hashtable;
@@ -439,6 +439,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
int nkeys;
int i;
ListCell *ho;
+ ListCell *hc;
MemoryContext oldcxt;
/*
@@ -541,8 +542,9 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
hashtable->inner_hashfunctions =
(FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
+ hashtable->collations = (Oid *) palloc(nkeys * sizeof(Oid));
i = 0;
- foreach(ho, hashOperators)
+ forboth(ho, hashOperators, hc, hashCollations)
{
Oid hashop = lfirst_oid(ho);
Oid left_hashfn;
@@ -554,6 +556,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
hashtable->hashStrict[i] = op_strict(hashop);
+ hashtable->collations[i] = lfirst_oid(hc);
i++;
}
@@ -1847,7 +1850,7 @@ ExecHashGetHashValue(HashJoinTable hashtable,
/* Compute the hash function */
uint32 hkey;
- hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], keyval));
+ hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], hashtable->collations[i], keyval));
hashkey ^= hkey;
}
@@ -2303,8 +2306,9 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
uint32 hashvalue;
int bucket;
- hashvalue = DatumGetUInt32(FunctionCall1(&hashfunctions[0],
- sslot.values[i]));
+ hashvalue = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[0],
+ hashtable->collations[0],
+ sslot.values[i]));
/*
* While we have not hit a hole in the hashtable and have not hit
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 2098708864..aa43296e26 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -278,6 +278,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
*/
hashtable = ExecHashTableCreate(hashNode,
node->hj_HashOperators,
+ node->hj_Collations,
HJ_FILL_INNER(node));
node->hj_HashTable = hashtable;
@@ -603,6 +604,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
List *rclauses;
List *rhclauses;
List *hoperators;
+ List *hcollations;
TupleDesc outerDesc,
innerDesc;
ListCell *l;
@@ -738,6 +740,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
rclauses = NIL;
rhclauses = NIL;
hoperators = NIL;
+ hcollations = NIL;
foreach(l, node->hashclauses)
{
OpExpr *hclause = lfirst_node(OpExpr, l);
@@ -749,10 +752,12 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
rhclauses = lappend(rhclauses, ExecInitExpr(lsecond(hclause->args),
innerPlanState(hjstate)));
hoperators = lappend_oid(hoperators, hclause->opno);
+ hcollations = lappend_oid(hcollations, hclause->inputcollid);
}
hjstate->hj_OuterHashKeys = lclauses;
hjstate->hj_InnerHashKeys = rclauses;
hjstate->hj_HashOperators = hoperators;
+ hjstate->hj_Collations = hcollations;
/* child Hash node needs to evaluate inner hash keys, too */
((HashState *) innerPlanState(hjstate))->hashkeys = rhclauses;
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index 9b74ed3208..9c5eed7def 100644
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -43,6 +43,7 @@ build_hash_table(RecursiveUnionState *rustate)
node->dupColIdx,
rustate->eqfuncoids,
rustate->hashfunctions,
+ node->dupCollations,
node->numGroups,
0,
rustate->ps.state->es_query_cxt,
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index 26aeaee083..044246aa09 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -132,6 +132,7 @@ build_hash_table(SetOpState *setopstate)
node->dupColIdx,
setopstate->eqfuncoids,
setopstate->hashfunctions,
+ node->dupCollations,
node->numGroups,
0,
setopstate->ps.state->es_query_cxt,
@@ -554,6 +555,7 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
node->numCols,
node->dupColIdx,
node->dupOperators,
+ node->dupCollations,
&setopstate->ps);
if (node->strategy == SETOP_HASHED)
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index d7d076758c..749b4eced3 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -514,6 +514,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
node->keyColIdx,
node->tab_eq_funcoids,
node->tab_hash_funcs,
+ node->tab_collations,
nbuckets,
0,
node->planstate->state->es_query_cxt,
@@ -541,6 +542,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
node->keyColIdx,
node->tab_eq_funcoids,
node->tab_hash_funcs,
+ node->tab_collations,
nbuckets,
0,
node->planstate->state->es_query_cxt,
@@ -642,6 +644,7 @@ execTuplesUnequal(TupleTableSlot *slot1,
int numCols,
AttrNumber *matchColIdx,
FmgrInfo *eqfunctions,
+ const Oid *collations,
MemoryContext evalContext)
{
MemoryContext oldContext;
@@ -679,8 +682,8 @@ execTuplesUnequal(TupleTableSlot *slot1,
continue; /* can't prove anything here */
/* Apply the type-specific equality function */
-
- if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
+ if (!DatumGetBool(FunctionCall2Coll(&eqfunctions[i],
+ collations[i],
attr1, attr2)))
{
result = true; /* they are unequal */
@@ -722,6 +725,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot,
if (!execTuplesUnequal(slot, hashtable->tableslot,
numCols, keyColIdx,
eqfunctions,
+ hashtable->tab_collations,
hashtable->tempcxt))
{
TermTupleHashIterator(&hashiter);
@@ -817,6 +821,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
sstate->tab_eq_funcoids = NULL;
sstate->tab_hash_funcs = NULL;
sstate->tab_eq_funcs = NULL;
+ sstate->tab_collations = NULL;
sstate->lhs_hash_funcs = NULL;
sstate->cur_eq_funcs = NULL;
@@ -915,6 +920,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid));
sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
sstate->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+ sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid));
sstate->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
i = 1;
@@ -965,6 +971,9 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
fmgr_info(left_hashfn, &sstate->lhs_hash_funcs[i - 1]);
fmgr_info(right_hashfn, &sstate->tab_hash_funcs[i - 1]);
+ /* Set collation */
+ sstate->tab_collations[i - 1] = opexpr->inputcollid;
+
i++;
}
@@ -1001,6 +1010,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
ncols,
sstate->keyColIdx,
sstate->tab_eq_funcoids,
+ sstate->tab_collations,
parent);
}
diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c
index ad7039937d..c553f150b8 100644
--- a/src/backend/executor/nodeUnique.c
+++ b/src/backend/executor/nodeUnique.c
@@ -152,6 +152,7 @@ ExecInitUnique(Unique *node, EState *estate, int eflags)
node->numCols,
node->uniqColIdx,
node->uniqOperators,
+ node->uniqCollations,
&uniquestate->ps);
return uniquestate;
diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c
index 157ac042b8..b090828c01 100644
--- a/src/backend/executor/nodeWindowAgg.c
+++ b/src/backend/executor/nodeWindowAgg.c
@@ -2370,6 +2370,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
node->partNumCols,
node->partColIdx,
node->partOperators,
+ node->partCollations,
&winstate->ss.ps);
if (node->ordNumCols > 0)
@@ -2378,6 +2379,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
node->ordNumCols,
node->ordColIdx,
node->ordOperators,
+ node->ordCollations,
&winstate->ss.ps);
/*
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index c68bd7bcf7..1ea6b84561 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -297,6 +297,7 @@ _copyRecursiveUnion(const RecursiveUnion *from)
{
COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid));
+ COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid));
}
COPY_SCALAR_FIELD(numGroups);
@@ -956,6 +957,7 @@ _copyGroup(const Group *from)
COPY_SCALAR_FIELD(numCols);
COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
+ COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid));
return newnode;
}
@@ -977,6 +979,7 @@ _copyAgg(const Agg *from)
{
COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
+ COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid));
}
COPY_SCALAR_FIELD(numGroups);
COPY_BITMAPSET_FIELD(aggParams);
@@ -1002,12 +1005,14 @@ _copyWindowAgg(const WindowAgg *from)
{
COPY_POINTER_FIELD(partColIdx, from->partNumCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(partOperators, from->partNumCols * sizeof(Oid));
+ COPY_POINTER_FIELD(partCollations, from->partNumCols * sizeof(Oid));
}
COPY_SCALAR_FIELD(ordNumCols);
if (from->ordNumCols > 0)
{
COPY_POINTER_FIELD(ordColIdx, from->ordNumCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(ordOperators, from->ordNumCols * sizeof(Oid));
+ COPY_POINTER_FIELD(ordCollations, from->ordNumCols * sizeof(Oid));
}
COPY_SCALAR_FIELD(frameOptions);
COPY_NODE_FIELD(startOffset);
@@ -1040,6 +1045,7 @@ _copyUnique(const Unique *from)
COPY_SCALAR_FIELD(numCols);
COPY_POINTER_FIELD(uniqColIdx, from->numCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(uniqOperators, from->numCols * sizeof(Oid));
+ COPY_POINTER_FIELD(uniqCollations, from->numCols * sizeof(Oid));
return newnode;
}
@@ -1089,6 +1095,7 @@ _copySetOp(const SetOp *from)
COPY_SCALAR_FIELD(numCols);
COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid));
+ COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid));
COPY_SCALAR_FIELD(flagColIdx);
COPY_SCALAR_FIELD(firstFlag);
COPY_SCALAR_FIELD(numGroups);
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 69179a07c3..910a738c20 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -463,6 +463,7 @@ _outRecursiveUnion(StringInfo str, const RecursiveUnion *node)
WRITE_INT_FIELD(numCols);
WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols);
WRITE_OID_ARRAY(dupOperators, node->numCols);
+ WRITE_OID_ARRAY(dupCollations, node->numCols);
WRITE_LONG_FIELD(numGroups);
}
@@ -774,6 +775,7 @@ _outAgg(StringInfo str, const Agg *node)
WRITE_INT_FIELD(numCols);
WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols);
WRITE_OID_ARRAY(grpOperators, node->numCols);
+ WRITE_OID_ARRAY(grpCollations, node->numCols);
WRITE_LONG_FIELD(numGroups);
WRITE_BITMAPSET_FIELD(aggParams);
WRITE_NODE_FIELD(groupingSets);
@@ -791,9 +793,11 @@ _outWindowAgg(StringInfo str, const WindowAgg *node)
WRITE_INT_FIELD(partNumCols);
WRITE_ATTRNUMBER_ARRAY(partColIdx, node->partNumCols);
WRITE_OID_ARRAY(partOperators, node->partNumCols);
+ WRITE_OID_ARRAY(partCollations, node->partNumCols);
WRITE_INT_FIELD(ordNumCols);
WRITE_ATTRNUMBER_ARRAY(ordColIdx, node->ordNumCols);
WRITE_OID_ARRAY(ordOperators, node->ordNumCols);
+ WRITE_OID_ARRAY(ordCollations, node->ordNumCols);
WRITE_INT_FIELD(frameOptions);
WRITE_NODE_FIELD(startOffset);
WRITE_NODE_FIELD(endOffset);
@@ -814,6 +818,7 @@ _outGroup(StringInfo str, const Group *node)
WRITE_INT_FIELD(numCols);
WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols);
WRITE_OID_ARRAY(grpOperators, node->numCols);
+ WRITE_OID_ARRAY(grpCollations, node->numCols);
}
static void
@@ -848,6 +853,7 @@ _outUnique(StringInfo str, const Unique *node)
WRITE_INT_FIELD(numCols);
WRITE_ATTRNUMBER_ARRAY(uniqColIdx, node->numCols);
WRITE_OID_ARRAY(uniqOperators, node->numCols);
+ WRITE_OID_ARRAY(uniqCollations, node->numCols);
}
static void
@@ -875,6 +881,7 @@ _outSetOp(StringInfo str, const SetOp *node)
WRITE_INT_FIELD(numCols);
WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols);
WRITE_OID_ARRAY(dupOperators, node->numCols);
+ WRITE_OID_ARRAY(dupCollations, node->numCols);
WRITE_INT_FIELD(flagColIdx);
WRITE_INT_FIELD(firstFlag);
WRITE_LONG_FIELD(numGroups);
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 4b845b1bb7..eff98febf1 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -1677,6 +1677,7 @@ _readRecursiveUnion(void)
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols);
READ_OID_ARRAY(dupOperators, local_node->numCols);
+ READ_OID_ARRAY(dupCollations, local_node->numCols);
READ_LONG_FIELD(numGroups);
READ_DONE();
@@ -2143,6 +2144,7 @@ _readGroup(void)
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
READ_OID_ARRAY(grpOperators, local_node->numCols);
+ READ_OID_ARRAY(grpCollations, local_node->numCols);
READ_DONE();
}
@@ -2162,6 +2164,7 @@ _readAgg(void)
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
READ_OID_ARRAY(grpOperators, local_node->numCols);
+ READ_OID_ARRAY(grpCollations, local_node->numCols);
READ_LONG_FIELD(numGroups);
READ_BITMAPSET_FIELD(aggParams);
READ_NODE_FIELD(groupingSets);
@@ -2184,9 +2187,11 @@ _readWindowAgg(void)
READ_INT_FIELD(partNumCols);
READ_ATTRNUMBER_ARRAY(partColIdx, local_node->partNumCols);
READ_OID_ARRAY(partOperators, local_node->partNumCols);
+ READ_OID_ARRAY(partCollations, local_node->partNumCols);
READ_INT_FIELD(ordNumCols);
READ_ATTRNUMBER_ARRAY(ordColIdx, local_node->ordNumCols);
READ_OID_ARRAY(ordOperators, local_node->ordNumCols);
+ READ_OID_ARRAY(ordCollations, local_node->ordNumCols);
READ_INT_FIELD(frameOptions);
READ_NODE_FIELD(startOffset);
READ_NODE_FIELD(endOffset);
@@ -2212,6 +2217,7 @@ _readUnique(void)
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(uniqColIdx, local_node->numCols);
READ_OID_ARRAY(uniqOperators, local_node->numCols);
+ READ_OID_ARRAY(uniqCollations, local_node->numCols);
READ_DONE();
}
@@ -2290,6 +2296,7 @@ _readSetOp(void)
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols);
READ_OID_ARRAY(dupOperators, local_node->numCols);
+ READ_OID_ARRAY(dupCollations, local_node->numCols);
READ_INT_FIELD(flagColIdx);
READ_INT_FIELD(firstFlag);
READ_LONG_FIELD(numGroups);
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 9fbe5b2a5f..93c56c657c 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -260,14 +260,14 @@ static Sort *make_sort_from_groupcols(List *groupcls,
Plan *lefttree);
static Material *make_material(Plan *lefttree);
static WindowAgg *make_windowagg(List *tlist, Index winref,
- int partNumCols, AttrNumber *partColIdx, Oid *partOperators,
- int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators,
+ int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
+ int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
int frameOptions, Node *startOffset, Node *endOffset,
Oid startInRangeFunc, Oid endInRangeFunc,
Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst,
Plan *lefttree);
static Group *make_group(List *tlist, List *qual, int numGroupCols,
- AttrNumber *grpColIdx, Oid *grpOperators,
+ AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
Plan *lefttree);
static Unique *make_unique_from_sortclauses(Plan *lefttree, List *distinctList);
static Unique *make_unique_from_pathkeys(Plan *lefttree,
@@ -1387,6 +1387,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
bool newitems;
int numGroupCols;
AttrNumber *groupColIdx;
+ Oid *groupCollations;
int groupColPos;
ListCell *l;
@@ -1453,6 +1454,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
newtlist = subplan->targetlist;
numGroupCols = list_length(uniq_exprs);
groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber));
+ groupCollations = (Oid *) palloc(numGroupCols * sizeof(Oid));
groupColPos = 0;
foreach(l, uniq_exprs)
@@ -1463,7 +1465,9 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
tle = tlist_member(uniqexpr, newtlist);
if (!tle) /* shouldn't happen */
elog(ERROR, "failed to find unique expression in subplan tlist");
- groupColIdx[groupColPos++] = tle->resno;
+ groupColIdx[groupColPos] = tle->resno;
+ groupCollations[groupColPos] = exprCollation((Node *) tle->expr);
+ groupColPos++;
}
if (best_path->umethod == UNIQUE_PATH_HASH)
@@ -1501,6 +1505,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
numGroupCols,
groupColIdx,
groupOperators,
+ groupCollations,
NIL,
NIL,
best_path->path.rows,
@@ -1883,6 +1888,8 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path)
extract_grouping_cols(best_path->groupClause,
subplan->targetlist),
extract_grouping_ops(best_path->groupClause),
+ extract_grouping_collations(best_path->groupClause,
+ subplan->targetlist),
subplan);
copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -1949,6 +1956,8 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path)
extract_grouping_cols(best_path->groupClause,
subplan->targetlist),
extract_grouping_ops(best_path->groupClause),
+ extract_grouping_collations(best_path->groupClause,
+ subplan->targetlist),
NIL,
NIL,
best_path->numGroups,
@@ -2110,6 +2119,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
list_length((List *) linitial(rollup->gsets)),
new_grpColIdx,
extract_grouping_ops(rollup->groupClause),
+ extract_grouping_collations(rollup->groupClause, subplan->targetlist),
rollup->gsets,
NIL,
rollup->numGroups,
@@ -2147,6 +2157,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
numGroupCols,
top_grpColIdx,
extract_grouping_ops(rollup->groupClause),
+ extract_grouping_collations(rollup->groupClause, subplan->targetlist),
rollup->gsets,
chain,
rollup->numGroups,
@@ -2246,9 +2257,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
int partNumCols;
AttrNumber *partColIdx;
Oid *partOperators;
+ Oid *partCollations;
int ordNumCols;
AttrNumber *ordColIdx;
Oid *ordOperators;
+ Oid *ordCollations;
ListCell *lc;
/*
@@ -2270,6 +2283,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
*/
partColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numPart);
partOperators = (Oid *) palloc(sizeof(Oid) * numPart);
+ partCollations = (Oid *) palloc(sizeof(Oid) * numPart);
partNumCols = 0;
foreach(lc, wc->partitionClause)
@@ -2280,11 +2294,13 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
Assert(OidIsValid(sgc->eqop));
partColIdx[partNumCols] = tle->resno;
partOperators[partNumCols] = sgc->eqop;
+ partCollations[partNumCols] = exprCollation((Node *) tle->expr);
partNumCols++;
}
ordColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numOrder);
ordOperators = (Oid *) palloc(sizeof(Oid) * numOrder);
+ ordCollations = (Oid *) palloc(sizeof(Oid) * numOrder);
ordNumCols = 0;
foreach(lc, wc->orderClause)
@@ -2295,6 +2311,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
Assert(OidIsValid(sgc->eqop));
ordColIdx[ordNumCols] = tle->resno;
ordOperators[ordNumCols] = sgc->eqop;
+ ordCollations[ordNumCols] = exprCollation((Node *) tle->expr);
ordNumCols++;
}
@@ -2304,9 +2321,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
partNumCols,
partColIdx,
partOperators,
+ partCollations,
ordNumCols,
ordColIdx,
ordOperators,
+ ordCollations,
wc->frameOptions,
wc->startOffset,
wc->endOffset,
@@ -5326,10 +5345,12 @@ make_recursive_union(List *tlist,
int keyno = 0;
AttrNumber *dupColIdx;
Oid *dupOperators;
+ Oid *dupCollations;
ListCell *slitem;
dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+ dupCollations = (Oid *) palloc(sizeof(Oid) * numCols);
foreach(slitem, distinctList)
{
@@ -5339,11 +5360,13 @@ make_recursive_union(List *tlist,
dupColIdx[keyno] = tle->resno;
dupOperators[keyno] = sortcl->eqop;
+ dupCollations[keyno] = exprCollation((Node *) tle->expr);
Assert(OidIsValid(dupOperators[keyno]));
keyno++;
}
node->dupColIdx = dupColIdx;
node->dupOperators = dupOperators;
+ node->dupCollations = dupCollations;
}
node->numGroups = numGroups;
@@ -6015,7 +6038,7 @@ materialize_finished_plan(Plan *subplan)
Agg *
make_agg(List *tlist, List *qual,
AggStrategy aggstrategy, AggSplit aggsplit,
- int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators,
+ int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
List *groupingSets, List *chain,
double dNumGroups, Plan *lefttree)
{
@@ -6031,6 +6054,7 @@ make_agg(List *tlist, List *qual,
node->numCols = numGroupCols;
node->grpColIdx = grpColIdx;
node->grpOperators = grpOperators;
+ node->grpCollations = grpCollations;
node->numGroups = numGroups;
node->aggParams = NULL; /* SS_finalize_plan() will fill this */
node->groupingSets = groupingSets;
@@ -6046,8 +6070,8 @@ make_agg(List *tlist, List *qual,
static WindowAgg *
make_windowagg(List *tlist, Index winref,
- int partNumCols, AttrNumber *partColIdx, Oid *partOperators,
- int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators,
+ int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
+ int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
int frameOptions, Node *startOffset, Node *endOffset,
Oid startInRangeFunc, Oid endInRangeFunc,
Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst,
@@ -6060,9 +6084,11 @@ make_windowagg(List *tlist, Index winref,
node->partNumCols = partNumCols;
node->partColIdx = partColIdx;
node->partOperators = partOperators;
+ node->partCollations = partCollations;
node->ordNumCols = ordNumCols;
node->ordColIdx = ordColIdx;
node->ordOperators = ordOperators;
+ node->ordCollations = ordCollations;
node->frameOptions = frameOptions;
node->startOffset = startOffset;
node->endOffset = endOffset;
@@ -6087,6 +6113,7 @@ make_group(List *tlist,
int numGroupCols,
AttrNumber *grpColIdx,
Oid *grpOperators,
+ Oid *grpCollations,
Plan *lefttree)
{
Group *node = makeNode(Group);
@@ -6095,6 +6122,7 @@ make_group(List *tlist,
node->numCols = numGroupCols;
node->grpColIdx = grpColIdx;
node->grpOperators = grpOperators;
+ node->grpCollations = grpCollations;
plan->qual = qual;
plan->targetlist = tlist;
@@ -6118,6 +6146,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
int keyno = 0;
AttrNumber *uniqColIdx;
Oid *uniqOperators;
+ Oid *uniqCollations;
ListCell *slitem;
plan->targetlist = lefttree->targetlist;
@@ -6132,6 +6161,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
Assert(numCols > 0);
uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+ uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);
foreach(slitem, distinctList)
{
@@ -6140,6 +6170,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
uniqColIdx[keyno] = tle->resno;
uniqOperators[keyno] = sortcl->eqop;
+ uniqCollations[keyno] = exprCollation((Node *) tle->expr);
Assert(OidIsValid(uniqOperators[keyno]));
keyno++;
}
@@ -6147,6 +6178,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
node->numCols = numCols;
node->uniqColIdx = uniqColIdx;
node->uniqOperators = uniqOperators;
+ node->uniqCollations = uniqCollations;
return node;
}
@@ -6162,6 +6194,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
int keyno = 0;
AttrNumber *uniqColIdx;
Oid *uniqOperators;
+ Oid *uniqCollations;
ListCell *lc;
plan->targetlist = lefttree->targetlist;
@@ -6177,6 +6210,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
Assert(numCols >= 0 && numCols <= list_length(pathkeys));
uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+ uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);
foreach(lc, pathkeys)
{
@@ -6245,6 +6279,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
uniqColIdx[keyno] = tle->resno;
uniqOperators[keyno] = eqop;
+ uniqCollations[keyno] = ec->ec_collation;
keyno++;
}
@@ -6252,6 +6287,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
node->numCols = numCols;
node->uniqColIdx = uniqColIdx;
node->uniqOperators = uniqOperators;
+ node->uniqCollations = uniqCollations;
return node;
}
@@ -6296,6 +6332,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
int keyno = 0;
AttrNumber *dupColIdx;
Oid *dupOperators;
+ Oid *dupCollations;
ListCell *slitem;
plan->targetlist = lefttree->targetlist;
@@ -6309,6 +6346,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
*/
dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+ dupCollations = (Oid *) palloc(sizeof(Oid) * numCols);
foreach(slitem, distinctList)
{
@@ -6317,6 +6355,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
dupColIdx[keyno] = tle->resno;
dupOperators[keyno] = sortcl->eqop;
+ dupCollations[keyno] = exprCollation((Node *) tle->expr);
Assert(OidIsValid(dupOperators[keyno]));
keyno++;
}
@@ -6326,6 +6365,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
node->numCols = numCols;
node->dupColIdx = dupColIdx;
node->dupOperators = dupOperators;
+ node->dupCollations = dupCollations;
node->flagColIdx = flagColIdx;
node->firstFlag = firstFlag;
node->numGroups = numGroups;
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index 14d1c67a94..bb3b7969f2 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -504,6 +504,31 @@ extract_grouping_ops(List *groupClause)
}
/*
+ * extract_grouping_collations - make an array of the grouping column collations
+ * for a SortGroupClause list
+ */
+Oid *
+extract_grouping_collations(List *groupClause, List *tlist)
+{
+ int numCols = list_length(groupClause);
+ int colno = 0;
+ Oid *grpCollations;
+ ListCell *glitem;
+
+ grpCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+
+ foreach(glitem, groupClause)
+ {
+ SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+ TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
+
+ grpCollations[colno++] = exprCollation((Node *) tle->expr);
+ }
+
+ return grpCollations;
+}
+
+/*
* extract_grouping_cols - make an array of the grouping column resnos
* for a SortGroupClause list
*/
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index 5b897d50ee..803c23aaf5 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -2657,7 +2657,7 @@ get_range_nulltest(PartitionKey key)
* Compute the hash value for given partition key values.
*/
uint64
-compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc,
+compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation,
Datum *values, bool *isnull)
{
int i;
@@ -2678,7 +2678,7 @@ compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc,
* datatype-specific hash functions of each partition key
* attribute.
*/
- hash = FunctionCall2(&partsupfunc[i], values[i], seed);
+ hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i], values[i], seed);
/* Form a single 64-bit hash value */
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c
index b5c0889935..31e0164ea9 100644
--- a/src/backend/partitioning/partprune.c
+++ b/src/backend/partitioning/partprune.c
@@ -2159,6 +2159,7 @@ get_matching_hash_bounds(PartitionPruneContext *context,
int i;
uint64 rowHash;
int greatest_modulus;
+ Oid *partcollation = context->partcollation;
Assert(context->strategy == PARTITION_STRATEGY_HASH);
@@ -2179,7 +2180,7 @@ get_matching_hash_bounds(PartitionPruneContext *context,
isnull[i] = bms_is_member(i, nullkeys);
greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
- rowHash = compute_partition_hash_value(partnatts, partsupfunc,
+ rowHash = compute_partition_hash_value(partnatts, partsupfunc, partcollation,
values, isnull);
if (partindices[rowHash % greatest_modulus] >= 0)
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index a8c0b156fa..4a808b7606 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -263,6 +263,11 @@ pg_set_regex_collation(Oid collation)
errhint("Use the COLLATE clause to set the collation explicitly.")));
}
+ if (pg_regex_locale && !pg_regex_locale->deterministic)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not supported for regular expressions")));
+
#ifdef USE_ICU
if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU)
pg_regex_strategy = PG_REGEX_LOCALE_ICU;
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index a34605ac94..9cef018c0b 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -3957,7 +3957,7 @@ hash_array(PG_FUNCTION_ARGS)
* apply the hash function to each array element.
*/
InitFunctionCallInfoData(*locfcinfo, &typentry->hash_proc_finfo, 1,
- InvalidOid, NULL, NULL);
+ PG_GET_COLLATION(), NULL, NULL);
/* Loop over source data */
nitems = ArrayGetNItems(ndims, dims);
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 853c9c01e9..704e5720cf 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -45,7 +45,7 @@ static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
pg_locale_t locale, bool locale_is_c);
-static int GenericMatchText(const char *s, int slen, const char *p, int plen);
+static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
/*--------------------
@@ -148,8 +148,18 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
/* Generic for all cases not requiring inline case-folding */
static inline int
-GenericMatchText(const char *s, int slen, const char *p, int plen)
+GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
{
+ if (collation && !lc_ctype_is_c(collation) && collation != DEFAULT_COLLATION_OID)
+ {
+ pg_locale_t locale = pg_newlocale_from_collation(collation);
+
+ if (locale && !locale->deterministic)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not supported for LIKE")));
+ }
+
if (pg_database_encoding_max_length() == 1)
return SB_MatchText(s, slen, p, plen, 0, true);
else if (GetDatabaseEncoding() == PG_UTF8)
@@ -184,6 +194,11 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
errhint("Use the COLLATE clause to set the collation explicitly.")));
}
locale = pg_newlocale_from_collation(collation);
+
+ if (locale && !locale->deterministic)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not supported for ILIKE")));
}
/*
@@ -240,7 +255,7 @@ namelike(PG_FUNCTION_ARGS)
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
- result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
+ result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
PG_RETURN_BOOL(result);
}
@@ -261,7 +276,7 @@ namenlike(PG_FUNCTION_ARGS)
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
- result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
+ result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
PG_RETURN_BOOL(result);
}
@@ -282,7 +297,7 @@ textlike(PG_FUNCTION_ARGS)
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
- result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
+ result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
PG_RETURN_BOOL(result);
}
@@ -303,7 +318,7 @@ textnlike(PG_FUNCTION_ARGS)
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
- result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
+ result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
PG_RETURN_BOOL(result);
}
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 69509811ef..a65e63736c 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -258,6 +258,20 @@ match_pattern_prefix(Node *leftop,
patt = (Const *) rightop;
/*
+ * Not supported if the expression collation is nondeterministic. The
+ * optimized equality or prefix tests use bytewise comparisons, which is
+ * not consistent with nondeterministic collations. The actual
+ * pattern-matching implementation functions will later error out that
+ * pattern-matching is not supported with nondeterministic collations.
+ * (We could also error out here, but by doing it later we get more
+ * precise error messages.) (It should be possible to support at least
+ * Pattern_Prefix_Exact, but no point as along as the actual
+ * pattern-matching implementations don't support it.)
+ */
+ if (!get_collation_isdeterministic(expr_coll))
+ return NIL;
+
+ /*
* Try to extract a fixed prefix from the pattern.
*/
pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
diff --git a/src/backend/utils/adt/name.c b/src/backend/utils/adt/name.c
index 3a7887d455..54425925ed 100644
--- a/src/backend/utils/adt/name.c
+++ b/src/backend/utils/adt/name.c
@@ -131,14 +131,26 @@ namesend(PG_FUNCTION_ARGS)
* have a '\0' terminator. Whatever might be past the terminator is not
* considered relevant to comparisons.
*/
+static int
+namecmp(Name arg1, Name arg2, Oid collid)
+{
+ /* Fast path for common case used in system catalogs */
+ if (collid == C_COLLATION_OID)
+ return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
+
+ /* Else rely on the varstr infrastructure */
+ return varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
+ NameStr(*arg2), strlen(NameStr(*arg2)),
+ collid);
+}
+
Datum
nameeq(PG_FUNCTION_ARGS)
{
Name arg1 = PG_GETARG_NAME(0);
Name arg2 = PG_GETARG_NAME(1);
- /* Collation doesn't matter: equal only if bitwise-equal */
- PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) == 0);
+ PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) == 0);
}
Datum
@@ -147,21 +159,7 @@ namene(PG_FUNCTION_ARGS)
Name arg1 = PG_GETARG_NAME(0);
Name arg2 = PG_GETARG_NAME(1);
- /* Collation doesn't matter: equal only if bitwise-equal */
- PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) != 0);
-}
-
-static int
-namecmp(Name arg1, Name arg2, Oid collid)
-{
- /* Fast path for common case used in system catalogs */
- if (collid == C_COLLATION_OID)
- return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
-
- /* Else rely on the varstr infrastructure */
- return varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
- NameStr(*arg2), strlen(NameStr(*arg2)),
- collid);
+ PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) != 0);
}
Datum
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
index 2d384a9944..4db2d0d0e1 100644
--- a/src/backend/utils/adt/orderedsetaggs.c
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -1084,7 +1084,7 @@ mode_final(PG_FUNCTION_ARGS)
last_abbrev_val = abbrev_val;
}
else if (abbrev_val == last_abbrev_val &&
- DatumGetBool(FunctionCall2(equalfn, val, last_val)))
+ DatumGetBool(FunctionCall2Coll(equalfn, PG_GET_COLLATION(), val, last_val)))
{
/* value equal to previous value, count it */
if (last_val_is_mode)
@@ -1345,6 +1345,7 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
numDistinctCols,
sortColIdx,
osastate->qstate->eqOperators,
+ osastate->qstate->sortCollations,
NULL);
MemoryContextSwitchTo(oldContext);
osastate->qstate->compareTuple = compareTuple;
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 7fe10e284a..6e33d65340 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1312,6 +1312,7 @@ pg_newlocale_from_collation(Oid collid)
/* We'll fill in the result struct locally before allocating memory */
memset(&result, 0, sizeof(result));
result.provider = collform->collprovider;
+ result.deterministic = collform->collisdeterministic;
if (collform->collprovider == COLLPROVIDER_LIBC)
{
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index 6d443db7e2..72f8a9d69c 100644
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -676,6 +676,8 @@ ri_restrict(TriggerData *trigdata, bool is_no_action)
{
Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+ Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]);
quoteOneName(attname,
RIAttName(fk_rel, riinfo->fk_attnums[i]));
@@ -684,6 +686,8 @@ ri_restrict(TriggerData *trigdata, bool is_no_action)
paramname, pk_type,
riinfo->pf_eq_oprs[i],
attname, fk_type);
+ if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll))
+ ri_GenerateQualCollation(&querybuf, pk_coll);
querysep = "AND";
queryoids[i] = pk_type;
}
@@ -778,6 +782,8 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS)
{
Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+ Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]);
quoteOneName(attname,
RIAttName(fk_rel, riinfo->fk_attnums[i]));
@@ -786,6 +792,8 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS)
paramname, pk_type,
riinfo->pf_eq_oprs[i],
attname, fk_type);
+ if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll))
+ ri_GenerateQualCollation(&querybuf, pk_coll);
querysep = "AND";
queryoids[i] = pk_type;
}
@@ -890,6 +898,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS)
{
Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+ Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]);
quoteOneName(attname,
RIAttName(fk_rel, riinfo->fk_attnums[i]));
@@ -901,6 +911,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS)
paramname, pk_type,
riinfo->pf_eq_oprs[i],
attname, fk_type);
+ if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll))
+ ri_GenerateQualCollation(&querybuf, pk_coll);
querysep = ",";
qualsep = "AND";
queryoids[i] = pk_type;
@@ -1065,6 +1077,8 @@ ri_set(TriggerData *trigdata, bool is_set_null)
{
Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+ Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]);
quoteOneName(attname,
RIAttName(fk_rel, riinfo->fk_attnums[i]));
@@ -1077,6 +1091,8 @@ ri_set(TriggerData *trigdata, bool is_set_null)
paramname, pk_type,
riinfo->pf_eq_oprs[i],
attname, fk_type);
+ if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll))
+ ri_GenerateQualCollation(&querybuf, pk_coll);
querysep = ",";
qualsep = "AND";
queryoids[i] = pk_type;
@@ -2496,11 +2512,20 @@ ri_AttributesEqual(Oid eq_opr, Oid typeid,
}
/*
- * Apply the comparison operator. We assume it doesn't care about
- * collations.
+ * Apply the comparison operator.
+ *
+ * Note: This function is part of a call stack that determines whether an
+ * update to a row is significant enough that it needs checking or action
+ * on the other side of a foreign-key constraint. Therefore, the
+ * comparison here would need to be done with the collation of the *other*
+ * table. For simplicity (e.g., we might not even have the other table
+ * open), we'll just use the default collation here, which could lead to
+ * some false negatives. All this would break if we ever allow
+ * database-wide collations to be nondeterministic.
*/
- return DatumGetBool(FunctionCall2(&entry->eq_opr_finfo,
- oldvalue, newvalue));
+ return DatumGetBool(FunctionCall2Coll(&entry->eq_opr_finfo,
+ DEFAULT_COLLATION_OID,
+ oldvalue, newvalue));
}
/*
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 440fc8ed66..4003631d8f 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -23,6 +23,8 @@
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/hashutils.h"
+#include "utils/lsyscache.h"
+#include "utils/pg_locale.h"
#include "utils/varlena.h"
#include "mb/pg_wchar.h"
@@ -717,6 +719,22 @@ bpcharoctetlen(PG_FUNCTION_ARGS)
* need to be so careful.
*****************************************************************************/
+static void
+check_collation_set(Oid collid)
+{
+ if (!OidIsValid(collid))
+ {
+ /*
+ * This typically means that the parser could not resolve a conflict
+ * of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for string comparison"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+}
+
Datum
bpchareq(PG_FUNCTION_ARGS)
{
@@ -725,18 +743,31 @@ bpchareq(PG_FUNCTION_ARGS)
int len1,
len2;
bool result;
+ Oid collid = PG_GET_COLLATION();
+
+ check_collation_set(collid);
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
- /*
- * Since we only care about equality or not-equality, we can avoid all the
- * expense of strcoll() here, and just do bitwise comparison.
- */
- if (len1 != len2)
- result = false;
+ if (lc_collate_is_c(collid) ||
+ collid == DEFAULT_COLLATION_OID ||
+ pg_newlocale_from_collation(collid)->deterministic)
+ {
+ /*
+ * Since we only care about equality or not-equality, we can avoid all the
+ * expense of strcoll() here, and just do bitwise comparison.
+ */
+ if (len1 != len2)
+ result = false;
+ else
+ result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
+ }
else
- result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
+ {
+ result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
+ collid) == 0);
+ }
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -752,18 +783,29 @@ bpcharne(PG_FUNCTION_ARGS)
int len1,
len2;
bool result;
+ Oid collid = PG_GET_COLLATION();
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
- /*
- * Since we only care about equality or not-equality, we can avoid all the
- * expense of strcoll() here, and just do bitwise comparison.
- */
- if (len1 != len2)
- result = true;
+ if (lc_collate_is_c(collid) ||
+ collid == DEFAULT_COLLATION_OID ||
+ pg_newlocale_from_collation(collid)->deterministic)
+ {
+ /*
+ * Since we only care about equality or not-equality, we can avoid all the
+ * expense of strcoll() here, and just do bitwise comparison.
+ */
+ if (len1 != len2)
+ result = true;
+ else
+ result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
+ }
else
- result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
+ {
+ result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
+ collid) != 0);
+ }
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -933,23 +975,60 @@ bpchar_smaller(PG_FUNCTION_ARGS)
/*
* bpchar needs a specialized hash function because we want to ignore
* trailing blanks in comparisons.
- *
- * Note: currently there is no need for locale-specific behavior here,
- * but if we ever change the semantics of bpchar comparison to trust
- * strcoll() completely, we'd need to do something different in non-C locales.
*/
Datum
hashbpchar(PG_FUNCTION_ARGS)
{
BpChar *key = PG_GETARG_BPCHAR_PP(0);
+ Oid collid = PG_GET_COLLATION();
char *keydata;
int keylen;
+ pg_locale_t mylocale = 0;
Datum result;
+ if (!collid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for string hashing"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+
keydata = VARDATA_ANY(key);
keylen = bcTruelen(key);
- result = hash_any((unsigned char *) keydata, keylen);
+ if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (!mylocale || mylocale->deterministic)
+ {
+ result = hash_any((unsigned char *) keydata, keylen);
+ }
+ else
+ {
+#ifdef USE_ICU
+ if (mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t ulen = -1;
+ UChar *uchar = NULL;
+ Size bsize;
+ uint8_t *buf;
+
+ ulen = icu_to_uchar(&uchar, keydata, keylen);
+
+ bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, NULL, 0);
+ buf = palloc(bsize);
+ ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, buf, bsize);
+
+ result = hash_any(buf, bsize);
+
+ pfree(buf);
+ }
+ else
+#endif
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+ }
/* Avoid leaking memory for toasted inputs */
PG_FREE_IF_COPY(key, 0);
@@ -961,15 +1040,56 @@ Datum
hashbpcharextended(PG_FUNCTION_ARGS)
{
BpChar *key = PG_GETARG_BPCHAR_PP(0);
+ Oid collid = PG_GET_COLLATION();
char *keydata;
int keylen;
+ pg_locale_t mylocale = 0;
Datum result;
+ if (!collid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for string hashing"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+
keydata = VARDATA_ANY(key);
keylen = bcTruelen(key);
- result = hash_any_extended((unsigned char *) keydata, keylen,
- PG_GETARG_INT64(1));
+ if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (!mylocale || mylocale->deterministic)
+ {
+ result = hash_any_extended((unsigned char *) keydata, keylen,
+ PG_GETARG_INT64(1));
+ }
+ else
+ {
+#ifdef USE_ICU
+ if (mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t ulen = -1;
+ UChar *uchar = NULL;
+ Size bsize;
+ uint8_t *buf;
+
+ ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
+
+ bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, NULL, 0);
+ buf = palloc(bsize);
+ ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, buf, bsize);
+
+ result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
+
+ pfree(buf);
+ }
+ else
+#endif
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+ }
PG_FREE_IF_COPY(key, 0);
@@ -985,12 +1105,23 @@ hashbpcharextended(PG_FUNCTION_ARGS)
*/
static int
-internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
+internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2, Oid collid)
{
int result;
int len1,
len2;
+ check_collation_set(collid);
+
+ /*
+ * see internal_text_pattern_compare()
+ */
+ if (!get_collation_isdeterministic(collid))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not supported for operator class \"%s\"",
+ "bpchar_pattern_ops")));
+
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
@@ -1013,7 +1144,7 @@ bpchar_pattern_lt(PG_FUNCTION_ARGS)
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
int result;
- result = internal_bpchar_pattern_compare(arg1, arg2);
+ result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -1029,7 +1160,7 @@ bpchar_pattern_le(PG_FUNCTION_ARGS)
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
int result;
- result = internal_bpchar_pattern_compare(arg1, arg2);
+ result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -1045,7 +1176,7 @@ bpchar_pattern_ge(PG_FUNCTION_ARGS)
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
int result;
- result = internal_bpchar_pattern_compare(arg1, arg2);
+ result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -1061,7 +1192,7 @@ bpchar_pattern_gt(PG_FUNCTION_ARGS)
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
int result;
- result = internal_bpchar_pattern_compare(arg1, arg2);
+ result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -1077,7 +1208,7 @@ btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
int result;
- result = internal_bpchar_pattern_compare(arg1, arg2);
+ result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -1090,8 +1221,17 @@ Datum
btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
{
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ Oid collid = ssup->ssup_collation;
MemoryContext oldcontext;
+ check_collation_set(collid);
+
+ if (!get_collation_isdeterministic(collid))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not supported for operator class \"%s\"",
+ "bpchar_pattern_ops")));
+
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
/* Use generic string SortSupport, forcing "C" collation */
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 39c394331b..68a6e49aeb 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -122,13 +122,14 @@ static text *text_substring(Datum str,
int32 length,
bool length_not_specified);
static text *text_overlay(text *t1, text *t2, int sp, int sl);
-static int text_position(text *t1, text *t2);
-static void text_position_setup(text *t1, text *t2, TextPositionState *state);
+static int text_position(text *t1, text *t2, Oid collid);
+static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
static bool text_position_next(TextPositionState *state);
static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
static char *text_position_get_match_ptr(TextPositionState *state);
static int text_position_get_match_pos(TextPositionState *state);
static void text_position_cleanup(TextPositionState *state);
+static void check_collation_set(Oid collid);
static int text_cmp(text *arg1, text *arg2, Oid collid);
static bytea *bytea_catenate(bytea *t1, bytea *t2);
static bytea *bytea_substring(Datum str,
@@ -1094,7 +1095,7 @@ textpos(PG_FUNCTION_ARGS)
text *str = PG_GETARG_TEXT_PP(0);
text *search_str = PG_GETARG_TEXT_PP(1);
- PG_RETURN_INT32((int32) text_position(str, search_str));
+ PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
}
/*
@@ -1112,7 +1113,7 @@ textpos(PG_FUNCTION_ARGS)
* functions.
*/
static int
-text_position(text *t1, text *t2)
+text_position(text *t1, text *t2, Oid collid)
{
TextPositionState state;
int result;
@@ -1120,7 +1121,7 @@ text_position(text *t1, text *t2)
if (VARSIZE_ANY_EXHDR(t1) < 1 || VARSIZE_ANY_EXHDR(t2) < 1)
return 0;
- text_position_setup(t1, t2, &state);
+ text_position_setup(t1, t2, collid, &state);
if (!text_position_next(&state))
result = 0;
else
@@ -1147,10 +1148,21 @@ text_position(text *t1, text *t2)
*/
static void
-text_position_setup(text *t1, text *t2, TextPositionState *state)
+text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
{
int len1 = VARSIZE_ANY_EXHDR(t1);
int len2 = VARSIZE_ANY_EXHDR(t2);
+ pg_locale_t mylocale = 0;
+
+ check_collation_set(collid);
+
+ if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (mylocale && !mylocale->deterministic)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not supported for substring searches")));
Assert(len1 > 0);
Assert(len2 > 0);
@@ -1429,6 +1441,22 @@ text_position_cleanup(TextPositionState *state)
/* no cleanup needed */
}
+static void
+check_collation_set(Oid collid)
+{
+ if (!OidIsValid(collid))
+ {
+ /*
+ * This typically means that the parser could not resolve a conflict
+ * of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for string comparison"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+}
+
/* varstr_cmp()
* Comparison function for text strings with given lengths.
* Includes locale support, but must copy strings to temporary memory
@@ -1441,6 +1469,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
{
int result;
+ check_collation_set(collid);
+
/*
* Unfortunately, there is no strncoll(), so in the non-C locale case we
* have to do some memory copying. This turns out to be significantly
@@ -1462,20 +1492,7 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
pg_locale_t mylocale = 0;
if (collid != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collid))
- {
- /*
- * This typically means that the parser could not resolve a
- * conflict of implicit collations, so report it that way.
- */
- ereport(ERROR,
- (errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine which collation to use for string comparison"),
- errhint("Use the COLLATE clause to set the collation explicitly.")));
- }
mylocale = pg_newlocale_from_collation(collid);
- }
/*
* memcmp() can't tell us which of two unequal strings sorts first,
@@ -1558,13 +1575,9 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
ereport(ERROR,
(errmsg("could not compare Unicode strings: %m")));
- /*
- * In some locales wcscoll() can claim that nonidentical strings
- * are equal. Believing that would be bad news for a number of
- * reasons, so we follow Perl's lead and sort "equal" strings
- * according to strcmp (on the UTF-8 representation).
- */
- if (result == 0)
+ /* Break tie if necessary. */
+ if (result == 0 &&
+ (!mylocale || mylocale->deterministic))
{
result = memcmp(arg1, arg2, Min(len1, len2));
if ((result == 0) && (len1 != len2))
@@ -1649,13 +1662,9 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
else
result = strcoll(a1p, a2p);
- /*
- * In some locales strcoll() can claim that nonidentical strings are
- * equal. Believing that would be bad news for a number of reasons,
- * so we follow Perl's lead and sort "equal" strings according to
- * strcmp().
- */
- if (result == 0)
+ /* Break tie if necessary. */
+ if (result == 0 &&
+ (!mylocale || mylocale->deterministic))
result = strcmp(a1p, a2p);
if (a1p != a1buf)
@@ -1699,33 +1708,52 @@ text_cmp(text *arg1, text *arg2, Oid collid)
Datum
texteq(PG_FUNCTION_ARGS)
{
- Datum arg1 = PG_GETARG_DATUM(0);
- Datum arg2 = PG_GETARG_DATUM(1);
+ Oid collid = PG_GET_COLLATION();
bool result;
- Size len1,
- len2;
- /*
- * Since we only care about equality or not-equality, we can avoid all the
- * expense of strcoll() here, and just do bitwise comparison. In fact, we
- * don't even have to do a bitwise comparison if we can show the lengths
- * of the strings are unequal; which might save us from having to detoast
- * one or both values.
- */
- len1 = toast_raw_datum_size(arg1);
- len2 = toast_raw_datum_size(arg2);
- if (len1 != len2)
- result = false;
+ check_collation_set(collid);
+
+ if (lc_collate_is_c(collid) ||
+ collid == DEFAULT_COLLATION_OID ||
+ pg_newlocale_from_collation(collid)->deterministic)
+ {
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ Size len1,
+ len2;
+
+ /*
+ * Since we only care about equality or not-equality, we can avoid all the
+ * expense of strcoll() here, and just do bitwise comparison. In fact, we
+ * don't even have to do a bitwise comparison if we can show the lengths
+ * of the strings are unequal; which might save us from having to detoast
+ * one or both values.
+ */
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len1 != len2)
+ result = false;
+ else
+ {
+ text *targ1 = DatumGetTextPP(arg1);
+ text *targ2 = DatumGetTextPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+ len1 - VARHDRSZ) == 0);
+
+ PG_FREE_IF_COPY(targ1, 0);
+ PG_FREE_IF_COPY(targ2, 1);
+ }
+ }
else
{
- text *targ1 = DatumGetTextPP(arg1);
- text *targ2 = DatumGetTextPP(arg2);
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
- result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
- len1 - VARHDRSZ) == 0);
+ result = (text_cmp(arg1, arg2, collid) == 0);
- PG_FREE_IF_COPY(targ1, 0);
- PG_FREE_IF_COPY(targ2, 1);
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
}
PG_RETURN_BOOL(result);
@@ -1734,27 +1762,46 @@ texteq(PG_FUNCTION_ARGS)
Datum
textne(PG_FUNCTION_ARGS)
{
- Datum arg1 = PG_GETARG_DATUM(0);
- Datum arg2 = PG_GETARG_DATUM(1);
+ Oid collid = PG_GET_COLLATION();
bool result;
- Size len1,
- len2;
- /* See comment in texteq() */
- len1 = toast_raw_datum_size(arg1);
- len2 = toast_raw_datum_size(arg2);
- if (len1 != len2)
- result = true;
+ check_collation_set(collid);
+
+ if (lc_collate_is_c(collid) ||
+ collid == DEFAULT_COLLATION_OID ||
+ pg_newlocale_from_collation(collid)->deterministic)
+ {
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ Size len1,
+ len2;
+
+ /* See comment in texteq() */
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len1 != len2)
+ result = true;
+ else
+ {
+ text *targ1 = DatumGetTextPP(arg1);
+ text *targ2 = DatumGetTextPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+ len1 - VARHDRSZ) != 0);
+
+ PG_FREE_IF_COPY(targ1, 0);
+ PG_FREE_IF_COPY(targ2, 1);
+ }
+ }
else
{
- text *targ1 = DatumGetTextPP(arg1);
- text *targ2 = DatumGetTextPP(arg2);
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
- result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
- len1 - VARHDRSZ) != 0);
+ result = (text_cmp(arg1, arg2, collid) != 0);
- PG_FREE_IF_COPY(targ1, 0);
- PG_FREE_IF_COPY(targ2, 1);
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
}
PG_RETURN_BOOL(result);
@@ -1825,10 +1872,22 @@ text_starts_with(PG_FUNCTION_ARGS)
{
Datum arg1 = PG_GETARG_DATUM(0);
Datum arg2 = PG_GETARG_DATUM(1);
+ Oid collid = PG_GET_COLLATION();
+ pg_locale_t mylocale = 0;
bool result;
Size len1,
len2;
+ check_collation_set(collid);
+
+ if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (mylocale && !mylocale->deterministic)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not supported for substring searches")));
+
len1 = toast_raw_datum_size(arg1);
len2 = toast_raw_datum_size(arg2);
if (len2 > len1)
@@ -1898,6 +1957,8 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
VarStringSortSupport *sss;
pg_locale_t locale = 0;
+ check_collation_set(collid);
+
/*
* If possible, set ssup->comparator to a function which can be used to
* directly compare two datums. If we can do this, we'll avoid the
@@ -1934,20 +1995,7 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
* result.
*/
if (collid != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collid))
- {
- /*
- * This typically means that the parser could not resolve a
- * conflict of implicit collations, so report it that way.
- */
- ereport(ERROR,
- (errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine which collation to use for string comparison"),
- errhint("Use the COLLATE clause to set the collation explicitly.")));
- }
locale = pg_newlocale_from_collation(collid);
- }
/*
* There is a further exception on Windows. When the database
@@ -2328,12 +2376,9 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
else
result = strcoll(sss->buf1, sss->buf2);
- /*
- * In some locales strcoll() can claim that nonidentical strings are
- * equal. Believing that would be bad news for a number of reasons, so we
- * follow Perl's lead and sort "equal" strings according to strcmp().
- */
- if (result == 0)
+ /* Break tie if necessary. */
+ if (result == 0 &&
+ (!sss->locale || sss->locale->deterministic))
result = strcmp(sss->buf1, sss->buf2);
/* Cache result, perhaps saving an expensive strcoll() call next time */
@@ -2760,10 +2805,18 @@ nameeqtext(PG_FUNCTION_ARGS)
text *arg2 = PG_GETARG_TEXT_PP(1);
size_t len1 = strlen(NameStr(*arg1));
size_t len2 = VARSIZE_ANY_EXHDR(arg2);
+ Oid collid = PG_GET_COLLATION();
bool result;
- result = (len1 == len2 &&
- memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
+ check_collation_set(collid);
+
+ if (collid == C_COLLATION_OID)
+ result = (len1 == len2 &&
+ memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
+ else
+ result = (varstr_cmp(NameStr(*arg1), len1,
+ VARDATA_ANY(arg2), len2,
+ collid) == 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -2777,10 +2830,18 @@ texteqname(PG_FUNCTION_ARGS)
Name arg2 = PG_GETARG_NAME(1);
size_t len1 = VARSIZE_ANY_EXHDR(arg1);
size_t len2 = strlen(NameStr(*arg2));
+ Oid collid = PG_GET_COLLATION();
bool result;
- result = (len1 == len2 &&
- memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
+ check_collation_set(collid);
+
+ if (collid == C_COLLATION_OID)
+ result = (len1 == len2 &&
+ memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
+ else
+ result = (varstr_cmp(VARDATA_ANY(arg1), len1,
+ NameStr(*arg2), len2,
+ collid) == 0);
PG_FREE_IF_COPY(arg1, 0);
@@ -2794,10 +2855,18 @@ namenetext(PG_FUNCTION_ARGS)
text *arg2 = PG_GETARG_TEXT_PP(1);
size_t len1 = strlen(NameStr(*arg1));
size_t len2 = VARSIZE_ANY_EXHDR(arg2);
+ Oid collid = PG_GET_COLLATION();
bool result;
- result = !(len1 == len2 &&
- memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
+ check_collation_set(collid);
+
+ if (collid == C_COLLATION_OID)
+ result = !(len1 == len2 &&
+ memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
+ else
+ result = !(varstr_cmp(NameStr(*arg1), len1,
+ VARDATA_ANY(arg2), len2,
+ collid) == 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -2811,10 +2880,18 @@ textnename(PG_FUNCTION_ARGS)
Name arg2 = PG_GETARG_NAME(1);
size_t len1 = VARSIZE_ANY_EXHDR(arg1);
size_t len2 = strlen(NameStr(*arg2));
+ Oid collid = PG_GET_COLLATION();
bool result;
- result = !(len1 == len2 &&
- memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
+ check_collation_set(collid);
+
+ if (collid == C_COLLATION_OID)
+ result = !(len1 == len2 &&
+ memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
+ else
+ result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
+ NameStr(*arg2), len2,
+ collid) == 0);
PG_FREE_IF_COPY(arg1, 0);
@@ -2919,12 +2996,34 @@ textgename(PG_FUNCTION_ARGS)
*/
static int
-internal_text_pattern_compare(text *arg1, text *arg2)
+internal_text_pattern_compare(text *arg1, text *arg2, Oid collid)
{
int result;
int len1,
len2;
+ check_collation_set(collid);
+
+ /*
+ * XXX We cannot use a text_pattern_ops index for nondeterministic
+ * collations, because these operators intentionally ignore the collation.
+ * However, the planner has no way to know that, so it might choose such
+ * an index for an "=" clause, which would lead to wrong results. This
+ * check here doesn't prevent choosing the index, but it will at least
+ * error out if the index is chosen. A text_pattern_ops index on a column
+ * with nondeterministic collation is pretty useless anyway, since LIKE
+ * etc. won't work there either. A future possibility would be to
+ * annotate the operator class or its members in the catalog to avoid the
+ * index. Another alternative is to stay away from the *_pattern_ops
+ * operator classes and prefer creating LIKE-supporting indexes with
+ * COLLATE "C".
+ */
+ if (!get_collation_isdeterministic(collid))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not supported for operator class \"%s\"",
+ "text_pattern_ops")));
+
len1 = VARSIZE_ANY_EXHDR(arg1);
len2 = VARSIZE_ANY_EXHDR(arg2);
@@ -2947,7 +3046,7 @@ text_pattern_lt(PG_FUNCTION_ARGS)
text *arg2 = PG_GETARG_TEXT_PP(1);
int result;
- result = internal_text_pattern_compare(arg1, arg2);
+ result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -2963,7 +3062,7 @@ text_pattern_le(PG_FUNCTION_ARGS)
text *arg2 = PG_GETARG_TEXT_PP(1);
int result;
- result = internal_text_pattern_compare(arg1, arg2);
+ result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -2979,7 +3078,7 @@ text_pattern_ge(PG_FUNCTION_ARGS)
text *arg2 = PG_GETARG_TEXT_PP(1);
int result;
- result = internal_text_pattern_compare(arg1, arg2);
+ result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -2995,7 +3094,7 @@ text_pattern_gt(PG_FUNCTION_ARGS)
text *arg2 = PG_GETARG_TEXT_PP(1);
int result;
- result = internal_text_pattern_compare(arg1, arg2);
+ result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -3011,7 +3110,7 @@ bttext_pattern_cmp(PG_FUNCTION_ARGS)
text *arg2 = PG_GETARG_TEXT_PP(1);
int result;
- result = internal_text_pattern_compare(arg1, arg2);
+ result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -3024,8 +3123,17 @@ Datum
bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
{
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ Oid collid = ssup->ssup_collation;
MemoryContext oldcontext;
+ check_collation_set(collid);
+
+ if (!get_collation_isdeterministic(collid))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not supported for operator class \"%s\"",
+ "text_pattern_ops")));
+
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
/* Use generic string SortSupport, forcing "C" collation */
@@ -4121,7 +4229,7 @@ replace_text(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(src_text);
}
- text_position_setup(src_text, from_sub_text, &state);
+ text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
found = text_position_next(&state);
@@ -4482,7 +4590,7 @@ split_text(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(cstring_to_text(""));
}
- text_position_setup(inputstring, fldsep, &state);
+ text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
/* identify bounds of first field */
start_ptr = VARDATA_ANY(inputstring);
@@ -4538,11 +4646,12 @@ split_text(PG_FUNCTION_ARGS)
* Convenience function to return true when two text params are equal.
*/
static bool
-text_isequal(text *txt1, text *txt2)
+text_isequal(text *txt1, text *txt2, Oid collid)
{
- return DatumGetBool(DirectFunctionCall2(texteq,
- PointerGetDatum(txt1),
- PointerGetDatum(txt2)));
+ return DatumGetBool(DirectFunctionCall2Coll(texteq,
+ collid,
+ PointerGetDatum(txt1),
+ PointerGetDatum(txt2)));
}
/*
@@ -4633,7 +4742,7 @@ text_to_array_internal(PG_FUNCTION_ARGS)
int lbs[1];
/* single element can be a NULL too */
- is_null = null_string ? text_isequal(inputstring, null_string) : false;
+ is_null = null_string ? text_isequal(inputstring, null_string, PG_GET_COLLATION()) : false;
elems[0] = PointerGetDatum(inputstring);
nulls[0] = is_null;
@@ -4645,7 +4754,7 @@ text_to_array_internal(PG_FUNCTION_ARGS)
TEXTOID, -1, false, 'i'));
}
- text_position_setup(inputstring, fldsep, &state);
+ text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
start_ptr = VARDATA_ANY(inputstring);
@@ -4673,7 +4782,7 @@ text_to_array_internal(PG_FUNCTION_ARGS)
/* must build a temp text datum to pass to accumArrayResult */
result_text = cstring_to_text_with_len(start_ptr, chunk_len);
- is_null = null_string ? text_isequal(result_text, null_string) : false;
+ is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
/* stash away this field */
astate = accumArrayResult(astate,
@@ -4715,7 +4824,7 @@ text_to_array_internal(PG_FUNCTION_ARGS)
/* must build a temp text datum to pass to accumArrayResult */
result_text = cstring_to_text_with_len(start_ptr, chunk_len);
- is_null = null_string ? text_isequal(result_text, null_string) : false;
+ is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
/* stash away this field */
astate = accumArrayResult(astate,
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 07e1cd7696..d05930bc4c 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -170,13 +170,18 @@ int4hashfast(Datum datum)
static bool
texteqfast(Datum a, Datum b)
{
- return DatumGetBool(DirectFunctionCall2(texteq, a, b));
+ /*
+ * The use of DEFAULT_COLLATION_OID is fairly arbitrary here. We just
+ * want to take the fast "deterministic" path in texteq().
+ */
+ return DatumGetBool(DirectFunctionCall2Coll(texteq, DEFAULT_COLLATION_OID, a, b));
}
static uint32
texthashfast(Datum datum)
{
- return DatumGetInt32(DirectFunctionCall1(hashtext, datum));
+ /* analogously here as in texteqfast() */
+ return DatumGetInt32(DirectFunctionCall1Coll(hashtext, DEFAULT_COLLATION_OID, datum));
}
static bool
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index e88c45d268..59e6bcd856 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -908,6 +908,22 @@ get_collation_name(Oid colloid)
return NULL;
}
+bool
+get_collation_isdeterministic(Oid colloid)
+{
+ HeapTuple tp;
+ Form_pg_collation colltup;
+ bool result;
+
+ tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(colloid));
+ if (!HeapTupleIsValid(tp))
+ elog(ERROR, "cache lookup failed for collation %u", colloid);
+ colltup = (Form_pg_collation) GETSTRUCT(tp);
+ result = colltup->collisdeterministic;
+ ReleaseSysCache(tp);
+ return result;
+}
+
/* ---------- CONSTRAINT CACHE ---------- */
/*