summaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2021-02-02 17:11:55 -0500
committerTom Lane <tgl@sss.pgh.pa.us>2021-02-02 17:11:55 -0500
commit62f34097c88433ef1f3de604714fe7e7024f2fdf (patch)
tree6bcedf1e516ad5ebfca4c93d94469ebf2733caf1 /src/backend
parent479331406e8403cc2e75d1082f8c613e7669c113 (diff)
downloadpostgresql-62f34097c88433ef1f3de604714fe7e7024f2fdf.tar.gz
Build in some knowledge about foreign-key relationships in the catalogs.
This follows in the spirit of commit dfb75e478, which created primary key and uniqueness constraints to improve the visibility of constraints imposed on the system catalogs. While our catalogs contain many foreign-key-like relationships, they don't quite follow SQL semantics, in that the convention for an omitted reference is to write zero not NULL. Plus, we have some cases in which there are arrays each of whose elements is supposed to be an FK reference; SQL has no way to model that. So we can't create actual foreign key constraints to describe the situation. Nonetheless, we can collect and use knowledge about these relationships. This patch therefore adds annotations to the catalog header files to declare foreign-key relationships. (The BKI_LOOKUP annotations cover simple cases, but we weren't previously distinguishing which such columns are allowed to contain zeroes; we also need new markings for multi-column FK references.) Then, Catalog.pm and genbki.pl are taught to collect this information into a table in a new generated header "system_fk_info.h". The only user of that at the moment is a new SQL function pg_get_catalog_foreign_keys(), which exposes the table to SQL. The oidjoins regression test is rewritten to use pg_get_catalog_foreign_keys() to find out which columns to check. Aside from removing the need for manual maintenance of that test script, this allows it to cover numerous relationships that were not checked by the old implementation based on findoidjoins. (As of this commit, 217 relationships are checked by the test, versus 181 before.) Discussion: https://postgr.es/m/3240355.1612129197@sss.pgh.pa.us
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/catalog/.gitignore1
-rw-r--r--src/backend/catalog/Catalog.pm28
-rw-r--r--src/backend/catalog/Makefile2
-rw-r--r--src/backend/catalog/genbki.pl123
-rw-r--r--src/backend/utils/adt/misc.c80
5 files changed, 214 insertions, 20 deletions
diff --git a/src/backend/catalog/.gitignore b/src/backend/catalog/.gitignore
index 4bd3ee9d7f..237ff54165 100644
--- a/src/backend/catalog/.gitignore
+++ b/src/backend/catalog/.gitignore
@@ -1,5 +1,6 @@
/postgres.bki
/schemapg.h
+/system_fk_info.h
/system_constraints.sql
/pg_*_d.h
/bki-stamp
diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm
index 061f3d8c21..b44d568b54 100644
--- a/src/backend/catalog/Catalog.pm
+++ b/src/backend/catalog/Catalog.pm
@@ -105,6 +105,17 @@ sub ParseHeader
index_decl => $5
};
}
+ elsif (/^DECLARE_(ARRAY_)?FOREIGN_KEY(_OPT)?\(\s*\(([^)]+)\),\s*(\w+),\s*\(([^)]+)\)\)/)
+ {
+ push @{ $catalog{foreign_keys} },
+ {
+ is_array => $1 ? 1 : 0,
+ is_opt => $2 ? 1 : 0,
+ fk_cols => $3,
+ pk_table => $4,
+ pk_cols => $5
+ };
+ }
elsif (/^CATALOG\((\w+),(\d+),(\w+)\)/)
{
$catalog{catname} = $1;
@@ -197,9 +208,22 @@ sub ParseHeader
{
$column{array_default} = $1;
}
- elsif ($attopt =~ /BKI_LOOKUP\((\w+)\)/)
+ elsif ($attopt =~ /BKI_LOOKUP(_OPT)?\((\w+)\)/)
{
- $column{lookup} = $1;
+ $column{lookup} = $2;
+ $column{lookup_opt} = $1 ? 1 : 0;
+ # BKI_LOOKUP implicitly makes an FK reference
+ push @{ $catalog{foreign_keys} },
+ {
+ is_array =>
+ ($atttype eq 'oidvector' || $atttype eq '_oid')
+ ? 1
+ : 0,
+ is_opt => $column{lookup_opt},
+ fk_cols => $attname,
+ pk_table => $column{lookup},
+ pk_cols => 'oid'
+ };
}
else
{
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 995ddf1285..70bc2123df 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -70,7 +70,7 @@ CATALOG_HEADERS := \
pg_sequence.h pg_publication.h pg_publication_rel.h pg_subscription.h \
pg_subscription_rel.h
-GENERATED_HEADERS := $(CATALOG_HEADERS:%.h=%_d.h) schemapg.h
+GENERATED_HEADERS := $(CATALOG_HEADERS:%.h=%_d.h) schemapg.h system_fk_info.h
POSTGRES_BKI_SRCS := $(addprefix $(top_srcdir)/src/include/catalog/, $(CATALOG_HEADERS))
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index b68c1752c0..5bdc7adc44 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -213,6 +213,12 @@ foreach my $row (@{ $catalog_data{pg_am} })
$amoids{ $row->{amname} } = $row->{oid};
}
+# There is only one authid at bootstrap time, and we handle it specially:
+# the usually-defaulted symbol PGUID becomes the bootstrap superuser's OID.
+# (We could drop this in favor of writing out BKI_DEFAULT(POSTGRES) ...)
+my %authidoids;
+$authidoids{'PGUID'} = $BOOTSTRAP_SUPERUSERID;
+
# class (relation) OID lookup (note this only covers bootstrap catalogs!)
my %classoids;
foreach my $row (@{ $catalog_data{pg_class} })
@@ -234,6 +240,12 @@ foreach my $row (@{ $catalog_data{pg_language} })
$langoids{ $row->{lanname} } = $row->{oid};
}
+# There is only one namespace at bootstrap time, and we handle it specially:
+# the usually-defaulted symbol PGNSP becomes the pg_catalog namespace's OID.
+# (We could drop this in favor of writing out BKI_DEFAULT(pg_catalog) ...)
+my %namespaceoids;
+$namespaceoids{'PGNSP'} = $PG_CATALOG_NAMESPACE;
+
# opclass OID lookup
my %opcoids;
foreach my $row (@{ $catalog_data{pg_opclass} })
@@ -376,9 +388,11 @@ close $ef;
# Map lookup name to the corresponding hash table.
my %lookup_kind = (
pg_am => \%amoids,
+ pg_authid => \%authidoids,
pg_class => \%classoids,
pg_collation => \%collationoids,
pg_language => \%langoids,
+ pg_namespace => \%namespaceoids,
pg_opclass => \%opcoids,
pg_operator => \%operoids,
pg_opfamily => \%opfoids,
@@ -400,6 +414,9 @@ open my $bki, '>', $bkifile . $tmpext
my $schemafile = $output_path . 'schemapg.h';
open my $schemapg, '>', $schemafile . $tmpext
or die "can't open $schemafile$tmpext: $!";
+my $fk_info_file = $output_path . 'system_fk_info.h';
+open my $fk_info, '>', $fk_info_file . $tmpext
+ or die "can't open $fk_info_file$tmpext: $!";
my $constraints_file = $output_path . 'system_constraints.sql';
open my $constraints, '>', $constraints_file . $tmpext
or die "can't open $constraints_file$tmpext: $!";
@@ -554,18 +571,14 @@ EOM
$GenbkiNextOid++;
}
- # Substitute constant values we acquired above.
- # (It's intentional that this can apply to parts of a field).
- $bki_values{$attname} =~ s/\bPGUID\b/$BOOTSTRAP_SUPERUSERID/g;
- $bki_values{$attname} =~ s/\bPGNSP\b/$PG_CATALOG_NAMESPACE/g;
-
# Replace OID synonyms with OIDs per the appropriate lookup rule.
#
# If the column type is oidvector or _oid, we have to replace
# each element of the array as per the lookup rule.
if ($column->{lookup})
{
- my $lookup = $lookup_kind{ $column->{lookup} };
+ my $lookup = $lookup_kind{ $column->{lookup} };
+ my $lookup_opt = $column->{lookup_opt};
my @lookupnames;
my @lookupoids;
@@ -575,8 +588,9 @@ EOM
if ($atttype eq 'oidvector')
{
@lookupnames = split /\s+/, $bki_values{$attname};
- @lookupoids = lookup_oids($lookup, $catname, \%bki_values,
- @lookupnames);
+ @lookupoids =
+ lookup_oids($lookup, $catname, $attname, $lookup_opt,
+ \%bki_values, @lookupnames);
$bki_values{$attname} = join(' ', @lookupoids);
}
elsif ($atttype eq '_oid')
@@ -586,8 +600,8 @@ EOM
$bki_values{$attname} =~ s/[{}]//g;
@lookupnames = split /,/, $bki_values{$attname};
@lookupoids =
- lookup_oids($lookup, $catname, \%bki_values,
- @lookupnames);
+ lookup_oids($lookup, $catname, $attname,
+ $lookup_opt, \%bki_values, @lookupnames);
$bki_values{$attname} = sprintf "{%s}",
join(',', @lookupoids);
}
@@ -595,8 +609,9 @@ EOM
else
{
$lookupnames[0] = $bki_values{$attname};
- @lookupoids = lookup_oids($lookup, $catname, \%bki_values,
- @lookupnames);
+ @lookupoids =
+ lookup_oids($lookup, $catname, $attname, $lookup_opt,
+ \%bki_values, @lookupnames);
$bki_values{$attname} = $lookupoids[0];
}
}
@@ -706,14 +721,78 @@ foreach my $table_name (@tables_needing_macros)
# Closing boilerplate for schemapg.h
print $schemapg "\n#endif\t\t\t\t\t\t\t/* SCHEMAPG_H */\n";
+# Now generate system_fk_info.h
+
+# Opening boilerplate for system_fk_info.h
+print $fk_info <<EOM;
+/*-------------------------------------------------------------------------
+ *
+ * system_fk_info.h
+ * Data about the foreign-key relationships in the system catalogs
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES
+ * ******************************
+ * *** DO NOT EDIT THIS FILE! ***
+ * ******************************
+ *
+ * It has been GENERATED by src/backend/catalog/genbki.pl
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef SYSTEM_FK_INFO_H
+#define SYSTEM_FK_INFO_H
+
+typedef struct SysFKRelationship
+{
+ Oid fk_table; /* referencing catalog */
+ Oid pk_table; /* referenced catalog */
+ const char *fk_columns; /* referencing column name(s) */
+ const char *pk_columns; /* referenced column name(s) */
+ bool is_array; /* if true, last fk_column is an array */
+ bool is_opt; /* if true, fk_column can be zero */
+} SysFKRelationship;
+
+static const SysFKRelationship sys_fk_relationships[] = {
+EOM
+
+# Emit system_fk_info data
+foreach my $catname (@catnames)
+{
+ my $catalog = $catalogs{$catname};
+ foreach my $fkinfo (@{ $catalog->{foreign_keys} })
+ {
+ my $pktabname = $fkinfo->{pk_table};
+
+ # We use BKI_LOOKUP for encodings, but there's no real catalog there
+ next if $pktabname eq 'encoding';
+
+ printf $fk_info
+ "\t{ /* %s */ %s, /* %s */ %s, \"{%s}\", \"{%s}\", %s, %s},\n",
+ $catname, $catalog->{relation_oid},
+ $pktabname, $catalogs{$pktabname}->{relation_oid},
+ $fkinfo->{fk_cols},
+ $fkinfo->{pk_cols},
+ ($fkinfo->{is_array} ? "true" : "false"),
+ ($fkinfo->{is_opt} ? "true" : "false");
+ }
+}
+
+# Closing boilerplate for system_fk_info.h
+print $fk_info "};\n\n#endif\t\t\t\t\t\t\t/* SYSTEM_FK_INFO_H */\n";
+
# We're done emitting data
close $bki;
close $schemapg;
+close $fk_info;
close $constraints;
# Finally, rename the completed files into place.
Catalog::RenameTempFile($bkifile, $tmpext);
Catalog::RenameTempFile($schemafile, $tmpext);
+Catalog::RenameTempFile($fk_info_file, $tmpext);
Catalog::RenameTempFile($constraints_file, $tmpext);
exit 0;
@@ -948,7 +1027,8 @@ sub morph_row_for_schemapg
# within this genbki.pl run.)
sub lookup_oids
{
- my ($lookup, $catname, $bki_values, @lookupnames) = @_;
+ my ($lookup, $catname, $attname, $lookup_opt, $bki_values, @lookupnames)
+ = @_;
my @lookupoids;
foreach my $lookupname (@lookupnames)
@@ -961,10 +1041,19 @@ sub lookup_oids
else
{
push @lookupoids, $lookupname;
- warn sprintf
- "unresolved OID reference \"%s\" in %s.dat line %s\n",
- $lookupname, $catname, $bki_values->{line_number}
- if $lookupname ne '-' and $lookupname ne '0';
+ if ($lookupname eq '-' or $lookupname eq '0')
+ {
+ warn sprintf
+ "invalid zero OID reference in %s.dat field %s line %s\n",
+ $catname, $attname, $bki_values->{line_number}
+ if !$lookup_opt;
+ }
+ else
+ {
+ warn sprintf
+ "unresolved OID reference \"%s\" in %s.dat field %s line %s\n",
+ $lookupname, $catname, $attname, $bki_values->{line_number};
+ }
}
}
return @lookupoids;
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c
index 4096faff9a..634f574d7e 100644
--- a/src/backend/utils/adt/misc.c
+++ b/src/backend/utils/adt/misc.c
@@ -25,6 +25,7 @@
#include "catalog/catalog.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
+#include "catalog/system_fk_info.h"
#include "commands/dbcommands.h"
#include "commands/tablespace.h"
#include "common/keywords.h"
@@ -37,6 +38,7 @@
#include "storage/fd.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
+#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/ruleutils.h"
#include "utils/timestamp.h"
@@ -489,6 +491,84 @@ pg_get_keywords(PG_FUNCTION_ARGS)
}
+/* Function to return the list of catalog foreign key relationships */
+Datum
+pg_get_catalog_foreign_keys(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ FmgrInfo *arrayinp;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcontext;
+ TupleDesc tupdesc;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ tupdesc = CreateTemplateTupleDesc(6);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "fktable",
+ REGCLASSOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "fkcols",
+ TEXTARRAYOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "pktable",
+ REGCLASSOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 4, "pkcols",
+ TEXTARRAYOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 5, "is_array",
+ BOOLOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 6, "is_opt",
+ BOOLOID, -1, 0);
+
+ funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+ /*
+ * We use array_in to convert the C strings in sys_fk_relationships[]
+ * to text arrays. But we cannot use DirectFunctionCallN to call
+ * array_in, and it wouldn't be very efficient if we could. Fill an
+ * FmgrInfo to use for the call.
+ */
+ arrayinp = (FmgrInfo *) palloc(sizeof(FmgrInfo));
+ fmgr_info(F_ARRAY_IN, arrayinp);
+ funcctx->user_fctx = arrayinp;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ arrayinp = (FmgrInfo *) funcctx->user_fctx;
+
+ if (funcctx->call_cntr < lengthof(sys_fk_relationships))
+ {
+ const SysFKRelationship *fkrel = &sys_fk_relationships[funcctx->call_cntr];
+ Datum values[6];
+ bool nulls[6];
+ HeapTuple tuple;
+
+ memset(nulls, false, sizeof(nulls));
+
+ values[0] = ObjectIdGetDatum(fkrel->fk_table);
+ values[1] = FunctionCall3(arrayinp,
+ CStringGetDatum(fkrel->fk_columns),
+ ObjectIdGetDatum(TEXTOID),
+ Int32GetDatum(-1));
+ values[2] = ObjectIdGetDatum(fkrel->pk_table);
+ values[3] = FunctionCall3(arrayinp,
+ CStringGetDatum(fkrel->pk_columns),
+ ObjectIdGetDatum(TEXTOID),
+ Int32GetDatum(-1));
+ values[4] = BoolGetDatum(fkrel->is_array);
+ values[5] = BoolGetDatum(fkrel->is_opt);
+
+ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+
+ SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
+
+
/*
* Return the type of the argument.
*/