diff options
| author | Mike Bayer <mike_mp@zzzcomputing.com> | 2020-06-28 11:59:34 -0400 |
|---|---|---|
| committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2020-08-05 22:19:46 -0400 |
| commit | 30885744142d89740d459f4dae670ba4775d1d8c (patch) | |
| tree | 0fdd30c0c96778029a48414195f95c5b1fdba30c /examples/sharding/attribute_shard.py | |
| parent | c7b489b25802f7a25ef78d0731411295c611cc1c (diff) | |
| download | sqlalchemy-30885744142d89740d459f4dae670ba4775d1d8c.tar.gz | |
Documentation updates for 1.4
* major additions to 1.4 migration doc; removed additional
verbosity regarding caching methodology and reorganized the
doc to present itself more as a "what's changed" guide
* as we now have a path for asyncio, update that doc so that
we aren't spreading obsolete information
* updates to the 2.0 migration guide with latest info, however
this is still an architecture doc and not a migration guide
yet, will need further rework.
* start really talking about 1.x vs. 2.0 style everywhere. Querying
is most of the docs so this is going to be a prominent
theme, start getting it to fit in
* Add introductory documentation for ORM example sections as these
are too sparse
* new documentation for do_orm_execute(), many separate sections,
adding deprecation notes to before_compile() and similar
* new example suites to illustrate do_orm_execute(),
with_loader_criteria()
* modernized horizontal sharding examples and added a separate
example to distinguish between multiple databases and single
database w/ multiple tables use case
* introducing DEEP ALCHEMY, will use zzzeeksphinx 1.1.6
* no name for the alchemist yet however the dragon's name
is Flambé
Change-Id: Id6b5c03b1ce9ddb7b280f66792212a0ef0a1c541
Diffstat (limited to 'examples/sharding/attribute_shard.py')
| -rw-r--r-- | examples/sharding/attribute_shard.py | 299 |
1 files changed, 0 insertions, 299 deletions
diff --git a/examples/sharding/attribute_shard.py b/examples/sharding/attribute_shard.py deleted file mode 100644 index 7b8f87d90..000000000 --- a/examples/sharding/attribute_shard.py +++ /dev/null @@ -1,299 +0,0 @@ -import datetime - -from sqlalchemy import Column -from sqlalchemy import create_engine -from sqlalchemy import DateTime -from sqlalchemy import Float -from sqlalchemy import ForeignKey -from sqlalchemy import inspect -from sqlalchemy import Integer -from sqlalchemy import String -from sqlalchemy import Table -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.ext.horizontal_shard import ShardedSession -from sqlalchemy.orm import relationship -from sqlalchemy.orm import sessionmaker -from sqlalchemy.sql import operators -from sqlalchemy.sql import visitors - - -echo = True -db1 = create_engine("sqlite://", echo=echo) -db2 = create_engine("sqlite://", echo=echo) -db3 = create_engine("sqlite://", echo=echo) -db4 = create_engine("sqlite://", echo=echo) - - -# create session function. this binds the shard ids -# to databases within a ShardedSession and returns it. -create_session = sessionmaker(class_=ShardedSession) - -create_session.configure( - shards={ - "north_america": db1, - "asia": db2, - "europe": db3, - "south_america": db4, - } -) - - -# mappings and tables -Base = declarative_base() - -# we need a way to create identifiers which are unique across all databases. -# one easy way would be to just use a composite primary key, where one value -# is the shard id. but here, we'll show something more "generic", an id -# generation function. we'll use a simplistic "id table" stored in database -# #1. Any other method will do just as well; UUID, hilo, application-specific, -# etc. - -ids = Table("ids", Base.metadata, Column("nextid", Integer, nullable=False)) - - -def id_generator(ctx): - # in reality, might want to use a separate transaction for this. - with db1.connect() as conn: - nextid = conn.scalar(ids.select(for_update=True)) - conn.execute(ids.update(values={ids.c.nextid: ids.c.nextid + 1})) - return nextid - - -# table setup. we'll store a lead table of continents/cities, and a secondary -# table storing locations. a particular row will be placed in the database -# whose shard id corresponds to the 'continent'. in this setup, secondary rows -# in 'weather_reports' will be placed in the same DB as that of the parent, but -# this can be changed if you're willing to write more complex sharding -# functions. - - -class WeatherLocation(Base): - __tablename__ = "weather_locations" - - id = Column(Integer, primary_key=True, default=id_generator) - continent = Column(String(30), nullable=False) - city = Column(String(50), nullable=False) - - reports = relationship("Report", backref="location") - - def __init__(self, continent, city): - self.continent = continent - self.city = city - - -class Report(Base): - __tablename__ = "weather_reports" - - id = Column(Integer, primary_key=True) - location_id = Column( - "location_id", Integer, ForeignKey("weather_locations.id") - ) - temperature = Column("temperature", Float) - report_time = Column( - "report_time", DateTime, default=datetime.datetime.now - ) - - def __init__(self, temperature): - self.temperature = temperature - - -# create tables -for db in (db1, db2, db3, db4): - Base.metadata.drop_all(db) - Base.metadata.create_all(db) - -# establish initial "id" in db1 -db1.execute(ids.insert(), nextid=1) - - -# step 5. define sharding functions. - -# we'll use a straight mapping of a particular set of "country" -# attributes to shard id. -shard_lookup = { - "North America": "north_america", - "Asia": "asia", - "Europe": "europe", - "South America": "south_america", -} - - -def shard_chooser(mapper, instance, clause=None): - """shard chooser. - - looks at the given instance and returns a shard id - note that we need to define conditions for - the WeatherLocation class, as well as our secondary Report class which will - point back to its WeatherLocation via its 'location' attribute. - - """ - if isinstance(instance, WeatherLocation): - return shard_lookup[instance.continent] - else: - return shard_chooser(mapper, instance.location) - - -def id_chooser(query, ident): - """id chooser. - - given a primary key, returns a list of shards - to search. here, we don't have any particular information from a - pk so we just return all shard ids. often, you'd want to do some - kind of round-robin strategy here so that requests are evenly - distributed among DBs. - - """ - if query.lazy_loaded_from: - # if we are in a lazy load, we can look at the parent object - # and limit our search to that same shard, assuming that's how we've - # set things up. - return [query.lazy_loaded_from.identity_token] - else: - return ["north_america", "asia", "europe", "south_america"] - - -def query_chooser(query): - """query chooser. - - this also returns a list of shard ids, which can - just be all of them. but here we'll search into the Query in order - to try to narrow down the list of shards to query. - - """ - ids = [] - - # we'll grab continent names as we find them - # and convert to shard ids - for column, operator, value in _get_query_comparisons(query): - # "shares_lineage()" returns True if both columns refer to the same - # statement column, adjusting for any annotations present. - # (an annotation is an internal clone of a Column object - # and occur when using ORM-mapped attributes like - # "WeatherLocation.continent"). A simpler comparison, though less - # accurate, would be "column.key == 'continent'". - if column.shares_lineage(WeatherLocation.__table__.c.continent): - if operator == operators.eq: - ids.append(shard_lookup[value]) - elif operator == operators.in_op: - ids.extend(shard_lookup[v] for v in value) - - if len(ids) == 0: - return ["north_america", "asia", "europe", "south_america"] - else: - return ids - - -def _get_query_comparisons(query): - """Search an orm.Query object for binary expressions. - - Returns expressions which match a Column against one or more - literal values as a list of tuples of the form - (column, operator, values). "values" is a single value - or tuple of values depending on the operator. - - """ - binds = {} - clauses = set() - comparisons = [] - - def visit_bindparam(bind): - # visit a bind parameter. - - # check in _params for it first - if bind.key in query._params: - value = query._params[bind.key] - elif bind.callable: - # some ORM functions (lazy loading) - # place the bind's value as a - # callable for deferred evaluation. - value = bind.callable() - else: - # just use .value - value = bind.value - - binds[bind] = value - - def visit_column(column): - clauses.add(column) - - def visit_binary(binary): - if binary.left in clauses and binary.right in binds: - comparisons.append( - (binary.left, binary.operator, binds[binary.right]) - ) - - elif binary.left in binds and binary.right in clauses: - comparisons.append( - (binary.right, binary.operator, binds[binary.left]) - ) - - # here we will traverse through the query's criterion, searching - # for SQL constructs. We will place simple column comparisons - # into a list. - if query._criterion is not None: - visitors.traverse_depthfirst( - query._criterion, - {}, - { - "bindparam": visit_bindparam, - "binary": visit_binary, - "column": visit_column, - }, - ) - return comparisons - - -# further configure create_session to use these functions -create_session.configure( - shard_chooser=shard_chooser, - id_chooser=id_chooser, - query_chooser=query_chooser, -) - -# save and load objects! - -tokyo = WeatherLocation("Asia", "Tokyo") -newyork = WeatherLocation("North America", "New York") -toronto = WeatherLocation("North America", "Toronto") -london = WeatherLocation("Europe", "London") -dublin = WeatherLocation("Europe", "Dublin") -brasilia = WeatherLocation("South America", "Brasila") -quito = WeatherLocation("South America", "Quito") - -tokyo.reports.append(Report(80.0)) -newyork.reports.append(Report(75)) -quito.reports.append(Report(85)) - -sess = create_session() - -sess.add_all([tokyo, newyork, toronto, london, dublin, brasilia, quito]) - -sess.commit() - -t = sess.query(WeatherLocation).get(tokyo.id) -assert t.city == tokyo.city -assert t.reports[0].temperature == 80.0 - -north_american_cities = sess.query(WeatherLocation).filter( - WeatherLocation.continent == "North America" -) -assert {c.city for c in north_american_cities} == {"New York", "Toronto"} - -asia_and_europe = sess.query(WeatherLocation).filter( - WeatherLocation.continent.in_(["Europe", "Asia"]) -) -assert {c.city for c in asia_and_europe} == {"Tokyo", "London", "Dublin"} - -# the Report class uses a simple integer primary key. So across two databases, -# a primary key will be repeated. The "identity_token" tracks in memory -# that these two identical primary keys are local to different databases. -newyork_report = newyork.reports[0] -tokyo_report = tokyo.reports[0] - -assert inspect(newyork_report).identity_key == (Report, (1,), "north_america") -assert inspect(tokyo_report).identity_key == (Report, (1,), "asia") - -# the token representing the originating shard is also available directly - -assert inspect(newyork_report).identity_token == "north_america" -assert inspect(tokyo_report).identity_token == "asia" |
