From 8ad0f4912ed72daced74ae4d1c939ebdbc517476 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 9 Apr 2015 11:41:36 +0100 Subject: Stream ordering and out of order insertions. Handle the fact that events can be persisted out of order, and so to get the "current max" stream token becomes non trivial - as we need to make sure that *all* stream tokens less than the current max have also successfully been persisted. --- synapse/storage/_base.py | 46 +------------ synapse/storage/events.py | 9 ++- synapse/storage/stream.py | 23 +------ synapse/storage/util/__init__.py | 14 ++++ synapse/storage/util/id_generators.py | 126 ++++++++++++++++++++++++++++++++++ 5 files changed, 153 insertions(+), 65 deletions(-) create mode 100644 synapse/storage/util/__init__.py create mode 100644 synapse/storage/util/id_generators.py diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 23289bbdd4..badf9a5f40 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -22,6 +22,8 @@ from synapse.util.logcontext import PreserveLoggingContext, LoggingContext from synapse.util.lrucache import LruCache import synapse.metrics +from util.id_generators import IdGenerator, StreamIdGenerator + from twisted.internet import defer from collections import namedtuple, OrderedDict @@ -29,7 +31,6 @@ import functools import simplejson as json import sys import time -import threading logger = logging.getLogger(__name__) @@ -232,46 +233,6 @@ class PerformanceCounters(object): return top_n_counters -class IdGenerator(object): - def __init__(self, table, column, store): - self.table = table - self.column = column - self.store = store - self._lock = threading.Lock() - self._next_id = None - - @defer.inlineCallbacks - def get_next(self): - with self._lock: - if not self._next_id: - res = yield self.store._execute_and_decode( - "IdGenerator_%s" % (self.table,), - "SELECT MAX(%s) as mx FROM %s" % (self.column, self.table,) - ) - - self._next_id = (res and res[0] and res[0]["mx"]) or 1 - - i = self._next_id - self._next_id += 1 - defer.returnValue(i) - - def get_next_txn(self, txn): - with self._lock: - if self._next_id: - i = self._next_id - self._next_id += 1 - return i - else: - txn.execute( - "SELECT MAX(%s) FROM %s" % (self.column, self.table,) - ) - - val, = txn.fetchone() - self._next_id = val or 2 - - return 1 - - class SQLBaseStore(object): _TXN_ID = 0 @@ -297,7 +258,7 @@ class SQLBaseStore(object): # Pretend the getEventCache is just another named cache caches_by_name["*getEvent*"] = self._get_event_cache - self._stream_id_gen = IdGenerator("events", "stream_ordering", self) + self._stream_id_gen = StreamIdGenerator() self._transaction_id_gen = IdGenerator("sent_transactions", "id", self) self._state_groups_id_gen = IdGenerator("state_groups", "id", self) self._access_tokens_id_gen = IdGenerator("access_tokens", "id", self) @@ -363,7 +324,6 @@ class SQLBaseStore(object): *args, **kwargs ) except self.database_engine.module.DatabaseError as e: - logger.warn("[TXN DEADLOCK] {%s} %r, %r", name, e.errno, e.sqlstate) if self.database_engine.is_deadlock(e): logger.warn("[TXN DEADLOCK] {%s} %d/%d", name, i, N) if i < N: diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 3b3416716e..f066484c7e 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -52,7 +52,6 @@ class EventsStore(SQLBaseStore): is_new_state=is_new_state, current_state=current_state, ) - self.get_room_events_max_id.invalidate() except _RollbackButIsFineException: pass @@ -97,7 +96,13 @@ class EventsStore(SQLBaseStore): self._get_event_cache.pop(event.event_id) if stream_ordering is None: - stream_ordering = self._stream_id_gen.get_next_txn(txn) + with self._stream_id_gen.get_next_txn(txn) as stream_ordering: + return self._persist_event_txn( + txn, event, context, backfilled, + stream_ordering=stream_ordering, + is_new_state=is_new_state, + current_state=current_state, + ) # We purposefully do this first since if we include a `current_state` # key, we *want* to update the `current_state_events` table diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index e6bb5a8077..9925f04bf7 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -413,12 +413,10 @@ class StreamStore(SQLBaseStore): "get_recent_events_for_room", get_recent_events_for_room_txn ) - @cached(num_args=0) + @defer.inlineCallbacks def get_room_events_max_id(self): - return self.runInteraction( - "get_room_events_max_id", - self._get_room_events_max_id_txn - ) + token = yield self._stream_id_gen.get_max_token(self) + defer.returnValue("s%d" % (token,)) @defer.inlineCallbacks def _get_min_token(self): @@ -433,21 +431,6 @@ class StreamStore(SQLBaseStore): defer.returnValue(self.min_token) - def _get_room_events_max_id_txn(self, txn): - txn.execute( - "SELECT MAX(stream_ordering) as m FROM events" - ) - - res = self.cursor_to_dict(txn) - - logger.debug("get_room_events_max_id: %s", res) - - if not res or not res[0] or not res[0]["m"]: - return "s0" - - key = res[0]["m"] - return "s%d" % (key,) - @staticmethod def _set_before_and_after(events, rows): for event, row in zip(events, rows): diff --git a/synapse/storage/util/__init__.py b/synapse/storage/util/__init__.py new file mode 100644 index 0000000000..c488b10d3c --- /dev/null +++ b/synapse/storage/util/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Copyright 2014, 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py new file mode 100644 index 0000000000..8f419323a7 --- /dev/null +++ b/synapse/storage/util/id_generators.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +# Copyright 2014, 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from collections import deque +import contextlib +import threading + + +class IdGenerator(object): + def __init__(self, table, column, store): + self.table = table + self.column = column + self.store = store + self._lock = threading.Lock() + self._next_id = None + + @defer.inlineCallbacks + def get_next(self): + with self._lock: + if not self._next_id: + res = yield self.store._execute_and_decode( + "IdGenerator_%s" % (self.table,), + "SELECT MAX(%s) as mx FROM %s" % (self.column, self.table,) + ) + + self._next_id = (res and res[0] and res[0]["mx"]) or 1 + + i = self._next_id + self._next_id += 1 + defer.returnValue(i) + + def get_next_txn(self, txn): + with self._lock: + if self._next_id: + i = self._next_id + self._next_id += 1 + return i + else: + txn.execute( + "SELECT MAX(%s) FROM %s" % (self.column, self.table,) + ) + + val, = txn.fetchone() + self._next_id = val or 2 + + return 1 + + +class StreamIdGenerator(object): + """Used to generate new stream ids when persisting events while keeping + track of which transactions have been completed. + + This allows us to get the "current" stream id, i.e. the stream id such that + all ids less than or equal to it have completed. This handles the fact that + persistence of events can complete out of order. + + Usage: + with stream_id_gen.get_next_txn(txn) as stream_id: + # ... persist event ... + """ + def __init__(self): + self._lock = threading.Lock() + + self._current_max = None + self._unfinished_ids = deque() + + def get_next_txn(self, txn): + """ + Usage: + with stream_id_gen.get_next_txn(txn) as stream_id: + # ... persist event ... + """ + with self._lock: + if not self._current_max: + self._compute_current_max(txn) + + self._current_max += 1 + next_id = self._current_max + + self._unfinished_ids.append(next_id) + + @contextlib.contextmanager + def manager(): + yield next_id + with self._lock: + self._unfinished_ids.remove(next_id) + + return manager() + + def get_max_token(self, store): + """Returns the maximum stream id such that all stream ids less than or + equal to it have been successfully persisted. + """ + with self._lock: + if self._unfinished_ids: + return self._unfinished_ids[0] - 1 + + if not self._current_max: + return store.runInteraction( + "_compute_current_max", + self._compute_current_max, + ) + + return self._current_max + + def _compute_current_max(self, txn): + txn.execute("SELECT MAX(stream_ordering) FROM events") + val, = txn.fetchone() + + self._current_max = int(val) if val else 1 + + return self._current_max -- cgit 1.4.1