From 8ad0f4912ed72daced74ae4d1c939ebdbc517476 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 9 Apr 2015 11:41:36 +0100 Subject: Stream ordering and out of order insertions. Handle the fact that events can be persisted out of order, and so to get the "current max" stream token becomes non trivial - as we need to make sure that *all* stream tokens less than the current max have also successfully been persisted. --- synapse/storage/util/__init__.py | 14 ++++ synapse/storage/util/id_generators.py | 126 ++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 synapse/storage/util/__init__.py create mode 100644 synapse/storage/util/id_generators.py (limited to 'synapse/storage/util') diff --git a/synapse/storage/util/__init__.py b/synapse/storage/util/__init__.py new file mode 100644 index 0000000000..c488b10d3c --- /dev/null +++ b/synapse/storage/util/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Copyright 2014, 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py new file mode 100644 index 0000000000..8f419323a7 --- /dev/null +++ b/synapse/storage/util/id_generators.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +# Copyright 2014, 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from collections import deque +import contextlib +import threading + + +class IdGenerator(object): + def __init__(self, table, column, store): + self.table = table + self.column = column + self.store = store + self._lock = threading.Lock() + self._next_id = None + + @defer.inlineCallbacks + def get_next(self): + with self._lock: + if not self._next_id: + res = yield self.store._execute_and_decode( + "IdGenerator_%s" % (self.table,), + "SELECT MAX(%s) as mx FROM %s" % (self.column, self.table,) + ) + + self._next_id = (res and res[0] and res[0]["mx"]) or 1 + + i = self._next_id + self._next_id += 1 + defer.returnValue(i) + + def get_next_txn(self, txn): + with self._lock: + if self._next_id: + i = self._next_id + self._next_id += 1 + return i + else: + txn.execute( + "SELECT MAX(%s) FROM %s" % (self.column, self.table,) + ) + + val, = txn.fetchone() + self._next_id = val or 2 + + return 1 + + +class StreamIdGenerator(object): + """Used to generate new stream ids when persisting events while keeping + track of which transactions have been completed. + + This allows us to get the "current" stream id, i.e. the stream id such that + all ids less than or equal to it have completed. This handles the fact that + persistence of events can complete out of order. + + Usage: + with stream_id_gen.get_next_txn(txn) as stream_id: + # ... persist event ... + """ + def __init__(self): + self._lock = threading.Lock() + + self._current_max = None + self._unfinished_ids = deque() + + def get_next_txn(self, txn): + """ + Usage: + with stream_id_gen.get_next_txn(txn) as stream_id: + # ... persist event ... + """ + with self._lock: + if not self._current_max: + self._compute_current_max(txn) + + self._current_max += 1 + next_id = self._current_max + + self._unfinished_ids.append(next_id) + + @contextlib.contextmanager + def manager(): + yield next_id + with self._lock: + self._unfinished_ids.remove(next_id) + + return manager() + + def get_max_token(self, store): + """Returns the maximum stream id such that all stream ids less than or + equal to it have been successfully persisted. + """ + with self._lock: + if self._unfinished_ids: + return self._unfinished_ids[0] - 1 + + if not self._current_max: + return store.runInteraction( + "_compute_current_max", + self._compute_current_max, + ) + + return self._current_max + + def _compute_current_max(self, txn): + txn.execute("SELECT MAX(stream_ordering) FROM events") + val, = txn.fetchone() + + self._current_max = int(val) if val else 1 + + return self._current_max -- cgit 1.4.1 From 3c741682e59a41fcc45a5b9a370c7f268be7729e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 14 Apr 2015 09:54:44 +0100 Subject: Correctly increment the _next_id initially --- synapse/storage/util/id_generators.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'synapse/storage/util') diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 8f419323a7..2e2a408988 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -55,9 +55,11 @@ class IdGenerator(object): ) val, = txn.fetchone() - self._next_id = val or 2 + cur = val or 0 + cur += 1 + self._next_id = cur + 1 - return 1 + return cur class StreamIdGenerator(object): -- cgit 1.4.1 From a971fa9d584b35c35b675b65ed86faed77b46cf7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 15 Apr 2015 10:25:43 +0100 Subject: Use try..finally in contextlib.contextmanager --- synapse/storage/util/id_generators.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'synapse/storage/util') diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 2e2a408988..e5dec1c948 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -97,9 +97,11 @@ class StreamIdGenerator(object): @contextlib.contextmanager def manager(): - yield next_id - with self._lock: - self._unfinished_ids.remove(next_id) + try: + yield next_id + finally: + with self._lock: + self._unfinished_ids.remove(next_id) return manager() -- cgit 1.4.1 From 8558e1ec7338a1f60342024736614dec71d104ce Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 27 Apr 2015 15:19:44 +0100 Subject: Make get_max_token into inlineCallbacks so that the lock works. --- synapse/storage/util/id_generators.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'synapse/storage/util') diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index e5dec1c948..9d461d5e96 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -105,21 +105,22 @@ class StreamIdGenerator(object): return manager() + @defer.inlineCallbacks def get_max_token(self, store): """Returns the maximum stream id such that all stream ids less than or equal to it have been successfully persisted. """ with self._lock: if self._unfinished_ids: - return self._unfinished_ids[0] - 1 + defer.returnValue(self._unfinished_ids[0] - 1) if not self._current_max: - return store.runInteraction( + yield store.runInteraction( "_compute_current_max", self._compute_current_max, ) - return self._current_max + defer.returnValue(self._current_max) def _compute_current_max(self, txn): txn.execute("SELECT MAX(stream_ordering) FROM events") -- cgit 1.4.1