summary refs log tree commit diff
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2018-05-17 15:08:50 +0100
committerErik Johnston <erik@matrix.org>2018-05-17 15:09:10 +0100
commitc771c124d58a5f9c3be50b29e1b5c2011ac009d8 (patch)
treeca73da6cfc7d38d58f74ffcfe373b9a2313ebb97
parentAdd note about index in changelog (diff)
downloadsynapse-c771c124d58a5f9c3be50b29e1b5c2011ac009d8.tar.xz
Improve documentation and comments
-rw-r--r--synapse/storage/chunk_ordered_table.py13
-rw-r--r--synapse/util/katriel_bodlaender.py51
-rw-r--r--tests/storage/test_chunk_linearizer_table.py4
3 files changed, 58 insertions, 10 deletions
diff --git a/synapse/storage/chunk_ordered_table.py b/synapse/storage/chunk_ordered_table.py
index 33089c2c60..442d82cbb2 100644
--- a/synapse/storage/chunk_ordered_table.py
+++ b/synapse/storage/chunk_ordered_table.py
@@ -33,13 +33,18 @@ class ChunkDBOrderedListStore(OrderedListStore):
     """Used as the list store for room chunks, efficiently maintaining them in
     topological order on updates.
 
+    A room chunk is a connected portion of the room events DAG. As such it
+    inherits a DAG, i.e. if an event in one chunk references an event in a
+    second chunk, then we say that the first chunk references the second, and
+    thus forming a DAG.
+
     The class is designed for use inside transactions and so takes a
     transaction object in the constructor. This means that it needs to be
     re-instantiated in each transaction, so all state needs to be stored
     in the database.
 
     Internally the ordering is implemented using floats, and the average is
-    taken when a node is inserted inbetween other nodes. To avoid presicion
+    taken when a node is inserted between other nodes. To avoid precision
     errors a minimum difference between sucessive orderings is attempted to be
     kept; whenever the difference is too small we attempt to rebalance. See
     the `_rebalance` function for implementation details.
@@ -51,6 +56,10 @@ class ChunkDBOrderedListStore(OrderedListStore):
     edge is from B to A. This ensures that newer chunks get inserted at the
     end (rather than the start).
 
+    Note: Calls to `add_node` and `add_edge` cannot overlap for the same room,
+    and so callers should perform some form of per-room locking when using
+    this class.
+
     Args:
         txn
         room_id (str)
@@ -59,7 +68,7 @@ class ChunkDBOrderedListStore(OrderedListStore):
             in a range around the node, where the bounds are rounded to this
             number of digits.
         min_difference (int): A rebalance is triggered when the difference
-            between two successive orderings are less than the reverse of
+            between two successive orderings is less than the reciprocal of
             this.
     """
     def __init__(self,
diff --git a/synapse/util/katriel_bodlaender.py b/synapse/util/katriel_bodlaender.py
index b924a4cfdf..887a6b4681 100644
--- a/synapse/util/katriel_bodlaender.py
+++ b/synapse/util/katriel_bodlaender.py
@@ -16,8 +16,9 @@
 """This module contains an implementation of the Katriel-Bodlaender algorithm,
 which is used to do online topological ordering of graphs.
 
-Note that the ordering derived from the graph has the first node one with no
-incoming edges at the start, and the last node one with no outgoing edges.
+Note that the ordering derived from the graph is such that the source node of
+an edge comes before the target node of the edge, i.e. a graph of A -> B -> C
+would produce the ordering [A, B, C].
 
 This ordering is therefore opposite to what one might expect when considering
 the room DAG, as newer messages would be added to the start rather than the
@@ -25,15 +26,46 @@ end.
 
 ***We therefore invert the direction of edges when using the algorithm***
 
-See https://www.sciencedirect.com/science/article/pii/S0304397507006573
+See:
+    A tight analysis of the Katriel–Bodlaender algorithm for online topological
+    ordering
+    Hsiao-Fei Liua and Kun-Mao Chao
+    https://www.sciencedirect.com/science/article/pii/S0304397507006573
+and:
+    Online Topological Ordering
+    Irit Katriel and Hans L. Bodlaender
+    http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.78.7933 )
 """
 
 from abc import ABCMeta, abstractmethod
 
 
 class OrderedListStore(object):
-    """An abstract base class that is used to store a topological ordering of
-    a graph. Suitable for use with the Katriel-Bodlaender algorithm.
+    """An abstract base class that is used to store a graph and maintain a
+    topological consistent, total ordering.
+
+    Internally this uses the Katriel-Bodlaender algorithm, which requires the
+    store expose an interface for the total ordering that supports:
+
+        - Insertion of the node into the ordering either immediately before or
+          after another node.
+        - Deletion of the node from the ordering
+        - Comparing the relative ordering of two arbitary nodes
+        - Get the node immediately before or after a given node in the ordering
+
+    It also needs to be able to interact with the graph in the following ways:
+
+        - Query the number of edges from a node in the graph
+        - Query the number of edges into a node in the graph
+        - Add an edge to the graph
+
+
+    Users of subclasses should call `add_node` and `add_edge` whenever editing
+    the graph. The total ordering exposed will remain constant until the next
+    call to one of these methods.
+
+    Note: Calls to `add_node` and `add_edge` cannot overlap, and so callers
+    should perform some form of locking.
     """
 
     __metaclass__ = ABCMeta
@@ -53,7 +85,8 @@ class OrderedListStore(object):
 
     @abstractmethod
     def get_prev(self, node_id):
-        """Gets the node immediately before the given node
+        """Gets the node immediately before the given node in the topological
+        ordering.
 
         Args:
             node_id (str)
@@ -65,7 +98,8 @@ class OrderedListStore(object):
 
     @abstractmethod
     def get_next(self, node_id):
-        """Gets the node immediately after the given node
+        """Gets the node immediately after the given node in the topological
+        ordering.
 
         Args:
             node_id (str)
@@ -125,7 +159,8 @@ class OrderedListStore(object):
             list[tuple[float, str]]: Returns a list of tuple of an ordering
             term and the node ID. The ordering term can be used to sort the
             returned list.
-            The ordering is valid until subsequent calls to insert_* functions
+            The ordering is valid until subsequent calls to `add_edge`
+            functions
         """
         pass
 
diff --git a/tests/storage/test_chunk_linearizer_table.py b/tests/storage/test_chunk_linearizer_table.py
index 5ca436c555..9890dffd60 100644
--- a/tests/storage/test_chunk_linearizer_table.py
+++ b/tests/storage/test_chunk_linearizer_table.py
@@ -23,6 +23,10 @@ from synapse.storage.chunk_ordered_table import ChunkDBOrderedListStore
 
 
 class ChunkLinearizerStoreTestCase(tests.unittest.TestCase):
+    """Tests to ensure that the ordering and rebalancing functions of
+    ChunkDBOrderedListStore work as expected.
+    """
+
     def __init__(self, *args, **kwargs):
         super(ChunkLinearizerStoreTestCase, self).__init__(*args, **kwargs)