From f8bfcd7e0d2fc6399eb654a41773cd603b4037fc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 1 Mar 2018 23:20:54 +0000 Subject: Provide a means to pass a timestamp to purge_history --- docs/admin_api/purge_history_api.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'docs') diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index a3a17e9f9f..acf1bc5749 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -8,9 +8,9 @@ Depending on the amount of history being purged a call to the API may take several minutes or longer. During this period users will not be able to paginate further back in the room from the point being purged from. -The API is simply: +The API is: -``POST /_matrix/client/r0/admin/purge_history//`` +``POST /_matrix/client/r0/admin/purge_history/[/]`` including an ``access_token`` of a server admin. @@ -25,3 +25,10 @@ To delete local events as well, set ``delete_local_events`` in the body: { "delete_local_events": true } + +The caller must specify the point in the room to purge up to. This can be +specified by including an event_id in the URI, or by setting a +``purge_up_to_event_id`` or ``purge_up_to_ts`` in the request body. If an event +id is given, that event (and others at the same graph depth) will be retained. +If ``purge_up_to_ts`` is given, it should be a timestamp since the unix epoch, +in milliseconds. -- cgit 1.5.1 From 20f40348d4ea55cc5b98528673e26bac7396a3cb Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Mar 2018 19:59:24 +0000 Subject: Factor run_in_background out from preserve_fn It annoys me that we create temporary function objects when there's really no need for it. Let's factor the gubbins out of preserve_fn and start using it. --- docs/log_contexts.rst | 8 +++---- synapse/util/logcontext.py | 53 +++++++++++++++++++++++++--------------------- 2 files changed, 33 insertions(+), 28 deletions(-) (limited to 'docs') diff --git a/docs/log_contexts.rst b/docs/log_contexts.rst index b19b7fa1ea..82ac4f91e5 100644 --- a/docs/log_contexts.rst +++ b/docs/log_contexts.rst @@ -279,9 +279,9 @@ Obviously that option means that the operations done in that might be fixed by setting a different logcontext via a ``with LoggingContext(...)`` in ``background_operation``). -The second option is to use ``logcontext.preserve_fn``, which wraps a function -so that it doesn't reset the logcontext even when it returns an incomplete -deferred, and adds a callback to the returned deferred to reset the +The second option is to use ``logcontext.run_in_background``, which wraps a +function so that it doesn't reset the logcontext even when it returns an +incomplete deferred, and adds a callback to the returned deferred to reset the logcontext. In other words, it turns a function that follows the Synapse rules about logcontexts and Deferreds into one which behaves more like an external function — the opposite operation to that described in the previous section. @@ -293,7 +293,7 @@ It can be used like this: def do_request_handling(): yield foreground_operation() - logcontext.preserve_fn(background_operation)() + logcontext.run_in_background(background_operation) # this will now be logged against the request context logger.debug("Request handling complete") diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index a8dea15c1b..d660ec785b 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -292,36 +292,41 @@ class PreserveLoggingContext(object): def preserve_fn(f): - """Wraps a function, to ensure that the current context is restored after + """Function decorator which wraps the function with run_in_background""" + def g(*args, **kwargs): + return run_in_background(f, *args, **kwargs) + return g + + +def run_in_background(f, *args, **kwargs): + """Calls a function, ensuring that the current context is restored after return from the function, and that the sentinel context is set once the deferred returned by the funtion completes. Useful for wrapping functions that return a deferred which you don't yield on. """ - def g(*args, **kwargs): - current = LoggingContext.current_context() - res = f(*args, **kwargs) - if isinstance(res, defer.Deferred) and not res.called: - # The function will have reset the context before returning, so - # we need to restore it now. - LoggingContext.set_current_context(current) - - # The original context will be restored when the deferred - # completes, but there is nothing waiting for it, so it will - # get leaked into the reactor or some other function which - # wasn't expecting it. We therefore need to reset the context - # here. - # - # (If this feels asymmetric, consider it this way: we are - # effectively forking a new thread of execution. We are - # probably currently within a ``with LoggingContext()`` block, - # which is supposed to have a single entry and exit point. But - # by spawning off another deferred, we are effectively - # adding a new exit point.) - res.addBoth(_set_context_cb, LoggingContext.sentinel) - return res - return g + current = LoggingContext.current_context() + res = f(*args, **kwargs) + if isinstance(res, defer.Deferred) and not res.called: + # The function will have reset the context before returning, so + # we need to restore it now. + LoggingContext.set_current_context(current) + + # The original context will be restored when the deferred + # completes, but there is nothing waiting for it, so it will + # get leaked into the reactor or some other function which + # wasn't expecting it. We therefore need to reset the context + # here. + # + # (If this feels asymmetric, consider it this way: we are + # effectively forking a new thread of execution. We are + # probably currently within a ``with LoggingContext()`` block, + # which is supposed to have a single entry and exit point. But + # by spawning off another deferred, we are effectively + # adding a new exit point.) + res.addBoth(_set_context_cb, LoggingContext.sentinel) + return res def make_deferred_yieldable(deferred): -- cgit 1.5.1 From e48c7aac4d827b66182adf80ab9804f42db186c9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 8 Mar 2018 11:47:28 +0000 Subject: Add transactional API to history purge Make the purge request return quickly, and allow scripts to poll for updates. --- docs/admin_api/purge_history_api.rst | 27 +++++++++ synapse/handlers/message.py | 104 +++++++++++++++++++++++++++++++++-- synapse/rest/client/v1/admin.py | 38 ++++++++++++- 3 files changed, 161 insertions(+), 8 deletions(-) (limited to 'docs') diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index acf1bc5749..ea2922da5c 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -32,3 +32,30 @@ specified by including an event_id in the URI, or by setting a id is given, that event (and others at the same graph depth) will be retained. If ``purge_up_to_ts`` is given, it should be a timestamp since the unix epoch, in milliseconds. + +The API starts the purge running, and returns immediately with a JSON body with +a purge id: + +.. code:: json + + { + "purge_id": "" + } + +Purge status query +------------------ + +It is possible to poll for updates on recent purges with a second API; + +``GET /_matrix/client/r0/admin/purge_history_status/`` + +(again, with a suitable ``access_token``). This API returns a JSON body like +the following: + +.. code:: json + + { + "status": "active" + } + +The status will be one of ``active``, ``complete``, or ``failed``. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 6eb8d19dc9..42aab91c50 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer +from twisted.internet import defer, reactor +from twisted.python.failure import Failure from synapse.api.constants import EventTypes, Membership from synapse.api.errors import AuthError, Codes, SynapseError @@ -24,9 +25,10 @@ from synapse.types import ( UserID, RoomAlias, RoomStreamToken, ) from synapse.util.async import run_on_reactor, ReadWriteLock, Limiter -from synapse.util.logcontext import preserve_fn +from synapse.util.logcontext import preserve_fn, run_in_background from synapse.util.metrics import measure_func from synapse.util.frozenutils import unfreeze +from synapse.util.stringutils import random_string from synapse.visibility import filter_events_for_client from synapse.replication.http.send_event import send_event_to_master @@ -41,6 +43,36 @@ import ujson logger = logging.getLogger(__name__) +class PurgeStatus(object): + """Object tracking the status of a purge request + + This class contains information on the progress of a purge request, for + return by get_purge_status. + + Attributes: + status (int): Tracks whether this request has completed. One of + STATUS_{ACTIVE,COMPLETE,FAILED} + """ + + STATUS_ACTIVE = 0 + STATUS_COMPLETE = 1 + STATUS_FAILED = 2 + + STATUS_TEXT = { + STATUS_ACTIVE: "active", + STATUS_COMPLETE: "complete", + STATUS_FAILED: "failed", + } + + def __init__(self): + self.status = PurgeStatus.STATUS_ACTIVE + + def asdict(self): + return { + "status": PurgeStatus.STATUS_TEXT[self.status] + } + + class MessageHandler(BaseHandler): def __init__(self, hs): @@ -51,25 +83,87 @@ class MessageHandler(BaseHandler): self.pagination_lock = ReadWriteLock() self._purges_in_progress_by_room = set() + # map from purge id to PurgeStatus + self._purges_by_id = {} - @defer.inlineCallbacks - def purge_history(self, room_id, topological_ordering, - delete_local_events=False): + def start_purge_history(self, room_id, topological_ordering, + delete_local_events=False): + """Start off a history purge on a room. + + Args: + room_id (str): The room to purge from + + topological_ordering (int): minimum topo ordering to preserve + delete_local_events (bool): True to delete local events as well as + remote ones + + Returns: + str: unique ID for this purge transaction. + """ if room_id in self._purges_in_progress_by_room: raise SynapseError( 400, "History purge already in progress for %s" % (room_id, ), ) + purge_id = random_string(16) + + # we log the purge_id here so that it can be tied back to the + # request id in the log lines. + logger.info("[purge] starting purge_id %s", purge_id) + + self._purges_by_id[purge_id] = PurgeStatus() + run_in_background( + self._purge_history, + purge_id, room_id, topological_ordering, delete_local_events, + ) + return purge_id + + @defer.inlineCallbacks + def _purge_history(self, purge_id, room_id, topological_ordering, + delete_local_events): + """Carry out a history purge on a room. + + Args: + purge_id (str): The id for this purge + room_id (str): The room to purge from + topological_ordering (int): minimum topo ordering to preserve + delete_local_events (bool): True to delete local events as well as + remote ones + + Returns: + Deferred + """ self._purges_in_progress_by_room.add(room_id) try: with (yield self.pagination_lock.write(room_id)): yield self.store.purge_history( room_id, topological_ordering, delete_local_events, ) + logger.info("[purge] complete") + self._purges_by_id[purge_id].status = PurgeStatus.STATUS_COMPLETE + except Exception: + logger.error("[purge] failed: %s", Failure().getTraceback().rstrip()) + self._purges_by_id[purge_id].status = PurgeStatus.STATUS_FAILED finally: self._purges_in_progress_by_room.discard(room_id) + # remove the purge from the list 24 hours after it completes + def clear_purge(): + del self._purges_by_id[purge_id] + reactor.callLater(24 * 3600, clear_purge) + + def get_purge_status(self, purge_id): + """Get the current status of an active purge + + Args: + purge_id (str): purge_id returned by start_purge_history + + Returns: + PurgeStatus|None + """ + return self._purges_by_id.get(purge_id) + @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, as_client_event=True, event_filter=None): diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index dcf6215dad..303419d281 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -17,7 +17,7 @@ from twisted.internet import defer from synapse.api.constants import Membership -from synapse.api.errors import AuthError, SynapseError, Codes +from synapse.api.errors import AuthError, SynapseError, Codes, NotFoundError from synapse.types import UserID, create_requester from synapse.http.servlet import parse_json_object_from_request @@ -185,12 +185,43 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): errcode=Codes.BAD_JSON, ) - yield self.handlers.message_handler.purge_history( + purge_id = yield self.handlers.message_handler.start_purge_history( room_id, depth, delete_local_events=delete_local_events, ) - defer.returnValue((200, {})) + defer.returnValue((200, { + "purge_id": purge_id, + })) + + +class PurgeHistoryStatusRestServlet(ClientV1RestServlet): + PATTERNS = client_path_patterns( + "/admin/purge_history_status/(?P[^/]+)" + ) + + def __init__(self, hs): + """ + + Args: + hs (synapse.server.HomeServer) + """ + super(PurgeHistoryStatusRestServlet, self).__init__(hs) + self.handlers = hs.get_handlers() + + @defer.inlineCallbacks + def on_GET(self, request, purge_id): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + + if not is_admin: + raise AuthError(403, "You are not a server admin") + + purge_status = self.handlers.message_handler.get_purge_status(purge_id) + if purge_status is None: + raise NotFoundError("purge id '%s' not found" % purge_id) + + defer.returnValue((200, purge_status.asdict())) class DeactivateAccountRestServlet(ClientV1RestServlet): @@ -561,6 +592,7 @@ class SearchUsersRestServlet(ClientV1RestServlet): def register_servlets(hs, http_server): WhoisRestServlet(hs).register(http_server) PurgeMediaCacheRestServlet(hs).register(http_server) + PurgeHistoryStatusRestServlet(hs).register(http_server) DeactivateAccountRestServlet(hs).register(http_server) PurgeHistoryRestServlet(hs).register(http_server) UsersRestServlet(hs).register(http_server) -- cgit 1.5.1 From c33c1ceddd5da8195b38059dce31255209075ba2 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 14 Mar 2018 11:09:08 -0600 Subject: OCD: Make the event_creator routes regex a code block All the others are code blocks, so this one should be to (currently it is a blockquote). Signed-off-by: Travis Ralston --- docs/workers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'docs') diff --git a/docs/workers.rst b/docs/workers.rst index dee04bbf3e..80f8d2181a 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -230,7 +230,7 @@ file. For example:: ``synapse.app.event_creator`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Handles non-state event creation. It can handle REST endpoints matching: +Handles non-state event creation. It can handle REST endpoints matching:: ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send -- cgit 1.5.1 From 0ad5125814dc18a79423740ac54f96e16a427758 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 15 Mar 2018 11:05:42 +0000 Subject: Update purge_history_api.rst clarify that `purge_history` will not purge state --- docs/admin_api/purge_history_api.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'docs') diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index ea2922da5c..2da833c827 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -16,9 +16,11 @@ including an ``access_token`` of a server admin. By default, events sent by local users are not deleted, as they may represent the only copies of this content in existence. (Events sent by remote users are -deleted, and room state data before the cutoff is always removed). +deleted.) -To delete local events as well, set ``delete_local_events`` in the body: +Room state data (such as joins, leaves, topic) is always preserved. + +To delete local message events as well, set ``delete_local_events`` in the body: .. code:: json -- cgit 1.5.1