From e76d1135dd26305e0ff4c5d8e41b9dff204d72cf Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 20 Sep 2017 01:32:42 +0100 Subject: Invalidate signing key cache when we gat an update This might make the cache slightly more efficient. --- synapse/storage/keys.py | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index 3b5e0a4fb9..87aeaf71d6 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -113,30 +113,37 @@ class KeyStore(SQLBaseStore): keys[key_id] = key defer.returnValue(keys) - @defer.inlineCallbacks def store_server_verify_key(self, server_name, from_server, time_now_ms, verify_key): """Stores a NACL verification key for the given server. Args: server_name (str): The name of the server. - key_id (str): The version of the key for the server. from_server (str): Where the verification key was looked up - ts_now_ms (int): The time now in milliseconds - verification_key (VerifyKey): The NACL verify key. + time_now_ms (int): The time now in milliseconds + verify_key (nacl.signing.VerifyKey): The NACL verify key. """ - yield self._simple_upsert( - table="server_signature_keys", - keyvalues={ - "server_name": server_name, - "key_id": "%s:%s" % (verify_key.alg, verify_key.version), - }, - values={ - "from_server": from_server, - "ts_added_ms": time_now_ms, - "verify_key": buffer(verify_key.encode()), - }, - desc="store_server_verify_key", - ) + key_id = "%s:%s" % (verify_key.alg, verify_key.version) + + def _txn(txn): + self._simple_upsert_txn( + txn, + table="server_signature_keys", + keyvalues={ + "server_name": server_name, + "key_id": key_id, + }, + values={ + "from_server": from_server, + "ts_added_ms": time_now_ms, + "verify_key": buffer(verify_key.encode()), + }, + ) + txn.call_after( + self._get_server_verify_key.invalidate, + (server_name, key_id) + ) + + return self.runInteraction("store_server_verify_key", _txn) def store_server_keys_json(self, server_name, key_id, from_server, ts_now_ms, ts_expires_ms, key_json_bytes): -- cgit 1.4.1 From 9ccb4226ba0824a1468c4e8f0abe91aca3381862 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Sep 2017 12:18:06 +0100 Subject: Delete expired url cache data --- synapse/rest/media/v1/filepath.py | 43 ++++++++++- synapse/rest/media/v1/preview_url_resource.py | 90 +++++++++++++++++++++- synapse/storage/media_repository.py | 61 +++++++++++++++ synapse/storage/prepare_database.py | 2 +- .../storage/schema/delta/44/expire_url_cache.sql | 17 ++++ 5 files changed, 208 insertions(+), 5 deletions(-) create mode 100644 synapse/storage/schema/delta/44/expire_url_cache.sql (limited to 'synapse/storage') diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index d92b7ff337..c5d43209f9 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -73,19 +73,58 @@ class MediaFilePaths(object): ) def url_cache_filepath(self, media_id): + # Media id is of the form + # E.g.: 2017-09-28-fsdRDt24DS234dsf return os.path.join( self.base_path, "url_cache", - media_id[0:2], media_id[2:4], media_id[4:] + media_id[:10], media_id[11:] ) + def url_cache_filepath_dirs_to_delete(self, media_id): + "The dirs to try and remove if we delete the media_id file" + return [ + os.path.join( + self.base_path, "url_cache", + media_id[:10], + ), + ] + def url_cache_thumbnail(self, media_id, width, height, content_type, method): + # Media id is of the form + # E.g.: 2017-09-28-fsdRDt24DS234dsf + top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s-%s" % ( width, height, top_level_type, sub_type, method ) + return os.path.join( self.base_path, "url_cache_thumbnails", - media_id[0:2], media_id[2:4], media_id[4:], + media_id[:10], media_id[11:], file_name ) + + def url_cache_thumbnail_directory(self, media_id): + # Media id is of the form + # E.g.: 2017-09-28-fsdRDt24DS234dsf + + return os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[:10], media_id[11:], + ) + + def url_cache_thumbnail_dirs_to_delete(self, media_id): + "The dirs to try and remove if we delete the media_id thumbnails" + # Media id is of the form + # E.g.: 2017-09-28-fsdRDt24DS234dsf + return [ + os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[:10], media_id[11:], + ), + os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[:10], + ), + ] diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index b81a336c5d..c5ba83ddfd 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -36,6 +36,9 @@ import cgi import ujson as json import urlparse import itertools +import datetime +import errno +import shutil import logging logger = logging.getLogger(__name__) @@ -70,6 +73,10 @@ class PreviewUrlResource(Resource): self.downloads = {} + self._cleaner_loop = self.clock.looping_call( + self._expire_url_cache_data, 30 * 10000 + ) + def render_GET(self, request): self._async_render_GET(request) return NOT_DONE_YET @@ -253,8 +260,7 @@ class PreviewUrlResource(Resource): # we're most likely being explicitly triggered by a human rather than a # bot, so are we really a robot? - # XXX: horrible duplication with base_resource's _download_remote_file() - file_id = random_string(24) + file_id = datetime.date.today().isoformat() + '_' + random_string(16) fname = self.filepaths.url_cache_filepath(file_id) self.media_repo._makedirs(fname) @@ -328,6 +334,86 @@ class PreviewUrlResource(Resource): "etag": headers["ETag"][0] if "ETag" in headers else None, }) + @defer.inlineCallbacks + def _expire_url_cache_data(self): + """Clean up expired url cache content, media and thumbnails. + """ + now = self.clock.time_msec() + + # First we delete expired url cache entries + media_ids = yield self.store.get_expired_url_cache(now) + + removed_media = [] + for media_id in media_ids: + fname = self.filepaths.url_cache_filepath(media_id) + try: + os.remove(fname) + except OSError as e: + # If the path doesn't exist, meh + if e.errno != errno.ENOENT: + logger.warn("Failed to remove media: %r: %s", media_id, e) + continue + + removed_media.append(media_id) + + try: + dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) + for dir in dirs: + os.rmdir(dir) + except: + pass + + yield self.store.delete_url_cache(removed_media) + + logger.info("Deleted %d entries from url cache", len(removed_media)) + + # Now we delete old images associated with the url cache. + # These may be cached for a bit on the client (i.e., they + # may have a room open with a preview url thing open). + # So we wait a couple of days before deleting, just in case. + expire_before = now - 2 * 24 * 60 * 60 * 1000 + yield self.store.get_url_cache_media_before(expire_before) + + removed_media = [] + for media_id in media_ids: + fname = self.filepaths.url_cache_filepath(media_id) + try: + os.remove(fname) + except OSError as e: + # If the path doesn't exist, meh + if e.errno != errno.ENOENT: + logger.warn("Failed to remove media: %r: %s", media_id, e) + continue + + try: + dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) + for dir in dirs: + os.rmdir(dir) + except: + pass + + thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id) + try: + shutil.rmtree(thumbnail_dir) + except OSError as e: + # If the path doesn't exist, meh + if e.errno != errno.ENOENT: + logger.warn("Failed to remove media: %r: %s", media_id, e) + continue + + removed_media.append(media_id) + + try: + dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id) + for dir in dirs: + os.rmdir(dir) + except: + pass + + yield self.store.delete_url_cache_media(removed_media) + + logger.info("Deleted %d media from url cache", len(removed_media)) + def decode_and_calc_og(body, media_uri, request_encoding=None): from lxml import etree diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 82bb61b811..5cca14ccb2 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -238,3 +238,64 @@ class MediaRepositoryStore(SQLBaseStore): }, ) return self.runInteraction("delete_remote_media", delete_remote_media_txn) + + def get_expired_url_cache(self, now_ts): + sql = ( + "SELECT media_id FROM local_media_repository_url_cache" + " WHERE download_ts + expires < ?" + " ORDER BY download_ts + expires ASC" + " LIMIT 100" + ) + + def _get_expired_url_cache_txn(txn): + txn.execute(sql, (now_ts,)) + return [row[0] for row in txn] + + return self.runInteraction("get_expired_url_cache", _get_expired_url_cache_txn) + + def delete_url_cache(self, media_ids): + sql = ( + "DELETE FROM local_media_repository_url_cache" + " WHERE media_id = ?" + ) + + def _delete_url_cache_txn(txn): + txn.executemany(sql, [(media_id) for media_id in media_ids]) + + return self.runInteraction("delete_url_cache", _delete_url_cache_txn) + + def get_url_cache_media_before(self, before_ts): + sql = ( + "SELECT media_id FROM local_media_repository" + " WHERE created_ts < ?" + " ORDER BY created_ts ASC" + " LIMIT 100" + ) + + def _get_url_cache_media_before_txn(txn): + txn.execute(sql, (before_ts,)) + return [row[0] for row in txn] + + return self.runInteraction( + "get_url_cache_media_before", _get_url_cache_media_before_txn, + ) + + def delete_url_cache_media(self, media_ids): + def _delete_url_cache_media_txn(txn): + sql = ( + "DELETE FROM local_media_repository" + " WHERE media_id = ?" + ) + + txn.executemany(sql, [(media_id) for media_id in media_ids]) + + sql = ( + "DELETE FROM local_media_repository_thumbnails" + " WHERE media_id = ?" + ) + + txn.executemany(sql, [(media_id) for media_id in media_ids]) + + return self.runInteraction( + "delete_url_cache_media", _delete_url_cache_media_txn, + ) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 72b670b83b..a0af8456f5 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 43 +SCHEMA_VERSION = 44 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/44/expire_url_cache.sql b/synapse/storage/schema/delta/44/expire_url_cache.sql new file mode 100644 index 0000000000..96202bd2a6 --- /dev/null +++ b/synapse/storage/schema/delta/44/expire_url_cache.sql @@ -0,0 +1,17 @@ +/* Copyright 2017 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE INDEX local_media_repository_url_idx ON local_media_repository(created_ts) WHERE url_cache IS NOT NULL; +CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache(download_ts + expires); -- cgit 1.4.1 From 77f1d24de3c696f52bc1ba6d0f61e82f03a9de7a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Sep 2017 12:23:15 +0100 Subject: More brackets --- synapse/storage/schema/delta/44/expire_url_cache.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/44/expire_url_cache.sql b/synapse/storage/schema/delta/44/expire_url_cache.sql index 96202bd2a6..997e790b6d 100644 --- a/synapse/storage/schema/delta/44/expire_url_cache.sql +++ b/synapse/storage/schema/delta/44/expire_url_cache.sql @@ -14,4 +14,4 @@ */ CREATE INDEX local_media_repository_url_idx ON local_media_repository(created_ts) WHERE url_cache IS NOT NULL; -CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache(download_ts + expires); +CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache((download_ts + expires)); -- cgit 1.4.1 From ae79764fe55ab15156b4f28658326bd2c9c0b937 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Sep 2017 12:37:53 +0100 Subject: Change expires column to expires_ts --- synapse/rest/media/v1/preview_url_resource.py | 4 ++-- synapse/storage/media_repository.py | 14 +++++++------- .../storage/schema/delta/44/expire_url_cache.sql | 21 ++++++++++++++++++++- 3 files changed, 29 insertions(+), 10 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index c5ba83ddfd..6f896ffb53 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -137,7 +137,7 @@ class PreviewUrlResource(Resource): cache_result = yield self.store.get_url_cache(url, ts) if ( cache_result and - cache_result["download_ts"] + cache_result["expires"] > ts and + cache_result["expires_ts"] > ts and cache_result["response_code"] / 100 == 2 ): respond_with_json_bytes( @@ -246,7 +246,7 @@ class PreviewUrlResource(Resource): url, media_info["response_code"], media_info["etag"], - media_info["expires"], + media_info["expires"] + media_info["created_ts"], json.dumps(og), media_info["filesystem_id"], media_info["created_ts"], diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 5cca14ccb2..b8a0dd0762 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -62,7 +62,7 @@ class MediaRepositoryStore(SQLBaseStore): def get_url_cache_txn(txn): # get the most recently cached result (relative to the given ts) sql = ( - "SELECT response_code, etag, expires, og, media_id, download_ts" + "SELECT response_code, etag, expires_ts, og, media_id, download_ts" " FROM local_media_repository_url_cache" " WHERE url = ? AND download_ts <= ?" " ORDER BY download_ts DESC LIMIT 1" @@ -74,7 +74,7 @@ class MediaRepositoryStore(SQLBaseStore): # ...or if we've requested a timestamp older than the oldest # copy in the cache, return the oldest copy (if any) sql = ( - "SELECT response_code, etag, expires, og, media_id, download_ts" + "SELECT response_code, etag, expires_ts, og, media_id, download_ts" " FROM local_media_repository_url_cache" " WHERE url = ? AND download_ts > ?" " ORDER BY download_ts ASC LIMIT 1" @@ -86,14 +86,14 @@ class MediaRepositoryStore(SQLBaseStore): return None return dict(zip(( - 'response_code', 'etag', 'expires', 'og', 'media_id', 'download_ts' + 'response_code', 'etag', 'expires_ts', 'og', 'media_id', 'download_ts' ), row)) return self.runInteraction( "get_url_cache", get_url_cache_txn ) - def store_url_cache(self, url, response_code, etag, expires, og, media_id, + def store_url_cache(self, url, response_code, etag, expires_ts, og, media_id, download_ts): return self._simple_insert( "local_media_repository_url_cache", @@ -101,7 +101,7 @@ class MediaRepositoryStore(SQLBaseStore): "url": url, "response_code": response_code, "etag": etag, - "expires": expires, + "expires_ts": expires_ts, "og": og, "media_id": media_id, "download_ts": download_ts, @@ -242,8 +242,8 @@ class MediaRepositoryStore(SQLBaseStore): def get_expired_url_cache(self, now_ts): sql = ( "SELECT media_id FROM local_media_repository_url_cache" - " WHERE download_ts + expires < ?" - " ORDER BY download_ts + expires ASC" + " WHERE expires_ts < ?" + " ORDER BY expires_ts ASC" " LIMIT 100" ) diff --git a/synapse/storage/schema/delta/44/expire_url_cache.sql b/synapse/storage/schema/delta/44/expire_url_cache.sql index 997e790b6d..9475d53e84 100644 --- a/synapse/storage/schema/delta/44/expire_url_cache.sql +++ b/synapse/storage/schema/delta/44/expire_url_cache.sql @@ -14,4 +14,23 @@ */ CREATE INDEX local_media_repository_url_idx ON local_media_repository(created_ts) WHERE url_cache IS NOT NULL; -CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache((download_ts + expires)); + +-- we need to change `expires` to `expires_ts` so that we can index on it. SQLite doesn't support +-- indices on expressions until 3.9. +CREATE TABLE local_media_repository_url_cache_new( + url TEXT, + response_code INTEGER, + etag TEXT, + expires_ts BIGINT, + og TEXT, + media_id TEXT, + download_ts BIGINT +); + +INSERT INTO local_media_repository_url_cache_new + SELECT url, response_code, etag, expires + download_ts, og, media_id, download_ts FROM local_media_repository_url_cache; + +DROP TABLE local_media_repository_url_cache; +ALTER TABLE local_media_repository_url_cache_new RENAME TO local_media_repository_url_cache; + +CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache(expires_ts); -- cgit 1.4.1 From 7a44c01d894d85a0eb829b4a82d1aeaff9a39ec9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Sep 2017 12:46:04 +0100 Subject: Fix typo --- synapse/storage/media_repository.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index b8a0dd0762..5e39daa210 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -260,7 +260,7 @@ class MediaRepositoryStore(SQLBaseStore): ) def _delete_url_cache_txn(txn): - txn.executemany(sql, [(media_id) for media_id in media_ids]) + txn.executemany(sql, [(media_id,) for media_id in media_ids]) return self.runInteraction("delete_url_cache", _delete_url_cache_txn) @@ -287,14 +287,14 @@ class MediaRepositoryStore(SQLBaseStore): " WHERE media_id = ?" ) - txn.executemany(sql, [(media_id) for media_id in media_ids]) + txn.executemany(sql, [(media_id,) for media_id in media_ids]) sql = ( "DELETE FROM local_media_repository_thumbnails" " WHERE media_id = ?" ) - txn.executemany(sql, [(media_id) for media_id in media_ids]) + txn.executemany(sql, [(media_id,) for media_id in media_ids]) return self.runInteraction( "delete_url_cache_media", _delete_url_cache_media_txn, -- cgit 1.4.1 From 93247a424a5068b088567fa98b6990e47608b7cb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Sep 2017 13:48:14 +0100 Subject: Only pull out local media that were for url cache --- synapse/storage/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 5e39daa210..1f2eab98e3 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -267,7 +267,7 @@ class MediaRepositoryStore(SQLBaseStore): def get_url_cache_media_before(self, before_ts): sql = ( "SELECT media_id FROM local_media_repository" - " WHERE created_ts < ?" + " WHERE created_ts < ? AND url_cache IS NOT NULL" " ORDER BY created_ts ASC" " LIMIT 100" ) -- cgit 1.4.1 From 4dc07e93a85f0f6e09a6763a7833ef935be1c417 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Sep 2017 14:10:33 +0100 Subject: Add old indices --- synapse/storage/schema/delta/44/expire_url_cache.sql | 2 ++ 1 file changed, 2 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/44/expire_url_cache.sql b/synapse/storage/schema/delta/44/expire_url_cache.sql index 9475d53e84..e2b775f038 100644 --- a/synapse/storage/schema/delta/44/expire_url_cache.sql +++ b/synapse/storage/schema/delta/44/expire_url_cache.sql @@ -34,3 +34,5 @@ DROP TABLE local_media_repository_url_cache; ALTER TABLE local_media_repository_url_cache_new RENAME TO local_media_repository_url_cache; CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache(expires_ts); +CREATE INDEX local_media_repository_url_cache_by_url_download_ts ON local_media_repository_url_cache(url, download_ts); +CREATE INDEX local_media_repository_url_cache_media_idx ON local_media_repository_url_cache(media_id); -- cgit 1.4.1 From 768f00dedbee83dd6bfb7c37bfadc511f7aeb10e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Sep 2017 14:27:27 +0100 Subject: Up the limits on number of url cache entries to delete at one time --- synapse/storage/media_repository.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 1f2eab98e3..7110a71279 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -244,7 +244,7 @@ class MediaRepositoryStore(SQLBaseStore): "SELECT media_id FROM local_media_repository_url_cache" " WHERE expires_ts < ?" " ORDER BY expires_ts ASC" - " LIMIT 100" + " LIMIT 500" ) def _get_expired_url_cache_txn(txn): @@ -269,7 +269,7 @@ class MediaRepositoryStore(SQLBaseStore): "SELECT media_id FROM local_media_repository" " WHERE created_ts < ? AND url_cache IS NOT NULL" " ORDER BY created_ts ASC" - " LIMIT 100" + " LIMIT 500" ) def _get_url_cache_media_before_txn(txn): -- cgit 1.4.1