about summary refs log tree commit diff
path: root/ExampleBots/ModerationBot/PolicyEngine.cs
blob: 5311637507638b4b60e5c6e89a223693076af154 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
using System.Diagnostics;
using System.Security.Cryptography;
using System.Text.Json;
using System.Text.RegularExpressions;
using ArcaneLibs.Extensions;
using LibMatrix;
using LibMatrix.EventTypes.Spec;
using LibMatrix.EventTypes.Spec.State;
using LibMatrix.Helpers;
using LibMatrix.Homeservers;
using LibMatrix.Interfaces;
using LibMatrix.RoomTypes;
using LibMatrix.Services;
using ModerationBot.AccountData;
using ModerationBot.StateEventTypes;
using Microsoft.Extensions.Logging;
using ModerationBot.StateEventTypes.Policies;
using ModerationBot.StateEventTypes.Policies.Implementations;

namespace ModerationBot;

/// <summary>
/// Loads moderation policy lists from the rooms recorded in the bot's account data and matches
/// incoming events against the policies found in them.
/// </summary>
// NOTE(review): the `configuration` constructor parameter is not referenced anywhere in this file.
public class PolicyEngine(AuthenticatedHomeserverGeneric hs, ILogger<ModerationBot> logger, ModerationBotConfiguration configuration, HomeserverResolverService hsResolver) {
    // Policy list settings keyed by room ID, as stored under the "gay.rory.moderation_bot.policy_lists" account data key.
    private Dictionary<string, PolicyList> PolicyListAccountData { get; set; } = new();
    // Policy lists produced by the most recent ReloadActivePolicyLists() run.
    public List<PolicyList> ActivePolicyLists { get; set; } = new();
    // Flattened policies from all active lists; replaced on every reload.
    public List<BasePolicy> ActivePolicies { get; set; } = new();
    // Active policies grouped by the simple name of their CLR type (e.g. nameof(MediaPolicyFile)).
    public Dictionary<string, List<BasePolicy>> ActivePoliciesByType { get; set; } = new();
    // Rooms used for status output; resolved on the first ReloadActivePolicyLists() call and null until then.
    private GenericRoom? _logRoom;
    private GenericRoom? _controlRoom;

    /// <summary>
    /// Reloads every policy list recorded in the bot's account data and rebuilds
    /// <see cref="ActivePolicyLists"/> and <see cref="ActivePolicies"/> from them.
    /// </summary>
    /// <remarks>
    /// The control and log rooms are resolved from the bot data on first use. The default policy
    /// room is registered as a trusted list (and persisted) when it is not yet known. Progress is
    /// reported by editing a single message in the log room.
    /// </remarks>
    /// <exception cref="MatrixException">
    /// Rethrown when fetching the policy list account data fails with anything other than M_NOT_FOUND.
    /// </exception>
    public async Task ReloadActivePolicyLists() {
        var sw = Stopwatch.StartNew();

        var botData = await hs.GetAccountDataAsync<BotData>("gay.rory.moderation_bot_data");
        _logRoom ??= hs.GetRoom(botData.LogRoom ?? botData.ControlRoom);
        _controlRoom ??= hs.GetRoom(botData.ControlRoom);

        await _controlRoom.SendMessageEventAsync(MessageFormatter.FormatSuccess("Reloading policy lists!"));
        await _logRoom.SendMessageEventAsync(MessageFormatter.FormatSuccess("Reloading policy lists!"));

        var progressMessage = await _logRoom.SendMessageEventAsync(MessageFormatter.FormatSuccess("0/? policy lists loaded"));

        try {
            PolicyListAccountData = await hs.GetAccountDataAsync<Dictionary<string, PolicyList>>("gay.rory.moderation_bot.policy_lists");
        }
        catch (MatrixException e) {
            // No stored policy list data yet: keep whatever is currently in memory.
            if (e is not { ErrorCode: "M_NOT_FOUND" }) throw;
        }

        // Make sure the default policy room is always tracked, and persist the addition.
        if (PolicyListAccountData.TryAdd(botData.DefaultPolicyRoom, new PolicyList() { Trusted = true })) {
            await hs.SetAccountDataAsync("gay.rory.moderation_bot.policy_lists", PolicyListAccountData);
        }

        // Start all loads up front so the rooms are fetched concurrently.
        var loadTasks = PolicyListAccountData
            .Select(entry => LoadPolicyListAsync(hs.GetRoom(entry.Key), entry.Value))
            .ToList();

        var policyLists = new List<PolicyList>();
        await foreach (var policyList in loadTasks.ToAsyncEnumerable()) {
            policyLists.Add(policyList);

            // Only edit the progress message for large lists and for the final list, to limit message spam.
            if (policyList.Policies.Count >= 256 || policyLists.Count == PolicyListAccountData.Count) {
                var progressMsgContent = MessageFormatter.FormatSuccess($"{policyLists.Count}/{PolicyListAccountData.Count} policy lists loaded, " +
                                                                        $"{policyLists.Sum(x => x.Policies.Count)} policies total, {sw.Elapsed} elapsed.")
                    .SetReplaceRelation<RoomMessageEventContent>(progressMessage.EventId);

                // Awaited so that edits are sent in order and send failures are observed instead of silently dropped.
                await _logRoom.SendMessageEventAsync(progressMsgContent);
            }
        }

        ActivePolicyLists = policyLists;
        ActivePolicies = await GetActivePolicies();
    }

    /// <summary>
    /// Binds <paramref name="policyList"/> to <paramref name="room"/> and refills its policies
    /// from the room's full state.
    /// </summary>
    /// <param name="room">Room whose state events are scanned.</param>
    /// <param name="policyList">List to repopulate; its existing policies are discarded first.</param>
    /// <returns>The same <paramref name="policyList"/> instance, repopulated.</returns>
    private async Task<PolicyList> LoadPolicyListAsync(GenericRoom room, PolicyList policyList) {
        policyList.Room = room;
        policyList.Policies.Clear();

        await foreach (var entry in room.GetFullStateAsync()) {
            if (entry is null) continue;

            // Keep only state events whose mapped content type is a policy type.
            var mappedType = entry.GetType;
            var isPolicy = mappedType.IsAssignableTo(typeof(BasePolicy))
                           || mappedType.IsAssignableTo(typeof(PolicyRuleEventContent));
            if (isPolicy) {
                policyList.Policies.Add(entry);
            }
        }

        return policyList;
    }
    
    
    /// <summary>
    /// Reloads the single active policy list backed by <paramref name="roomId"/> and rebuilds
    /// <see cref="ActivePolicies"/>. Does nothing when no active list uses that room.
    /// </summary>
    /// <param name="roomId">ID of the policy room to reload.</param>
    /// <exception cref="InvalidOperationException">More than one active list uses the room.</exception>
    public async Task ReloadActivePolicyListById(string roomId) {
        var trackedList = ActivePolicyLists.SingleOrDefault(list => list.Room.RoomId == roomId);
        if (trackedList is null) {
            return;
        }

        await LoadPolicyListAsync(hs.GetRoom(roomId), trackedList);
        ActivePolicies = await GetActivePolicies();
    }

    /// <summary>
    /// Flattens the policies of all <see cref="ActivePolicyLists"/> into a single list and rebuilds
    /// <see cref="ActivePoliciesByType"/> from it.
    /// </summary>
    /// <remarks>
    /// Entries whose content is not a <see cref="BasePolicy"/> are deserialized as
    /// <see cref="UnknownPolicy"/>. Entries that cannot be deserialized or have no entity are skipped.
    /// A timing message and a per-type summary are posted to the log room when one has been resolved.
    /// </remarks>
    /// <returns>All usable policies, each linked to its policy list and original state event.</returns>
    public async Task<List<BasePolicy>> GetActivePolicies() {
        var sw = Stopwatch.StartNew();
        List<BasePolicy> activePolicies = new();

        foreach (var activePolicyList in ActivePolicyLists) {
            foreach (var policyEntry in activePolicyList.Policies) {
                // TODO: implement rule translation
                BasePolicy? policy = policyEntry.TypedContent as BasePolicy ?? policyEntry.RawContent.Deserialize<UnknownPolicy>();
                // Deserialize can yield null (e.g. for JSON null content); treat that like a policy without an entity.
                if (policy?.Entity is null) continue;
                policy.PolicyList = activePolicyList;
                policy.OriginalEvent = policyEntry;
                activePolicies.Add(policy);
            }
        }

        Console.WriteLine($"Translated policy list data in {sw.Elapsed}");
        ActivePoliciesByType = activePolicies.GroupBy(x => x.GetType().Name).ToDictionary(x => x.Key, x => x.ToList());

        // The log room is only resolved by ReloadActivePolicyLists(); skip reporting when this is called before that.
        var logRoom = _logRoom;
        if (logRoom is not null) {
            await logRoom.SendMessageEventAsync(MessageFormatter.FormatSuccess($"Translated policy list data in {sw.GetElapsedAndRestart()}"));

            var summary = SummariseStateTypeCounts(activePolicies.Select(x => x.OriginalEvent).ToList());
            await logRoom.SendMessageEventAsync(new RoomMessageEventContent() {
                Body = summary.Raw,
                FormattedBody = summary.Html,
                Format = "org.matrix.custom.html"
            });
        }

        return activePolicies;
    }

    /// <summary>
    /// Collects every active policy that applies to <paramref name="event"/>: server and user
    /// policies for the sender, plus message-content and media policies for room messages.
    /// </summary>
    /// <param name="event">Event to evaluate.</param>
    /// <returns>The matching policies; always empty for events sent by the bot's own user.</returns>
    public async Task<List<BasePolicy>> GetMatchingPolicies(StateEventResponse @event) {
        List<BasePolicy> matchingPolicies = new();
        if (@event.Sender == hs.UserId) return matchingPolicies; //ignore self at all costs

        // The server name is everything after the first ':' of the sender ID; a sender without one
        // has no server to match and is skipped rather than throwing.
        if (ActivePoliciesByType.TryGetValue(nameof(ServerPolicyRuleEventContent), out var serverPolicies)
            && @event.Sender.Split(':', 2) is [_, var userServer]) {
            matchingPolicies.AddRange(serverPolicies.Where(x => x.Entity == userServer));
        }

        if (ActivePoliciesByType.TryGetValue(nameof(UserPolicyRuleEventContent), out var userPolicies)) {
            matchingPolicies.AddRange(userPolicies.Where(x => x.Entity == @event.Sender));
        }

        if (@event.TypedContent is RoomMessageEventContent msgContent) {
            matchingPolicies.AddRange(await CheckMessageContent(@event));
            //TODO: implement word etc. filters for m.text / m.notice
            if (msgContent.MessageType is "m.image" or "m.file" or "m.audio" or "m.video")
                matchingPolicies.AddRange(await CheckMedia(@event));
        }

        return matchingPolicies;
    }

#region Policy matching

    /// <summary>
    /// Matches the plain and formatted body of a room message against the active
    /// <see cref="MessagePolicyContainsText"/> policies, ignoring case.
    /// </summary>
    /// <param name="event">Event to inspect; non-message events never match.</param>
    /// <returns>Policies whose entity occurs in either body.</returns>
    private Task<List<BasePolicy>> CheckMessageContent(StateEventResponse @event) {
        var matchedRules = new List<BasePolicy>();

        if (@event.TypedContent is not RoomMessageEventContent msgContent
            || !ActivePoliciesByType.TryGetValue(nameof(MessagePolicyContainsText), out var messageContainsPolicies))
            return Task.FromResult(matchedRules);

        // Lower-case the message once instead of once per policy.
        var body = msgContent.Body?.ToLowerInvariant();
        var formattedBody = msgContent.FormattedBody?.ToLowerInvariant();

        foreach (var policy in messageContainsPolicies) {
            // An empty entity is contained in every string and would flag every message.
            if (string.IsNullOrEmpty(policy.Entity)) continue;

            var needle = policy.Entity.ToLowerInvariant();
            if ((body?.Contains(needle) ?? false) || (formattedBody?.Contains(needle) ?? false))
                matchedRules.Add(policy);
        }

        return Task.FromResult(matchedRules);
    }

    /// <summary>
    /// Matches a media message against the active media policies: first by the homeserver part of
    /// its MXC URI, then by the SHA3-256 hash of the URI and of the downloaded file.
    /// </summary>
    /// <remarks>
    /// The file is fetched via the media's own server first and via the bot's homeserver as a
    /// fallback. If both fail, only the URI hash is checked. Download failures are reported to the
    /// log room when one has been resolved.
    /// </remarks>
    /// <param name="event">Media event; its content is expected to carry a top-level "url" field.</param>
    /// <returns>Matching policies, each listed at most once.</returns>
    private async Task<List<BasePolicy>> CheckMedia(StateEventResponse @event) {
        var matchedRules = new List<BasePolicy>();

        var mxcUri = @event.RawContent["url"]?.GetValue<string>();
        if (string.IsNullOrEmpty(mxcUri)) {
            // Nothing to resolve or hash without a top-level url.
            logger.LogDebug("Media event has no url, skipping media checks");
            return matchedRules;
        }

        //check server policies before bothering with hashes
        if (ActivePoliciesByType.TryGetValue(nameof(MediaPolicyHomeserver), out var mediaHomeserverPolicies))
            foreach (var policy in mediaHomeserverPolicies) {
                logger.LogInformation("Checking rule {rule}: {data}", policy.OriginalEvent.StateKey, policy.OriginalEvent.TypedContent.ToJson(ignoreNull: true, indent: false));
                // Translate the glob entity into a regex: escape everything, then re-enable '*' and '?'.
                // Done on a local copy so the stored policy is not modified by matching.
                var serverPattern = Regex.Escape(policy.Entity).Replace("\\*", ".*").Replace("\\?", ".");
                if (Regex.IsMatch(mxcUri, $"mxc://({serverPattern})/.*", RegexOptions.IgnoreCase)) {
                    logger.LogInformation("{url} matched rule {rule}", mxcUri, policy.ToJson(ignoreNull: true));
                    matchedRules.Add(policy);
                }
            }

        // mxc://<server>/<media id> splits into ["mxc:", "", "<server>", "<media id>"].
        var uriParts = mxcUri.Split('/');
        if (uriParts.Length < 3 || uriParts[2].Length == 0) {
            logger.LogWarning("Malformed media uri {url}, skipping hash checks", mxcUri);
            return matchedRules;
        }

        var mediaServer = uriParts[2];

        using var hashAlgo = SHA3_256.Create();
        var uriHash = Convert.ToBase64String(hashAlgo.ComputeHash(mxcUri.AsBytes().ToArray()));
        var resolvedUri = await hsResolver.ResolveMediaUri(mediaServer, mxcUri);
        string? fileHash = null;

        try {
            await using var mediaStream = await hs.ClientHttpClient.GetStreamAsync(resolvedUri);
            fileHash = Convert.ToBase64String(await hashAlgo.ComputeHashAsync(mediaStream));
        }
        catch (Exception ex) {
            if (_logRoom is not null)
                await _logRoom.SendMessageEventAsync(
                    MessageFormatter.FormatException($"Error calculating file hash for {mxcUri} via {mediaServer} ({resolvedUri}), retrying via {hs.BaseUrl}...",
                        ex));
            try {
                resolvedUri = await hsResolver.ResolveMediaUri(hs.BaseUrl, mxcUri);
                await using var mediaStream = await hs.ClientHttpClient.GetStreamAsync(resolvedUri);
                fileHash = Convert.ToBase64String(await hashAlgo.ComputeHashAsync(mediaStream));
            }
            catch (Exception ex2) {
                // Best effort: continue with URI-hash matching only.
                if (_logRoom is not null)
                    await _logRoom.SendMessageEventAsync(
                        MessageFormatter.FormatException($"Error calculating file hash via {hs.BaseUrl} ({resolvedUri})!", ex2));
            }
        }

        logger.LogInformation("Checking media {url} with hash {hash}", resolvedUri, fileHash);

        if (ActivePoliciesByType.TryGetValue(nameof(MediaPolicyFile), out var mediaFilePolicies))
            foreach (MediaPolicyFile policy in mediaFilePolicies) {
                logger.LogInformation("Checking rule {rule}: {data}", policy.OriginalEvent.StateKey, policy.OriginalEvent.TypedContent.ToJson(ignoreNull: true, indent: false));
                var matched = false;

                if (policy.Entity is not null && uriHash == policy.Entity) {
                    logger.LogInformation("{url} matched rule {rule} by uri hash", mxcUri, policy.ToJson(ignoreNull: true));
                    matched = true;
                }
                else logger.LogInformation("uri hash {uriHash} did not match rule's {ruleUriHash}", uriHash, policy.Entity);

                if (policy.FileHash is not null && fileHash is not null && policy.FileHash == fileHash) {
                    logger.LogInformation("{url} matched rule {rule} by file hash", mxcUri, policy.ToJson(ignoreNull: true));
                    matched = true;
                }
                else logger.LogInformation("file hash {fileHash} did not match rule's {ruleFileHash}", fileHash, policy.FileHash);

                // Report a policy once even when both its URI hash and its file hash match.
                if (matched) matchedRules.Add(policy);

                //check pixels every 10% of the way through the image using ImageSharp
                // var image = Image.Load(await _hs._httpClient.GetStreamAsync(resolvedUri));
            }
        else logger.LogInformation("No active media file policies");

        return matchedRules;
    }

#endregion

#region Internal code

#region Summarisation

    /// <summary>
    /// Builds a plain-text and an HTML table listing, per state event type, how many events there
    /// are and the name of the mapped type of the first event of that type, most frequent first.
    /// </summary>
    /// <param name="states">State events to summarise.</param>
    /// <returns>
    /// <c>Raw</c>: one line per type below a header line. <c>Html</c>: the same data as a table,
    /// with cell values HTML-encoded.
    /// </returns>
    private static (string Raw, string Html) SummariseStateTypeCounts(IList<StateEventResponse> states) {
        var rows = states
            .GroupBy(x => x.Type)
            .Select(g => (Type: g.Key, Count: g.Count(), MappedType: g.First().GetType.Name))
            .OrderByDescending(row => row.Count)
            .ToList();

        // One row per line; previously rows were appended to the header without any separator.
        var raw = string.Join('\n', rows
            .Select(row => $"{row.Count} | {row.Type} | {row.MappedType}")
            .Prepend("Count | State type | Mapped type"));

        // State types come from room state, so encode them before embedding them in markup.
        var html = "<table><tr><th>Count</th><th>State type</th><th>Mapped type</th></tr>"
                   + string.Concat(rows.Select(row =>
                       $"<tr><td>{row.Count}</td><td>{System.Net.WebUtility.HtmlEncode(row.Type)}</td><td>{System.Net.WebUtility.HtmlEncode(row.MappedType)}</td></tr>"))
                   + "</table>";

        return (raw, html);
    }

#endregion

#endregion

}