|
namespace Geta.Optimizely.Sitemaps.Utils
{
    /// <summary>
    /// Administrators are able to specify paths to include (whitelist) or exclude (blacklist) in sitemaps.
    /// This class checks a URL against those two lists.
    /// </summary>
    public static class UrlFilter
    {
        /// <summary>
        /// Determines whether a URL must be left out of the sitemap.
        /// </summary>
        /// <param name="url">The URL to check. A null or blank value is treated as the root path "/".</param>
        /// <param name="sitemapConfig">
        /// Sitemap configuration whose <c>PathsToInclude</c> (whitelist) and <c>PathsToAvoid</c> (blacklist)
        /// are consulted.
        /// </param>
        /// <returns>
        /// <c>true</c> when the URL is filtered out, i.e. it matches no entry of a non-empty whitelist
        /// or it matches at least one entry of the blacklist; otherwise <c>false</c>.
        /// </returns>
        public static bool IsUrlFiltered(string url, SitemapData sitemapConfig)
        {
            // URL is removed if it fails the whitelist check or fails the blacklist check.
            return !IsAllowedByWhitelist(url, sitemapConfig.PathsToInclude) ||
                   !IsAllowedByBlacklist(url, sitemapConfig.PathsToAvoid);
        }

        /// <summary>
        /// Checks the URL against the whitelist. A null or empty whitelist allows everything.
        /// </summary>
        private static bool IsAllowedByWhitelist(string url, IList<string> whitelist)
        {
            if (whitelist == null || whitelist.Count == 0)
            {
                // No whitelist configured - every URL is allowed.
                return true;
            }

            // Otherwise the URL has to match at least one whitelisted path.
            return whitelist.Any(path => IsMatch(url, path));
        }

        /// <summary>
        /// Checks the URL against the blacklist. A null or empty blacklist allows everything.
        /// </summary>
        private static bool IsAllowedByBlacklist(string url, IList<string> blacklist)
        {
            if (blacklist == null || blacklist.Count == 0)
            {
                // No blacklist configured - every URL is allowed.
                return true;
            }

            // Otherwise the URL is rejected as soon as it matches any blacklisted path.
            return !blacklist.Any(path => IsMatch(url, path));
        }

        /// <summary>
        /// Returns <c>true</c> when the normalized URL starts with the normalized path.
        /// </summary>
        private static bool IsMatch(string url, string path)
        {
            var normalizedUrl = Normalize(url);
            var normalizedPath = Normalize(path);

            // URL paths are identifiers, not linguistic text: compare ordinally and ignore case so the
            // result does not depend on the current culture of the server.
            return normalizedUrl.StartsWith(normalizedPath, System.StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Brings a URL or configured path to the form "/segment/.../": surrounding whitespace and
        /// leading/trailing slashes are removed and exactly one slash is put on each side, so that
        /// matching happens on whole path segments. A null or blank value becomes "/", which every
        /// normalized value starts with.
        /// </summary>
        private static string Normalize(string value)
        {
            var trimmedValue = value?.Trim().Trim('/');

            return string.IsNullOrWhiteSpace(trimmedValue) ? "/" : $"/{trimmedValue}/";
        }
    }
}
0 commit comments