Skip to content

Commit ca69e2f

Browse files
Merge pull request #137 from Geta/feature/filtering-improvements
Feature/filtering improvements & fixes
2 parents cc4d031 + cee6aa9 commit ca69e2f

File tree

2 files changed

+31
-36
lines changed

2 files changed

+31
-36
lines changed

src/Geta.Optimizely.Sitemaps/Areas/GetaOptimizelySitemaps/Pages/Shared/_Layout.cshtml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,15 @@
3838
</div>
3939
<div>
4040
<span class="fw-bold">Paths to include:</span>
41-
Sitemap will contain only pages from this virtual directory url. Separate multiple with ";".
41+
Include only pages whose URL starts with one of the given values.
42+
Separate multiple values with a semicolon (e.g., /en/blog;/en/guides).
43+
If left empty, no filtering will be applied.
4244
</div>
4345
<div>
4446
<span class="fw-bold">Paths to avoid:</span>
45-
Sitemap will not contain pages from this virtual directory url (works only if "Directory to include" left blank). Separate multiple with ";".
47+
Exclude pages whose URL starts with one of the given values.
48+
Separate multiple values with a semicolon (e.g., /en/locations;/en/destinations).
49+
If left empty, no filtering will be applied.
4650
</div>
4751
<div>
4852
<span class="fw-bold">Root page ID:</span>

src/Geta.Optimizely.Sitemaps/Utils/UrlFilter.cs

Lines changed: 25 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,63 +8,54 @@
88
namespace Geta.Optimizely.Sitemaps.Utils
99
{
1010
/// <summary>
11-
/// Administrators are able to specify specific paths to exclude (blacklist) or include (whitelist) in sitemaps.
11+
/// Administrators can specify paths to include (whitelist) or exclude (blacklist) in sitemaps.
1212
/// This class checks whether a given URL should be filtered out of the sitemap according to those paths.
1313
/// </summary>
1414
public static class UrlFilter
1515
{
1616
public static bool IsUrlFiltered(string url, SitemapData sitemapConfig)
1717
{
18-
var whiteList = sitemapConfig.PathsToInclude;
19-
var blackList = sitemapConfig.PathsToAvoid;
20-
21-
return IsNotInWhiteList(url, whiteList) || IsInBlackList(url, blackList);
18+
// URL is filtered out if it fails either the whitelist check or the blacklist check
19+
return !IsAllowedByWhitelist(url, sitemapConfig.PathsToInclude) ||
20+
!IsAllowedByBlacklist(url, sitemapConfig.PathsToAvoid);
2221
}
2322

24-
private static bool IsNotInWhiteList(string url, IList<string> paths)
23+
private static bool IsAllowedByWhitelist(string url, IList<string> whitelist)
2524
{
26-
return IsPathInUrl(url, paths, true);
27-
}
25+
if (whitelist == null || whitelist.Count == 0)
26+
{
27+
// if whitelist is empty, then everything is allowed
28+
return true;
29+
}
2830

29-
private static bool IsInBlackList(string url, IList<string> paths)
30-
{
31-
return IsPathInUrl(url, paths, false);
31+
// otherwise - url has to match at least one path
32+
return whitelist.Any(path => IsMatch(url, path));
3233
}
3334

34-
private static bool IsPathInUrl(string url, ICollection<string> paths, bool mustContainPath)
35+
private static bool IsAllowedByBlacklist(string url, IList<string> blacklist)
3536
{
36-
if (paths == null || paths.Count <= 0)
37+
if (blacklist == null || blacklist.Count == 0)
3738
{
38-
return false;
39+
// if blacklist is empty, then everything is allowed
40+
return true;
3941
}
4042

41-
var anyPathIsInUrl = paths.Any(x =>
42-
{
43-
var dir = AddStartSlash(AddTailingSlash(x.ToLower().Trim()));
44-
return url.ToLower().StartsWith(dir);
45-
});
46-
47-
return anyPathIsInUrl != mustContainPath;
43+
// otherwise - url is not allowed if it matches any of the paths
44+
return !blacklist.Any(path => IsMatch(url, path));
4845
}
4946

50-
private static string AddTailingSlash(string url)
47+
private static bool IsMatch(string url, string path)
5148
{
52-
if (!url.EndsWith('/'))
53-
{
54-
url += "/";
55-
}
56-
57-
return url;
49+
var normalizedUrl = Normalize(url);
50+
var normalizedPath = Normalize(path);
51+
return normalizedUrl.StartsWith(normalizedPath);
5852
}
5953

60-
private static string AddStartSlash(string url)
54+
private static string Normalize(string value)
6155
{
62-
if (!url.StartsWith('/'))
63-
{
64-
url = "/" + url;
65-
}
56+
var transformedValue = value?.ToLower().Trim().TrimStart('/').TrimEnd('/');
6657

67-
return url;
58+
return string.IsNullOrWhiteSpace(transformedValue) ? "/" : $"/{transformedValue}/";
6859
}
6960
}
7061
}

0 commit comments

Comments
 (0)