Skip to content

Commit b1a59e1

Browse files
committed
update check/article/images
Ensure article page has minimum length and includes visual assets. Also added more thorough code-testing. com.google.fonts/check/article/images On the Google Fonts profile. (issue fonttools#4653)
1 parent 7893873 commit b1a59e1

File tree

11 files changed

+155
-67
lines changed

11 files changed

+155
-67
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ A more detailed list of changes is available in the corresponding milestones for
2222
- Checks which validate the description files now also validate article files (issue #4730)
2323
- **[com.google.font/check/description/unsupported_elements]:** Also checks for wellformedness of HTML video tags. (issue #4730)
2424
- **[com.google.fonts/check/metadata/copyright_max_length], [com.google.fonts/check/metadata/nameid/copyright], [com.google.fonts/check/metadata/valid_copyright], [com.google.fonts/check/name/copyright_length]:** These checks have all been merged into [com.google.fonts/check/font_copyright]. (PR #4748 / issue #4735)
25+
- **EXPERIMENTAL - [com.google.fonts/check/article/images]:** Ensure article page has minimum length and includes visual assets. Also added more thorough code-testing. (issue #4653)
2526

2627
#### On the Type Network profile
2728
- **[com.typenetwork/check/usweightclass]:** Detects better "Semi", "Extra" and "Ultra" weights using a space on the name. (PR #4751)
Lines changed: 98 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,30 @@
11
import os
22

3-
from fontbakery.prelude import check, Message, FAIL, WARN
3+
from fontbakery.prelude import check, Message, FAIL, WARN, PASS
44

55

66
@check(
77
id="com.google.fonts/check/article/images",
88
conditions=["family_directory"],
99
rationale="""
1010
The purpose of this check is to ensure images (either raster or vector files)
11-
are placed on the correct directory (an `images` subdirectory inside `article`) and
12-
they they are not excessively large in filesize and resolution.
11+
are placed on the correct directory (an `images` subdirectory inside `article`)
12+
and that they are not excessively large in filesize and resolution.
1313
1414
These constraints are loosely based on infrastructure limitations under
1515
default configurations.
16+
17+
It also ensures that the article page has a minimum length and includes
18+
at least one visual asset.
1619
""",
1720
proposal="https://github.com/fonttools/fontbakery/issues/4594",
1821
experimental="Since 2024/Mar/25",
1922
)
20-
def com_google_fonts_check_metadata_parses(config, family_directory):
21-
"""Validate location, size and resolution of article images."""
22-
23-
from fontbakery.utils import bullet_list, image_dimensions
23+
def com_google_fonts_check_article_images(config, family_directory):
24+
"""Validate location, size, and resolution of article images,
25+
and ensure article page has minimum length and includes visual assets."""
26+
from bs4 import BeautifulSoup
27+
from fontbakery.utils import bullet_list
2428

2529
MAX_WIDTH = 2048
2630
MAX_HEIGHT = 1024
@@ -40,62 +44,98 @@ def is_vector(filename):
4044
return False
4145

4246
article_dir = os.path.join(family_directory, "article")
43-
images_dir = os.path.join(family_directory, "article", "images")
44-
if not os.path.isdir(article_dir):
47+
article_path = os.path.join(article_dir, "ARTICLE.en_us.html")
48+
images_dir = os.path.join(article_dir, "images")
49+
50+
if not os.path.exists(article_path):
4551
yield WARN, Message(
4652
"lacks-article",
4753
f"Family metadata at {family_directory} does not have an article.\n",
4854
)
49-
else:
50-
misplaced_files = [
51-
os.path.join(article_dir, filename)
52-
for filename in os.listdir(article_dir)
55+
return
56+
57+
with open(article_path, "r", encoding="utf-8") as file:
58+
content = file.read()
59+
60+
soup = BeautifulSoup(content, "html.parser")
61+
text = soup.get_text()
62+
word_count = len(text.split())
63+
char_count = len(text)
64+
65+
visuals = soup.find_all(["img", "svg", "video", "iframe"])
66+
missing_files = []
67+
for visual in visuals:
68+
if src := visual.get("src"):
69+
visual_path = os.path.join(article_dir, src)
70+
if not os.path.exists(visual_path):
71+
missing_files.append(src)
72+
73+
if word_count < 100 or char_count < 500:
74+
yield WARN, Message(
75+
"length-requirements-not-met",
76+
"Article page is too short!",
77+
)
78+
79+
if not visuals:
80+
yield WARN, Message("missing-visual-asset", "Article page lacks visual assets.")
81+
82+
if missing_files:
83+
yield WARN, Message(
84+
"missing-visual-file",
85+
f"Visual asset files are missing:\n{bullet_list(config, missing_files)}",
86+
)
87+
88+
misplaced_files = [
89+
os.path.join(article_dir, filename)
90+
for filename in os.listdir(article_dir)
91+
if is_vector(filename) or is_raster(filename)
92+
]
93+
all_image_files = misplaced_files
94+
if os.path.isdir(images_dir):
95+
all_image_files += [
96+
os.path.join(images_dir, filename)
97+
for filename in os.listdir(images_dir)
5398
if is_vector(filename) or is_raster(filename)
5499
]
55-
all_image_files = misplaced_files
56-
if os.path.isdir(images_dir):
57-
all_image_files += [
58-
os.path.join(images_dir, filename)
59-
for filename in os.listdir(images_dir)
60-
if is_vector(filename) or is_raster(filename)
61-
]
62-
if misplaced_files:
63-
yield WARN, Message(
64-
"misplaced-image-files",
65-
f"There are {len(misplaced_files)} image files in the `article`"
66-
f" directory and they should be moved to an `article/images`"
67-
f" subdirectory:\n\n"
68-
f"{bullet_list(config, misplaced_files)}\n",
100+
if misplaced_files:
101+
yield WARN, Message(
102+
"misplaced-image-files",
103+
f"There are {len(misplaced_files)} image files in the `article`"
104+
f" directory and they should be moved to an `article/images`"
105+
f" subdirectory:\n\n"
106+
f"{bullet_list(config, misplaced_files)}\n",
107+
)
108+
109+
for filename in all_image_files:
110+
if is_vector(filename):
111+
maxsize = MAXSIZE_VECTOR
112+
imagetype = "vector"
113+
elif is_raster(filename):
114+
maxsize = MAXSIZE_RASTER
115+
imagetype = "raster"
116+
else:
117+
# ignore this file type
118+
continue
119+
120+
filesize = os.stat(filename).st_size
121+
if filesize > maxsize:
122+
yield FAIL, Message(
123+
"filesize",
124+
f"`{filename}` has `{filesize} bytes`, but the maximum filesize"
125+
f" for {imagetype} images is `{maxsize} bytes`.",
126+
)
127+
128+
dim = image_dimensions(filename)
129+
if dim is None:
130+
# Could not detect image dimensions
131+
continue
132+
133+
w, h = dim
134+
if w > MAX_WIDTH or h > MAX_HEIGHT:
135+
yield FAIL, Message(
136+
"image-too-large",
137+
f"Image is too large: `{w} x {h} pixels`\n\n"
138+
f"Max resolution allowed: `{MAX_WIDTH} x {MAX_HEIGHT} pixels`",
69139
)
70140

71-
for filename in all_image_files:
72-
if is_vector(filename):
73-
maxsize = MAXSIZE_VECTOR
74-
imagetype = "vector"
75-
elif is_raster(filename):
76-
maxsize = MAXSIZE_RASTER
77-
imagetype = "raster"
78-
else:
79-
# ignore this file type
80-
continue
81-
82-
filesize = os.stat(filename).st_size
83-
if filesize > maxsize:
84-
yield FAIL, Message(
85-
"filesize",
86-
f"`{filename}` has `{filesize} bytes`, but the maximum filesize"
87-
f" for {imagetype} images is `{maxsize} bytes`.",
88-
)
89-
90-
dim = image_dimensions(filename)
91-
if dim is None:
92-
# Could not detect image dimensions
93-
continue
94-
95-
w, h = dim
96-
if w > MAX_WIDTH or h > MAX_HEIGHT:
97-
yield FAIL, Message(
98-
"image-too-large",
99-
f"Image is too large: `{w} x {h} pixels`\n\n"
100-
f"Max resulution allowed: `{MAX_WIDTH} x {MAX_HEIGHT} pixels`",
101-
)
141+
yield PASS, "ok"
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<p><img src="missing_image1.png" alt="Sample Image 1"></p>
2+
<p><img src="missing_image2.png" alt="Sample Image 2"></p>
3+
4+
<p>This article has sufficient length but references visual asset files that do not exist in the repository.</p>
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<p>This article has sufficient length but lacks visual assets. It provides a lot of detailed information about the new font family, but there are no images, videos, or any other kind of visual content included in the article.</p>
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<p><img src="images/image.png" alt="Sample Image"></p>
2+
3+
<p>This article has sufficient length and includes visual assets. It provides a lot of detailed information about the new font family and includes an image for better understanding.</p>
4+
5+
<p>The article contains more than enough words to meet the length requirement, and it also features an image to ensure that the visual content requirement is satisfied.</p>

data/test/article_valid/article/images/image.svg

Loading

data/test/misplaced_image_files/article/image1.png

Loading

data/test/misplaced_image_files/article/image2.svg

Loading

data/test/missing_article/EMPTY_DIR

Whitespace-only changes.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<p>This is a short article.</p>

tests/checks/googlefonts_test.py

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5019,22 +5019,58 @@ def test_check_linegaps():
50195019

50205020

50215021
def test_check_article_images():
5022-
"""Validate location, size and resolution of article images."""
5023-
check = CheckTester("com.google.fonts/check/article/images")
5022+
"""Test ARTICLE page visual content, length requirements, and image properties."""
5023+
check = CheckTester(
5024+
"com.google.fonts/check/article/images", profile=googlefonts_profile
5025+
)
50245026

5025-
# This one is know to be bad:
5026-
family_dir = TEST_FILE("tirodevanagarihindi")
5027+
# Test case for missing ARTICLE
5028+
family_directory = portable_path("data/test/missing_article")
5029+
assert_results_contain(
5030+
check(MockFont(family_directory=family_directory)), WARN, "lacks-article"
5031+
)
50275032

5033+
# Test case for ARTICLE not meeting length requirements
5034+
family_directory = portable_path("data/test/short_article")
50285035
assert_results_contain(
5029-
check(MockFont(family_directory=family_dir)),
5036+
check(MockFont(family_directory=family_directory)),
5037+
WARN,
5038+
"length-requirements-not-met",
5039+
)
5040+
5041+
# Test case for ARTICLE missing visual asset
5042+
family_directory = portable_path("data/test/article_no_visual")
5043+
assert_results_contain(
5044+
check(MockFont(family_directory=family_directory)), WARN, "missing-visual-asset"
5045+
)
5046+
5047+
# Test case for ARTICLE with missing visual files
5048+
family_directory = portable_path("data/test/article_missing_visual_file")
5049+
assert_results_contain(
5050+
check(MockFont(family_directory=family_directory)), WARN, "missing-visual-file"
5051+
)
5052+
5053+
# Test case for misplaced image files
5054+
family_directory = portable_path("data/test/misplaced_image_files")
5055+
assert_results_contain(
5056+
check(MockFont(family_directory=family_directory)),
50305057
WARN,
50315058
"misplaced-image-files",
5032-
"The files are not in the correct directory...",
50335059
)
50345060

5035-
# TODO: test WARN "lacks-article"
5036-
# TODO: test FAIL "image-too-large"
5037-
# TODO: test PASS
5061+
# TODO:
5062+
# # Test case for image file exceeding size limit
5063+
# family_directory = portable_path("data/test/large_image_file")
5064+
# assert_results_contain(check(MockFont(family_directory=family_directory)), FAIL, "filesize")
5065+
5066+
# TODO:
5067+
# # Test case for image file exceeding resolution limit
5068+
# family_directory = portable_path("data/test/large_resolution_image")
5069+
# assert_results_contain(check(MockFont(family_directory=family_directory)), FAIL, "image-too-large")
5070+
5071+
# Test case for ARTICLE meeting requirements
5072+
family_directory = portable_path("data/test/article_valid")
5073+
assert_PASS(check(MockFont(family_directory=family_directory)))
50385074

50395075

50405076
def test_varfont_instances_in_order():

0 commit comments

Comments
 (0)