Skip to content

Commit

Permalink
update check/article/images
Browse files Browse the repository at this point in the history
Ensure article page has minimum length and includes visual assets.
Also added more thorough code-testing.

com.google.fonts/check/article/images
On the Google Fonts profile.

(issue #4653)
  • Loading branch information
felipesanches committed May 31, 2024
1 parent 7893873 commit acacf4b
Show file tree
Hide file tree
Showing 11 changed files with 155 additions and 67 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ A more detailed list of changes is available in the corresponding milestones for
- Checks which validate the description files now also validate article files (issue #4730)
- **[com.google.font/check/description/unsupported_elements]:** Also checks for wellformedness of HTML video tags. (issue #4730)
- **[com.google.fonts/check/metadata/copyright_max_length], [com.google.fonts/check/metadata/nameid/copyright], [com.google.fonts/check/metadata/valid_copyright], [com.google.fonts/check/name/copyright_length]:** These checks have all been merged into [com.google.fonts/check/font_copyright]. (PR #4748 / issue #4735)
- **EXPERIMENTAL - [com.google.fonts/check/article/images]:** Ensure article page has minimum length and includes visual assets. Also added more thorough code-testing. (issue #4653)

#### On the Type Network profile
- **[com.typenetwork/check/usweightclass]:** Detects better "Semi", "Extra" and "Ultra" weights using a space on the name. (PR #4751)
Expand Down
156 changes: 98 additions & 58 deletions Lib/fontbakery/checks/googlefonts/article.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,30 @@
import os

from fontbakery.prelude import check, Message, FAIL, WARN
from fontbakery.prelude import check, Message, FAIL, WARN, PASS


@check(
id="com.google.fonts/check/article/images",
conditions=["family_directory"],
rationale="""
The purpose of this check is to ensure images (either raster or vector files)
are placed on the correct directory (an `images` subdirectory inside `article`) and
they they are not excessively large in filesize and resolution.
are placed on the correct directory (an `images` subdirectory inside `article`)
and that they are not excessively large in filesize and resolution.
These constraints are loosely based on infrastructure limitations under
default configurations.
It also ensures that the article page has a minimum length and includes
at least one visual asset.
""",
proposal="https://github.com/fonttools/fontbakery/issues/4594",
experimental="Since 2024/Mar/25",
)
def com_google_fonts_check_metadata_parses(config, family_directory):
"""Validate location, size and resolution of article images."""

from fontbakery.utils import bullet_list, image_dimensions
def com_google_fonts_check_article_images(config, family_directory):
"""Validate location, size, and resolution of article images,
and ensure article page has minimum length and includes visual assets."""
from bs4 import BeautifulSoup
from fontbakery.utils import bullet_list

MAX_WIDTH = 2048
MAX_HEIGHT = 1024
Expand All @@ -40,62 +44,98 @@ def is_vector(filename):
return False

article_dir = os.path.join(family_directory, "article")
images_dir = os.path.join(family_directory, "article", "images")
if not os.path.isdir(article_dir):
article_path = os.path.join(article_dir, "ARTICLE.en_us.html")
images_dir = os.path.join(article_dir, "images")

if not os.path.exists(article_path):
yield WARN, Message(
"lacks-article",
f"Family metadata at {family_directory} does not have an article.\n",
)
else:
misplaced_files = [
os.path.join(article_dir, filename)
for filename in os.listdir(article_dir)
return

with open(article_path, "r", encoding="utf-8") as file:
content = file.read()

soup = BeautifulSoup(content, "html.parser")
text = soup.get_text()
word_count = len(text.split())
char_count = len(text)

visuals = soup.find_all(["img", "svg", "video", "iframe"])
missing_files = []
for visual in visuals:
if src := visual.get("src"):
visual_path = os.path.join(article_dir, src)
if not os.path.exists(visual_path):
missing_files.append(src)

if word_count < 100 or char_count < 500:
yield WARN, Message(
"length-requirements-not-met",
"Article page is too short!",
)

if not visuals:
yield WARN, Message("missing-visual-asset", "Article page lacks visual assets.")

if missing_files:
yield WARN, Message(
"missing-visual-file",
f"Visual asset files are missing:\n{bullet_list(config, missing_files)}",
)

misplaced_files = [
os.path.join(article_dir, filename)
for filename in os.listdir(article_dir)
if is_vector(filename) or is_raster(filename)
]
all_image_files = misplaced_files
if os.path.isdir(images_dir):
all_image_files += [
os.path.join(images_dir, filename)
for filename in os.listdir(images_dir)
if is_vector(filename) or is_raster(filename)
]
all_image_files = misplaced_files
if os.path.isdir(images_dir):
all_image_files += [
os.path.join(images_dir, filename)
for filename in os.listdir(images_dir)
if is_vector(filename) or is_raster(filename)
]
if misplaced_files:
yield WARN, Message(
"misplaced-image-files",
f"There are {len(misplaced_files)} image files in the `article`"
f" directory and they should be moved to an `article/images`"
f" subdirectory:\n\n"
f"{bullet_list(config, misplaced_files)}\n",
if misplaced_files:
yield WARN, Message(
"misplaced-image-files",
f"There are {len(misplaced_files)} image files in the `article`"
f" directory and they should be moved to an `article/images`"
f" subdirectory:\n\n"
f"{bullet_list(config, misplaced_files)}\n",
)

for filename in all_image_files:
if is_vector(filename):
maxsize = MAXSIZE_VECTOR
imagetype = "vector"
elif is_raster(filename):
maxsize = MAXSIZE_RASTER
imagetype = "raster"
else:
# ignore this file type
continue

filesize = os.stat(filename).st_size
if filesize > maxsize:
yield FAIL, Message(
"filesize",
f"`{filename}` has `{filesize} bytes`, but the maximum filesize"
f" for {imagetype} images is `{maxsize} bytes`.",
)

dim = image_dimensions(filename)
if dim is None:
# Could not detect image dimensions
continue

w, h = dim
if w > MAX_WIDTH or h > MAX_HEIGHT:
yield FAIL, Message(
"image-too-large",
f"Image is too large: `{w} x {h} pixels`\n\n"
f"Max resolution allowed: `{MAX_WIDTH} x {MAX_HEIGHT} pixels`",
)

for filename in all_image_files:
if is_vector(filename):
maxsize = MAXSIZE_VECTOR
imagetype = "vector"
elif is_raster(filename):
maxsize = MAXSIZE_RASTER
imagetype = "raster"
else:
# ignore this file type
continue

filesize = os.stat(filename).st_size
if filesize > maxsize:
yield FAIL, Message(
"filesize",
f"`{filename}` has `{filesize} bytes`, but the maximum filesize"
f" for {imagetype} images is `{maxsize} bytes`.",
)

dim = image_dimensions(filename)
if dim is None:
# Could not detect image dimensions
continue

w, h = dim
if w > MAX_WIDTH or h > MAX_HEIGHT:
yield FAIL, Message(
"image-too-large",
f"Image is too large: `{w} x {h} pixels`\n\n"
f"Max resulution allowed: `{MAX_WIDTH} x {MAX_HEIGHT} pixels`",
)
yield PASS, "ok"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<p><img src="missing_image1.png" alt="Sample Image 1"></p>
<p><img src="missing_image2.png" alt="Sample Image 2"></p>

<p>This article has sufficient length but references visual asset files that do not exist in the repository.</p>
1 change: 1 addition & 0 deletions data/test/article_no_visual/article/ARTICLE.en_us.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<p>This article has sufficient length but lacks visual assets. It provides a lot of detailed information about the new font family, but there are no images, videos, or any other kind of visual content included in the article.</p>
5 changes: 5 additions & 0 deletions data/test/article_valid/article/ARTICLE.en_us.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<p><img src="images/image.png" alt="Sample Image"></p>

<p>This article has sufficient length and includes visual assets. It provides a lot of detailed information about the new font family and includes an image for better understanding.</p>

<p>The article contains more than enough words to meet the length requirement, and it also features an image to ensure that the visual content requirement is satisfied.</p>
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file.
1 change: 1 addition & 0 deletions data/test/short_article/article/ARTICLE.en_us.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<p>This is a short article.</p>
54 changes: 45 additions & 9 deletions tests/checks/googlefonts_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5019,22 +5019,58 @@ def test_check_linegaps():


def test_check_article_images():
"""Validate location, size and resolution of article images."""
check = CheckTester("com.google.fonts/check/article/images")
"""Test ARTICLE page visual content, length requirements, and image properties."""
check = CheckTester(
"com.google.fonts/check/article/images", profile=googlefonts_profile
)

# This one is know to be bad:
family_dir = TEST_FILE("tirodevanagarihindi")
# Test case for missing ARTICLE
family_directory = portable_path("data/test/missing_article")
assert_results_contain(
check(MockFont(family_directory=family_directory)), WARN, "lacks-article"
)

# Test case for ARTICLE not meeting length requirements
family_directory = portable_path("data/test/short_article")
assert_results_contain(
check(MockFont(family_directory=family_dir)),
check(MockFont(family_directory=family_directory)),
WARN,
"length-requirements-not-met",
)

# Test case for ARTICLE missing visual asset
family_directory = portable_path("data/test/article_no_visual")
assert_results_contain(
check(MockFont(family_directory=family_directory)), WARN, "missing-visual-asset"
)

# Test case for ARTICLE with missing visual files
family_directory = portable_path("data/test/article_missing_visual_file")
assert_results_contain(
check(MockFont(family_directory=family_directory)), WARN, "missing-visual-file"
)

# Test case for misplaced image files
family_directory = portable_path("data/test/misplaced_image_files")
assert_results_contain(
check(MockFont(family_directory=family_directory)),
WARN,
"misplaced-image-files",
"The files are not in the correct directory...",
)

# TODO: test WARN "lacks-article"
# TODO: test FAIL "image-too-large"
# TODO: test PASS
# TODO:
# # Test case for image file exceeding size limit
# family_directory = portable_path("data/test/large_image_file")
# assert_results_contain(check(MockFont(family_directory=family_directory)), FAIL, "filesize")

# TODO:
# # Test case for image file exceeding resolution limit
# family_directory = portable_path("data/test/large_resolution_image")
# assert_results_contain(check(MockFont(family_directory=family_directory)), FAIL, "image-too-large")

# Test case for ARTICLE meeting requirements
family_directory = portable_path("data/test/article_valid")
assert_PASS(check(MockFont(family_directory=family_directory)))


def test_varfont_instances_in_order():
Expand Down

0 comments on commit acacf4b

Please sign in to comment.