Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


class Command(BaseCommand):
help = "Refines machine translations using OpenAI's GPT-4 API with specified characteristics"
help = "Refines machine translations using OpenAI's GPT API with specified characteristics"

def add_arguments(self, parser):
parser.add_argument(
Expand Down
188 changes: 127 additions & 61 deletions pontoon/machinery/openai_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from django.conf import settings
from django.core.cache import cache

from pontoon.base.models import Locale
from pontoon.machinery.utils import (
get_machinery_service_cache_key,
set_machinery_service_cache_key,
Expand All @@ -19,90 +18,157 @@ def __init__(self):
self.client = OpenAI()

def get_translation(
self, english_text, translated_text, characteristic, target_language_name
self,
english_text,
translated_text,
characteristic,
locale,
entity_id=None,
entity_comment=None,
group_comment=None,
resource_comment=None,
pinned_comments=None,
terms=None,
):
terms_cache_key = (
str(sorted((t.get("text", "") for t in terms))) if terms else ""
)
pinned_comments_cache_key = (
str(sorted(pinned_comments)) if pinned_comments else ""
)
cache_key = get_machinery_service_cache_key(
"openai_chatgpt",
english_text,
translated_text,
characteristic,
target_language_name,
locale.code,
entity_id or "",
entity_comment or "",
group_comment or "",
resource_comment or "",
pinned_comments_cache_key,
terms_cache_key,
)
cached = cache.get(cache_key)
if cached is not None:
return cached

try:
target_language = Locale.objects.get(name=target_language_name)
except Locale.DoesNotExist:
raise ValueError(
f"The target language '{target_language_name}' is not supported."
style_goals = {
"informal": f"Use simple, everyday {locale.name} ({locale.code}) — avoid jargon, technical terms, and formal constructions.",
"formal": f"Use formal {locale.name} ({locale.code}) throughout; maintain a consistent register and do not mix formal and informal modes.",
"rephrased": f"Provide an alternative wording that preserves the original meaning; adapt idioms and culturally marked expressions for {locale.name} ({locale.code}); you may restructure sentences but must not introduce new information or omit essential meaning.",
}

style_goal = style_goals.get(characteristic)
if style_goal is None:
raise ValueError(f"Unrecognized characteristic: '{characteristic}'")

# Separate the instruction from the data.
# It makes it hard for injected text to masquerade as instructions.
context_parts = []
if entity_id:
context_parts.append(f"STRING ID:\n{entity_id}")
if resource_comment:
context_parts.append(f"RESOURCE COMMENT:\n{resource_comment}")
if group_comment:
context_parts.append(f"GROUP COMMENT:\n{group_comment}")
if entity_comment:
context_parts.append(f"STRING COMMENT:\n{entity_comment}")
if pinned_comments:
pinned_block = "\n".join(f"- {c}" for c in pinned_comments)
context_parts.append(f"PINNED COMMENTS:\n{pinned_block}")
if terms:
term_lines = []
for term in terms:
text = term.get("text", "")
pos = term.get("part_of_speech", "")
translation = term.get("translation", "")
parts = [f'"{text}"']
if pos:
parts.append(f"({pos})")
if translation:
parts.append(f'→ "{translation}"')
term_lines.append(" ".join(parts))
terms_block = "\n".join(f"- {line}" for line in term_lines)
context_parts.append(
f"TERMINOLOGY:\nThese are terminology matches in the source text that you should consider:\n{terms_block}"
)
context_parts.append(f"ENGLISH SOURCE:\n{english_text}")
context_parts.append(f"MACHINE TRANSLATION (for reference):\n{translated_text}")
user_prompt = "\n\n".join(context_parts)

intro_text = f"Refine the {target_language} machine translation below to make it {characteristic}."

common_rules = textwrap.dedent(
"""Follow these rules IN ORDER OF PRIORITY:
1) ENDING PUNCTUATION — SEMANTICS, NOT LITERAL CHAR:
- Detect the English ending: none, ".", "?", "!", "…".
- The translation MUST express the same ending SEMANTIC:
• if English ends with "?" → translation ends with a question.
• if English ends with "!" → translation ends with an exclamation.
• if English ends with "…" → translation ends with an ellipsis.
• if English has NO closing punctuation → translation MUST NOT end with ".", "?", "!", or "…".
- Do not add a final period if the English has none.
- Respect orthographic and typographic rules of the target language regarding punctuation, like using non-breaking spaces in French, adding opening "¿" or "¡" in Spanish, etc.
2) Preserve all HTML tags and their order. Do not add, remove, or reorder tags."""
)
system_header = textwrap.dedent(
f"""\
You are an expert {locale.name} ({locale.code}) localization specialist.

informal = textwrap.dedent(
f"""{intro_text}
Revise the {target_language} translation to use simpler language.
{common_rules}
3) Clarity and Simplicity: keep wording straightforward and consistent.
Output only the revised translation."""
Your task: produce a {characteristic} {locale.name} ({locale.code}) translation of a UI string.
Use the provided machine translation as a reference, but you are not bound by it — rewrite freely to achieve the best result.
"""
)

formal = textwrap.dedent(
f"""{intro_text}
Revise the {target_language} translation to use a higher level of formality.
{common_rules}
3) Consistency: maintain a consistent level of formality throughout; do not mix formal and informal modes.
4) Preserve all HTML tags and their order. Do not add, remove, or reorder tags.
5) Clarity and Precision: keep wording clear and unambiguous while remaining formal.
Output only the revised translation."""
context_instructions = []
if entity_id:
context_instructions.append(
"STRING ID: use it to infer the UI context (e.g., button, menu item, page title, tooltip) and adapt length and phrasing accordingly."
)
if resource_comment:
context_instructions.append(
"RESOURCE COMMENT: general notes about the file — use it as additional context."
)
if group_comment:
context_instructions.append(
"GROUP COMMENT: notes about the group of messages this string belongs to — use it as additional context."
)
if entity_comment:
context_instructions.append(
"STRING COMMENT: treat it as authoritative translator notes — it may specify placeholders to preserve exactly, terms that must not be translated, or other constraints. STRING COMMENT requirements take precedence over all stylistic choices."
)
if pinned_comments:
context_instructions.append(
"PINNED COMMENTS: this is a comment added by a project manager — treat them as high-priority guidance from the localization team."
)
if terms:
context_instructions.append(
"TERMINOLOGY: use the given translations for those terms consistently in your output, unless you believe the existing translation to be incorrect for the context."
)
context_block = (
"\n".join(context_instructions) + "\n\n" if context_instructions else ""
)

rephrased = textwrap.dedent(
f"""{intro_text}
Provide an alternative translation that preserves the original meaning while varying the wording.
{common_rules}
3) Cultural and Idiomatic Fit: adapt idioms and culturally marked expressions appropriately for {target_language}; you may restructure sentences but must not introduce new information or omit essential meaning.
4) Clarity and Naturalness: ensure the result reads naturally and is easy to understand.
Output only the alternative translation."""
system_rules = textwrap.dedent(
f"""\
Your goal is to produce a natural, grammatically correct translation. Follow these rules strictly; if rules conflict, earlier rules take priority.
1) ENDING PUNCTUATION — PRESERVE SEMANTICS
- Determine the ending punctuation of the English source text (ignore trailing spaces and HTML tags).
- The translation MUST end with the equivalent punctuation. Both directions are hard constraints:
• English ends with "." → translation MUST end with "." (or target-language equivalent)
• English ends with "?" → translation MUST end with a question mark
• English ends with "!" → translation MUST end with an exclamation mark
• English ends with "…" → translation MUST end with an ellipsis
• English has NO ending punctuation → translation MUST NOT end with ".", "?", "!", or "…"
- Apply correct punctuation conventions for the target language (e.g. Spanish "¿ ¡", French non-breaking space before "?", "!", ":").
2) HTML TAGS
- Preserve all HTML tags exactly as in the source:
- Do not add, remove, reorder, or modify tags or attributes
- Translate only the text content between tags, or the attributes if they contain translatable text (e.g., "alt", "title").
- Keep punctuation placement consistent with the source structure (do not move punctuation across tag boundaries unless required by the target language grammar).
3) {style_goal}

Output only the translation, with no explanation."""
)

system_messages = {
"informal": informal,
"formal": formal,
"rephrased": rephrased,
}
system_message = system_header + context_block + system_rules

system_message = system_messages.get(characteristic)
if system_message is None:
raise ValueError(f"Unrecognized characteristic: '{characteristic}'")

# Separate the instruction from the data.
# It makes it hard for injected text to masquerade as instructions.
user_prompt = (
f"{intro_text}\n\n"
f"ENGLISH SOURCE:\n{english_text}\n\n"
f"MACHINE TRANSLATION TO REFINE:\n{translated_text}"
)
# TODO: remove before merge.
# Print the full prompt before sending to help with debug.
if settings.DEBUG:
print(
f"[OpenAI] system:\n{system_message}\n\n[OpenAI] user:\n{user_prompt}"
)

# Call the OpenAI API with the constructed prompt
response = self.client.chat.completions.create(
model="gpt-4.1-2025-04-14",
model=settings.OPENAI_MODEL,
messages=[
{"role": "system", "content": system_message},
{"role": "user", "content": user_prompt},
Expand Down
75 changes: 72 additions & 3 deletions pontoon/machinery/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
)
from pontoon.test.factories import (
EntityFactory,
SectionFactory,
TeamCommentFactory,
TermFactory,
TermTranslationFactory,
TranslationFactory,
TranslationMemoryFactory,
)
Expand Down Expand Up @@ -303,19 +307,84 @@ def test_view_gpt_transform_cache(member, locale_a, openai_api_key):
"english_text": "Hello",
"translated_text": "Hola",
"characteristic": "formal",
"locale": locale_a.name,
"locale": locale_a.code,
}

response1 = member.client.get(url, params)
response1 = member.client.post(url, params)
assert MockOpenAI.return_value.chat.completions.create.call_count == 1

# Second identical request should be served from cache
response2 = member.client.get(url, params)
response2 = member.client.post(url, params)
assert MockOpenAI.return_value.chat.completions.create.call_count == 1

assert json.loads(response1.content) == json.loads(response2.content)


@pytest.mark.django_db
def test_view_gpt_transform_context(member, locale_a, openai_api_key):
    """The GPT-transform view forwards every piece of entity context
    (string id, string/group/resource comments, pinned comments, and
    terminology) into the user message sent to the OpenAI API."""
    cache.clear()
    url = reverse("pontoon.gpt_transform")

    # Fake a successful completion so no real API call is made.
    fake_completion = MagicMock()
    fake_completion.choices[0].message.content = "translated"

    # Entity carrying all context kinds: key (string id), string comment,
    # group (section) comment, and resource comment.
    section = SectionFactory(key=["nav"], comment="Navigation section")
    entity = EntityFactory(
        key=["open-browser"],
        string="Open browser",
        comment="Button label",
        resource=section.resource,
        section=section,
    )
    entity.resource.comment = "Main UI file"
    entity.resource.save(update_fields=["comment"])

    # Two pinned team comments for the target locale.
    for comment_text in ("<p>Use formal register</p>", "Keep it short"):
        TeamCommentFactory(
            entity=entity,
            locale=locale_a,
            content=comment_text,
            pinned=True,
        )

    # Terminology entry matching the source string, translated for the locale.
    glossary_term = TermFactory(
        text="browser", part_of_speech="noun", definition="A web browser"
    )
    TermTranslationFactory(term=glossary_term, locale=locale_a, text="navigateur")

    with patch("pontoon.machinery.openai_service.OpenAI") as MockOpenAI:
        MockOpenAI.return_value.chat.completions.create.return_value = (
            fake_completion
        )
        member.client.post(
            url,
            {
                "english_text": "Open browser",
                "translated_text": "Ouvrir le navigateur",
                "characteristic": "formal",
                "locale": locale_a.code,
                "entity_pk": entity.pk,
            },
        )

    # The second message (index 1) is the user prompt; it must contain
    # every context section the view gathered.
    create_call = MockOpenAI.return_value.chat.completions.create.call_args
    user_message = create_call.kwargs["messages"][1]["content"]
    for expected_fragment in (
        "STRING ID:\nopen-browser",
        "STRING COMMENT:\nButton label",
        "GROUP COMMENT:\nNavigation section",
        "RESOURCE COMMENT:\nMain UI file",
        "PINNED COMMENTS:",
        "Use formal register",
        "Keep it short",
        "TERMINOLOGY:",
        '"browser" (noun) → "navigateur"',
    ):
        assert expected_fragment in user_message


@pytest.mark.django_db
def test_view_caighdean(client, entity_a):
gd = Locale.objects.get(code="gd")
Expand Down
Loading
Loading