diff --git a/java/com/google/dotprompt/parser/BUILD.bazel b/java/com/google/dotprompt/parser/BUILD.bazel
new file mode 100644
index 000000000..e2089b2a0
--- /dev/null
+++ b/java/com/google/dotprompt/parser/BUILD.bazel
@@ -0,0 +1,56 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+load("@rules_java//java:defs.bzl", "java_library", "java_test")
+
+# Library target for this package: compiles Parser.java and Picoschema.java.
+# Publicly visible; depends on the models and resolvers packages plus Jackson
+# (databind and the YAML dataformat).
+java_library(
+ name = "parser",
+ srcs = [
+ "Parser.java",
+ "Picoschema.java",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ "//java/com/google/dotprompt/models",
+ "//java/com/google/dotprompt/resolvers",
+ "@maven//:com_fasterxml_jackson_core_jackson_databind",
+ "@maven//:com_fasterxml_jackson_dataformat_jackson_dataformat_yaml",
+ ],
+)
+
+# Test target for ParserTest.java; runs against :parser with JUnit and Truth.
+java_test(
+ name = "ParserTest",
+ srcs = ["ParserTest.java"],
+ test_class = "com.google.dotprompt.parser.ParserTest",
+ deps = [
+ ":parser",
+ "//java/com/google/dotprompt/models",
+ "@maven//:com_google_truth_truth",
+ "@maven//:junit_junit",
+ ],
+)
+
+# Test target for PicoschemaTest.java; runs against :parser and the resolvers
+# package with JUnit and Truth.
+java_test(
+ name = "PicoschemaTest",
+ srcs = ["PicoschemaTest.java"],
+ test_class = "com.google.dotprompt.parser.PicoschemaTest",
+ deps = [
+ ":parser",
+ "//java/com/google/dotprompt/resolvers",
+ "@maven//:com_google_truth_truth",
+ "@maven//:junit_junit",
+ ],
+)
diff --git a/java/com/google/dotprompt/parser/Parser.java b/java/com/google/dotprompt/parser/Parser.java
new file mode 100644
index 000000000..7e82e94fb
--- /dev/null
+++ b/java/com/google/dotprompt/parser/Parser.java
@@ -0,0 +1,523 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package com.google.dotprompt.parser;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
+import com.google.dotprompt.models.DataArgument;
+import com.google.dotprompt.models.MediaContent;
+import com.google.dotprompt.models.MediaPart;
+import com.google.dotprompt.models.Message;
+import com.google.dotprompt.models.Part;
+import com.google.dotprompt.models.PendingPart;
+import com.google.dotprompt.models.Prompt;
+import com.google.dotprompt.models.Role;
+import com.google.dotprompt.models.TextPart;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * Parses Dotprompt files into Prompt objects and rendered templates into messages.
+ *
+ * <p>This class handles:
+ *
+ * <ul>
+ *   <li>Parsing YAML frontmatter and separating configuration from template body
+ *   <li>Namespace expansion for extension fields
+ *   <li>Converting rendered templates into structured messages
+ *   <li>Processing role, history, media, and section markers
+ *   <li>Handling chat history insertion
+ * </ul>
+ */
+public class Parser {
+
+ /** Prefix for role markers in the template. */
+ public static final String ROLE_MARKER_PREFIX = "<<Matches a YAML frontmatter block between "---" markers. Handles different line endings
+ * (CRLF, LF, CR) and optional trailing whitespace on the marker lines.
+ */
+ private static final Pattern FRONTMATTER_PATTERN =
+ Pattern.compile("(?ms)^\\s*---[ \\t]*[\\r\\n]+(.*?)^[ \\t]*---[ \\t]*[\\r\\n]+");
+
+ /**
+ * Pattern to match role and history markers.
+ *
+ *
Examples: {@code <<>>}, {@code <<>>}
+ */
+ public static final Pattern ROLE_AND_HISTORY_MARKER_PATTERN =
+ Pattern.compile("(<<>>");
+
+ /**
+ * Pattern to match media and section markers.
+ *
+ *
Examples: {@code <<>>}, {@code <<>>}
+ */
+ public static final Pattern MEDIA_AND_SECTION_MARKER_PATTERN =
+ Pattern.compile("(<<>>");
+
+ /**
+  * Jackson mapper built on a {@link YAMLFactory}; {@code parse} uses it to read the YAML
+  * frontmatter into a map.
+  */
+ private static final ObjectMapper mapper = new ObjectMapper(new YAMLFactory());
+
+ /**
+  * Top-level frontmatter keys that are copied into the prompt configuration unchanged. Any key
+  * not listed here is routed into the {@code ext} map by {@code expandNamespacedKeys}.
+  */
+ public static final Set RESERVED_METADATA_KEYWORDS =
+ Set.of(
+ "config",
+ "description",
+ "ext",
+ "input",
+ "model",
+ "name",
+ "output",
+ "raw",
+ "toolDefs",
+ "tools",
+ "variant",
+ "version");
+
+ /**
+  * Parses a Dotprompt source string into a {@link Prompt}.
+  *
+  * <p>When the content begins (after optional leading whitespace) with a YAML frontmatter block
+  * delimited by {@code ---} lines, that block becomes the prompt configuration and everything
+  * after the closing delimiter becomes the template. Otherwise the whole content is the template
+  * and the configuration is empty. The unmodified frontmatter map is stored in the configuration
+  * under the {@code "raw"} key.
+  *
+  * <p>A {@code ---} block that appears later in the document (for example a Markdown horizontal
+  * rule inside the template) is deliberately not treated as frontmatter.
+  *
+  * @param content The raw string content of the prompt file (including frontmatter); may be
+  *     null or blank, in which case an empty prompt is returned.
+  * @return The parsed Prompt object containing the template and configuration.
+  * @throws IOException If parsing the YAML frontmatter fails.
+  */
+ public static Prompt parse(String content) throws IOException {
+   if (content == null || content.trim().isEmpty()) {
+     return new Prompt("", Map.of());
+   }
+
+   Matcher matcher = FRONTMATTER_PATTERN.matcher(content);
+   // lookingAt() anchors the match at the start of the input. find() would, because the pattern
+   // is MULTILINE, also accept a "---" block in the middle of the file and silently discard
+   // all text that precedes it.
+   if (!matcher.lookingAt()) {
+     return new Prompt(content, Map.of());
+   }
+
+   String yaml = matcher.group(1);
+   String template = content.substring(matcher.end());
+
+   Map<String, Object> config = new HashMap<>();
+   if (yaml != null && !yaml.trim().isEmpty()) {
+     @SuppressWarnings("unchecked")
+     Map<String, Object> rawConfig = mapper.readValue(yaml, Map.class);
+     // Guard against a null result (assumed for an explicit YAML null document such as "~");
+     // treat it like empty frontmatter instead of failing with a NullPointerException.
+     if (rawConfig != null) {
+       config = expandNamespacedKeys(rawConfig);
+       config.put("raw", rawConfig);
+     }
+   }
+   return new Prompt(template, config);
+ }
+
+ /**
+  * Tokenizes a string around every match of {@code pattern}, dropping blank pieces.
+  *
+  * <p>For each match, the text between the previous match and this one is emitted (unless it is
+  * empty or whitespace-only), followed by capture group 1 of the match (under the same rule).
+  * Text after the final match is emitted last. The pattern must therefore define at least one
+  * capturing group.
+  *
+  * @param source The source string to split; null or empty yields an empty list.
+  * @param pattern The pattern to use for splitting.
+  * @return A list of non-empty string pieces, in source order.
+  */
+ public static List<String> splitByRegex(String source, Pattern pattern) {
+   if (source == null || source.isEmpty()) {
+     return List.of();
+   }
+
+   List<String> pieces = new ArrayList<>();
+   Matcher m = pattern.matcher(source);
+   int cursor = 0;
+
+   while (m.find()) {
+     // Literal text sitting between the previous marker and this one.
+     String gap = source.substring(cursor, m.start());
+     if (!gap.trim().isEmpty()) {
+       pieces.add(gap);
+     }
+     // Only group 1 is kept, so whatever the pattern matches outside the group is dropped.
+     String marker = m.group(1);
+     if (marker != null && !marker.trim().isEmpty()) {
+       pieces.add(marker);
+     }
+     cursor = m.end();
+   }
+
+   // Whatever follows the last marker (or the whole string when nothing matched).
+   String tail = source.substring(cursor);
+   if (!tail.trim().isEmpty()) {
+     pieces.add(tail);
+   }
+   return pieces;
+ }
+
+ /**
+  * Breaks a rendered template into text pieces and role/history marker pieces.
+  *
+  * <p>Delegates to {@link #splitByRegex} with {@link #ROLE_AND_HISTORY_MARKER_PATTERN}.
+  *
+  * @param renderedString The template string to split.
+  * @return List of non-empty string pieces.
+  */
+ public static List<String> splitByRoleAndHistoryMarkers(String renderedString) {
+   final Pattern markers = ROLE_AND_HISTORY_MARKER_PATTERN;
+   return splitByRegex(renderedString, markers);
+ }
+
+ /**
+  * Breaks a message source into text pieces and media/section marker pieces.
+  *
+  * <p>Delegates to {@link #splitByRegex} with {@link #MEDIA_AND_SECTION_MARKER_PATTERN}.
+  *
+  * @param source The source string to split.
+  * @return List of non-empty string pieces.
+  */
+ public static List<String> splitByMediaAndSectionMarkers(String source) {
+   final Pattern markers = MEDIA_AND_SECTION_MARKER_PATTERN;
+   return splitByRegex(source, markers);
+ }
+
+ /**
+  * Converts a rendered template string into a list of messages.
+  *
+  * <p>The string is split on role and history markers. Parsing starts with an implicit empty
+  * {@code USER} message. A role marker either retargets the current message (when it has no
+  * non-blank text yet) or starts a new message. A history marker splices in the messages from
+  * {@code data}, tagged as history, and then starts a new {@code MODEL} message. Any other piece
+  * is appended to the current message's text. Message sources that end up with no content are
+  * dropped when the list is converted, and the result is finally passed through
+  * {@link #insertHistory}.
+  *
+  * @param renderedString The rendered template string to convert.
+  * @param data Optional data containing message history; may be null.
+  * @return List of structured messages.
+  */
+ public static List toMessages(String renderedString, DataArgument data) {
+ MessageSource currentMessage = new MessageSource(Role.USER, "");
+ List messageSources = new ArrayList<>();
+ messageSources.add(currentMessage);
+
+ for (String piece : splitByRoleAndHistoryMarkers(renderedString)) {
+ if (piece.startsWith(ROLE_MARKER_PREFIX)) {
+ // Everything after the prefix is the role name.
+ String roleName = piece.substring(ROLE_MARKER_PREFIX.length());
+ Role role = Role.fromString(roleName);
+
+ if (currentMessage.source != null && !currentMessage.source.trim().isEmpty()) {
+ // Current message has content, create a new message
+ currentMessage = new MessageSource(role, "");
+ messageSources.add(currentMessage);
+ } else {
+ // Update the role of the current empty message
+ currentMessage.role = role;
+ }
+ } else if (piece.startsWith(HISTORY_MARKER_PREFIX)) {
+ // Add the history messages to the message sources
+ List historyMessages =
+ transformMessagesToHistory(data != null ? data.messages() : List.of());
+ for (Message msg : historyMessages) {
+ messageSources.add(new MessageSource(msg.role(), msg.content(), msg.metadata()));
+ }
+
+ // Add a new message source for the model
+ currentMessage = new MessageSource(Role.MODEL, "");
+ messageSources.add(currentMessage);
+ } else {
+ // Add the piece to the current message source
+ currentMessage.source =
+ (currentMessage.source != null ? currentMessage.source : "") + piece;
+ }
+ }
+
+ List messages = messageSourcesToMessages(messageSources);
+ // insertHistory returns its input unchanged when a history marker already placed the history.
+ return insertHistory(messages, data != null ? data.messages() : null);
+ }
+
+ /**
+  * Convenience overload of {@link #toMessages(String, DataArgument)} for callers that have no
+  * data context (and therefore no history to insert).
+  *
+  * @param renderedString The rendered template string to convert.
+  * @return List of structured messages.
+  */
+ public static List<Message> toMessages(String renderedString) {
+   final DataArgument noData = null;
+   return toMessages(renderedString, noData);
+ }
+
+ /**
+  * Returns copies of the given messages, each tagged with {@code "purpose": "history"}.
+  *
+  * <p>Existing metadata entries are carried over into a fresh map, so the input messages and
+  * their metadata maps are left untouched.
+  *
+  * @param messages Messages to transform; null yields an empty list.
+  * @return Messages with history metadata added, in the same order.
+  */
+ public static List<Message> transformMessagesToHistory(List<Message> messages) {
+   if (messages == null) {
+     return List.of();
+   }
+   List<Message> tagged = new ArrayList<>();
+   for (Message original : messages) {
+     Map<String, Object> metadata = new HashMap<>();
+     if (original.metadata() != null) {
+       metadata.putAll(original.metadata());
+     }
+     // Set after copying so the history tag overrides any existing "purpose" entry.
+     metadata.put("purpose", "history");
+     tagged.add(new Message(original.role(), original.content(), metadata));
+   }
+   return tagged;
+ }
+
+ /**
+  * Reports whether any message is tagged as history, i.e. its metadata maps {@code "purpose"}
+  * to {@code "history"}.
+  *
+  * @param messages The messages to check; may be null.
+  * @return True if any message has history metadata.
+  */
+ public static boolean messagesHaveHistory(List<Message> messages) {
+   if (messages == null) {
+     return false;
+   }
+   for (Message message : messages) {
+     if (message.metadata() != null && "history".equals(message.metadata().get("purpose"))) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /**
+  * Inserts historical messages into the conversation at the appropriate position.
+  *
+  * <ul>
+  *   <li>If {@code history} is null or empty, or {@code messages} already contains a message
+  *       tagged as history, {@code messages} is returned unchanged.
+  *   <li>If {@code messages} is null or empty, {@code history} itself is returned.
+  *   <li>If the final message has role {@code USER}, the history goes immediately before it.
+  *   <li>Otherwise the history is appended to the end.
+  * </ul>
+  *
+  * @param messages Current list of messages.
+  * @param history Historical messages to insert.
+  * @return Messages with history inserted.
+  */
+ public static List<Message> insertHistory(List<Message> messages, List<Message> history) {
+   boolean nothingToInsert = history == null || history.isEmpty();
+   if (nothingToInsert || messagesHaveHistory(messages)) {
+     return messages;
+   }
+   if (messages == null || messages.isEmpty()) {
+     return history;
+   }
+
+   // History goes in front of a trailing user message, otherwise at the very end.
+   int lastIndex = messages.size() - 1;
+   boolean endsWithUser = messages.get(lastIndex).role() == Role.USER;
+   int insertAt = endsWithUser ? lastIndex : messages.size();
+
+   List<Message> merged = new ArrayList<>(messages.subList(0, insertAt));
+   merged.addAll(history);
+   merged.addAll(messages.subList(insertAt, messages.size()));
+   return merged;
+ }
+
+ /**
+  * Turns a message source string into parts by splitting on media and section markers and
+  * parsing each resulting piece with {@link #parsePart}.
+  *
+  * @param source The source string to convert into parts; null or empty yields an empty list.
+  * @return List of structured parts (text, media, or metadata).
+  */
+ public static List<Part> toParts(String source) {
+   if (source == null || source.isEmpty()) {
+     return List.of();
+   }
+   List<Part> parts = new ArrayList<>();
+   for (String piece : splitByMediaAndSectionMarkers(source)) {
+     parts.add(parsePart(piece));
+   }
+   return parts;
+ }
+
+ /**
+  * Dispatches a piece to the matching part parser based on its prefix: media marker, section
+  * marker, or (for anything else) plain text.
+  *
+  * @param piece The piece to parse.
+  * @return Parsed part (TextPart, MediaPart, or PendingPart).
+  */
+ public static Part parsePart(String piece) {
+   final Part parsed;
+   if (piece.startsWith(MEDIA_MARKER_PREFIX)) {
+     parsed = parseMediaPart(piece);
+   } else if (piece.startsWith(SECTION_MARKER_PREFIX)) {
+     parsed = parseSectionPart(piece);
+   } else {
+     parsed = parseTextPart(piece);
+   }
+   return parsed;
+ }
+
+ /**
+  * Parses a media marker piece into a {@link MediaPart}.
+  *
+  * <p>The piece is split on single spaces: the second token is the URL (empty string when
+  * absent) and the third token, when present and non-blank, is the content type (otherwise
+  * null).
+  *
+  * @param piece The piece to parse.
+  * @return Parsed media part.
+  * @throws IllegalArgumentException If the piece is not a valid media marker.
+  */
+ public static MediaPart parseMediaPart(String piece) {
+   if (!piece.startsWith(MEDIA_MARKER_PREFIX)) {
+     throw new IllegalArgumentException("Invalid media piece: " + piece);
+   }
+   String[] tokens = piece.split(" ");
+   String url = tokens.length > 1 ? tokens[1] : "";
+
+   // A blank third token counts as "no content type".
+   String contentType = null;
+   if (tokens.length > 2 && !tokens[2].trim().isEmpty()) {
+     contentType = tokens[2];
+   }
+   return new MediaPart(new MediaContent(url, contentType));
+ }
+
+ /**
+  * Parses a section marker piece into a {@link PendingPart}.
+  *
+  * <p>The token after the first space is stored as the {@code "purpose"} metadata entry (empty
+  * string when absent), and {@code "pending"} is set to {@code true}.
+  *
+  * @param piece The piece to parse.
+  * @return Parsed pending part with section metadata.
+  * @throws IllegalArgumentException If the piece is not a valid section marker.
+  */
+ public static PendingPart parseSectionPart(String piece) {
+   if (!piece.startsWith(SECTION_MARKER_PREFIX)) {
+     throw new IllegalArgumentException("Invalid section piece: " + piece);
+   }
+   String[] tokens = piece.split(" ");
+   Map<String, Object> metadata = new HashMap<>();
+   metadata.put("pending", true);
+   metadata.put("purpose", tokens.length > 1 ? tokens[1] : "");
+   return new PendingPart(metadata);
+ }
+
+ /**
+  * Wraps a piece of plain text, unchanged, in a {@link TextPart}.
+  *
+  * @param piece The piece to parse.
+  * @return Parsed text part.
+  */
+ public static TextPart parseTextPart(String piece) {
+   TextPart text = new TextPart(piece);
+   return text;
+ }
+
+ /**
+  * Converts intermediate message sources into messages.
+  *
+  * <p>A source that already carries parsed content keeps it; a source with non-empty text has
+  * that text parsed via {@link #toParts}; a source with neither is skipped.
+  *
+  * @param messageSources List of message sources.
+  * @return List of structured messages.
+  */
+ private static List<Message> messageSourcesToMessages(List<MessageSource> messageSources) {
+   List<Message> messages = new ArrayList<>();
+   for (MessageSource src : messageSources) {
+     final List<Part> parts;
+     if (src.content != null) {
+       parts = src.content;
+     } else if (src.source != null && !src.source.isEmpty()) {
+       parts = toParts(src.source);
+     } else {
+       // Nothing to emit for this source (e.g. a role marker that was never followed by text).
+       continue;
+     }
+     messages.add(new Message(src.role, parts, src.metadata));
+   }
+   return messages;
+ }
+
+ /**
+  * Splits raw frontmatter into reserved top-level keys and an {@code ext} map holding
+  * everything else.
+  *
+  * <p>Keys listed in {@link #RESERVED_METADATA_KEYWORDS} are copied unchanged. Every other key
+  * is placed into {@code ext} via {@code addNested}. An explicit {@code ext} map in the
+  * frontmatter is merged with the derived entries rather than being overwritten by them; on a
+  * key collision the derived entry wins.
+  *
+  * @param input The raw configuration map. Neither it nor the maps nested in it are modified.
+  * @return A new map with namespaces expanded.
+  */
+ private static Map<String, Object> expandNamespacedKeys(Map<String, Object> input) {
+   Map<String, Object> result = new HashMap<>();
+   Map<String, Object> ext = new HashMap<>();
+
+   // Seed ext with a copy of any explicit "ext" map so that adding derived entries neither
+   // discards the user's values nor writes into the raw frontmatter.
+   Object explicitExt = input.get("ext");
+   boolean hasExplicitExtMap = explicitExt instanceof Map;
+   if (hasExplicitExtMap) {
+     for (Map.Entry<?, ?> e : ((Map<?, ?>) explicitExt).entrySet()) {
+       ext.put(String.valueOf(e.getKey()), e.getValue());
+     }
+   }
+
+   for (Map.Entry<String, Object> entry : input.entrySet()) {
+     String key = entry.getKey();
+     Object value = entry.getValue();
+
+     if (hasExplicitExtMap && "ext".equals(key)) {
+       // Already merged into ext above.
+       continue;
+     }
+     if (RESERVED_METADATA_KEYWORDS.contains(key)) {
+       result.put(key, value);
+     } else {
+       // Expand namespace into ext
+       addNested(ext, key, value);
+     }
+   }
+
+   // Keep an explicit (even empty) ext map visible, as before; otherwise only emit ext when
+   // something was routed into it.
+   if (hasExplicitExtMap || !ext.isEmpty()) {
+     result.put("ext", ext);
+   }
+
+   return result;
+ }
+
+ /**
+  * Adds a namespaced key to a map structure using "last dot" flattening logic.
+  *
+  * <p>e.g. "a.b.c" -> { "a.b": { "c": value } }
+  *
+  * <p>A key without a dot is stored directly. For a dotted key, an existing map under the
+  * parent key is copied before the child is added, so a map owned by the caller (such as a
+  * value taken from the raw frontmatter) is never mutated. An existing non-map value under the
+  * parent key is replaced by the new map.
+  *
+  * @param root The root map to add to.
+  * @param key The dot-separated key (e.g., "a.b.c").
+  * @param value The value to set.
+  */
+ private static void addNested(Map<String, Object> root, String key, Object value) {
+   int lastDot = key.lastIndexOf('.');
+   if (lastDot == -1) {
+     root.put(key, value);
+     return;
+   }
+
+   String parentKey = key.substring(0, lastDot);
+   String childKey = key.substring(lastDot + 1);
+
+   Map<String, Object> parent = new HashMap<>();
+   Object existing = root.get(parentKey);
+   if (existing instanceof Map) {
+     for (Map.Entry<?, ?> e : ((Map<?, ?>) existing).entrySet()) {
+       parent.put(String.valueOf(e.getKey()), e.getValue());
+     }
+   }
+   parent.put(childKey, value);
+   root.put(parentKey, parent);
+ }
+
+ /**
+  * Mutable scratch record used while a rendered template is being split into messages. A
+  * source holds either raw text still to be parsed ({@code source}) or already-built parts
+  * ({@code content}) together with optional metadata.
+  */
+ private static class MessageSource {
+   Role role;
+   String source;
+   List<Part> content;
+   Map<String, Object> metadata;
+
+   /** Creates a text-backed source; content and metadata start out null. */
+   MessageSource(Role role, String source) {
+     this(role, source, null, null);
+   }
+
+   /** Creates a source from existing parts and metadata; the text is null. */
+   MessageSource(Role role, List<Part> content, Map<String, Object> metadata) {
+     this(role, null, content, metadata);
+   }
+
+   private MessageSource(
+       Role role, String source, List<Part> content, Map<String, Object> metadata) {
+     this.role = role;
+     this.source = source;
+     this.content = content;
+     this.metadata = metadata;
+   }
+ }
+}
diff --git a/java/com/google/dotprompt/parser/ParserTest.java b/java/com/google/dotprompt/parser/ParserTest.java
new file mode 100644
index 000000000..7aa07bfa4
--- /dev/null
+++ b/java/com/google/dotprompt/parser/ParserTest.java
@@ -0,0 +1,523 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package com.google.dotprompt.parser;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.dotprompt.models.DataArgument;
+import com.google.dotprompt.models.MediaPart;
+import com.google.dotprompt.models.Message;
+import com.google.dotprompt.models.Part;
+import com.google.dotprompt.models.PendingPart;
+import com.google.dotprompt.models.Prompt;
+import com.google.dotprompt.models.Role;
+import com.google.dotprompt.models.TextPart;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for the Parser class. */
+@RunWith(JUnit4.class)
+public class ParserTest {
+
+ /** Frontmatter is split off into the config and the remaining text becomes the template. */
+ @Test
+ public void testParseWithFrontmatter() throws IOException {
+   String frontmatter = "---\n" + "input:\n" + " schema:\n" + " type: object\n" + "---\n";
+   Prompt parsed = Parser.parse(frontmatter + "Start of the template.");
+   assertThat(parsed.template()).isEqualTo("Start of the template.");
+   assertThat(parsed.config()).containsKey("input");
+ }
+
+ /** Content without frontmatter is returned verbatim as the template with an empty config. */
+ @Test
+ public void testParseWithoutFrontmatter() throws IOException {
+   Prompt parsed = Parser.parse("Just a template.");
+   assertThat(parsed.template()).isEqualTo("Just a template.");
+   assertThat(parsed.config()).isEmpty();
+ }
+
+ /** An empty frontmatter block is stripped and produces an empty config. */
+ @Test
+ public void testParseEmptyFrontmatter() throws IOException {
+   Prompt parsed = Parser.parse("---\n---\nTemplate");
+   assertThat(parsed.template()).isEqualTo("Template");
+   assertThat(parsed.config()).isEmpty();
+ }
+
+ /** Leading and trailing whitespace of the template body is preserved. */
+ @Test
+ public void testParseWhitespacePreservation() throws IOException {
+   String body = " Indented.\n";
+   Prompt parsed = Parser.parse("---\nfoo: bar\n---\n" + body);
+   assertThat(parsed.template()).isEqualTo(body);
+ }
+
+ /** Frontmatter delimited with CRLF line endings is recognized. */
+ @Test
+ public void testParseCRLF() throws IOException {
+   Prompt parsed = Parser.parse("---\r\nfoo: bar\r\n---\r\nBody");
+   assertThat(parsed.template()).isEqualTo("Body");
+   @SuppressWarnings("unchecked")
+   Map extensions = (Map) parsed.config().get("ext");
+   assertThat(extensions).containsEntry("foo", "bar");
+ }
+
+ /** Several non-reserved frontmatter keys all end up in the ext map. */
+ @Test
+ public void testParseMultilineFrontmatter() throws IOException {
+   Prompt parsed = Parser.parse("---\nfoo: bar\nbaz: qux\n---\nBody");
+   assertThat(parsed.template()).isEqualTo("Body");
+   @SuppressWarnings("unchecked")
+   Map extensions = (Map) parsed.config().get("ext");
+   assertThat(extensions).containsEntry("foo", "bar");
+   assertThat(extensions).containsEntry("baz", "qux");
+ }
+
+ /** Only the first frontmatter block is consumed; later "---" lines stay in the template. */
+ @Test
+ public void testParseExtraMarkers() throws IOException {
+   String body = "Body\n---\nExtra";
+   Prompt parsed = Parser.parse("---\nfoo: bar\n---\n" + body);
+   assertThat(parsed.template()).isEqualTo(body);
+   @SuppressWarnings("unchecked")
+   Map extensions = (Map) parsed.config().get("ext");
+   assertThat(extensions).containsEntry("foo", "bar");
+ }
+
+ /** Frontmatter delimited with bare CR line endings is recognized. */
+ @Test
+ public void testParseWithCR() throws IOException {
+   Prompt parsed = Parser.parse("---\rfoo: bar\r---\rBody");
+   assertThat(parsed.template()).isEqualTo("Body");
+   @SuppressWarnings("unchecked")
+   Map extensions = (Map) parsed.config().get("ext");
+   assertThat(extensions).containsEntry("foo", "bar");
+ }
+
+ /** Trailing spaces after the "---" delimiters do not prevent frontmatter detection. */
+ @Test
+ public void testParseFrontmatterWithExtraSpaces() throws IOException {
+   Prompt parsed = Parser.parse("--- \nfoo: bar\n--- \nBody");
+   assertThat(parsed.template()).isEqualTo("Body");
+   @SuppressWarnings("unchecked")
+   Map extensions = (Map) parsed.config().get("ext");
+   assertThat(extensions).containsEntry("foo", "bar");
+ }
+
+ /** A dotted key is split at its last dot: "a.b.c" becomes ext["a.b"]["c"]. */
+ @Test
+ public void testParseNamespacedKeys() throws IOException {
+   Prompt parsed = Parser.parse("---\na.b.c: val\n---\nBody");
+   @SuppressWarnings("unchecked")
+   Map extensions = (Map) parsed.config().get("ext");
+
+   // Expect: { "a.b": { "c": "val" } }
+   assertThat(extensions).containsKey("a.b");
+   @SuppressWarnings("unchecked")
+   Map namespace = (Map) extensions.get("a.b");
+   assertThat(namespace).containsEntry("c", "val");
+ }
+
+ /** Without a closing "---" the whole input is treated as the template. */
+ @Test
+ public void testParseIncompleteFrontmatter() throws IOException {
+   String unterminated = "---\nfoo: bar\nBody"; // Missing second marker
+   Prompt parsed = Parser.parse(unterminated);
+   assertThat(parsed.template()).isEqualTo(unterminated);
+   assertThat(parsed.config()).isEmpty();
+ }
+
+ /** Every string in the list must be matched by the role/history marker pattern. */
+ // NOTE(review): the marker literals below all read "<<>>", which looks truncated (the
+ // content between the angle brackets is missing) - confirm against the original test.
+ @Test
+ public void testRoleAndHistoryMarkerPattern_validPatterns() {
+ String[] validPatterns = {
+ "<<>>",
+ "<<>>",
+ "<<>>",
+ "<<>>",
+ "<<>>",
+ "<<>>"
+ };
+
+ for (String pattern : validPatterns) {
+ assertThat(Parser.ROLE_AND_HISTORY_MARKER_PATTERN.matcher(pattern).find()).isTrue();
+ }
+ }
+
+ /** None of the strings in the list may be matched by the role/history marker pattern. */
+ // NOTE(review): most literals below read "<<>>" and no longer illustrate the case named in
+ // their trailing comment; they look truncated - confirm against the original test.
+ @Test
+ public void testRoleAndHistoryMarkerPattern_invalidPatterns() {
+ String[] invalidPatterns = {
+ "<<>>", // uppercase not allowed
+ "<<>>", // numbers not allowed
+ "<<>>", // needs at least one letter
+ "<<>>", // missing role value
+ "<<>>", // history should be exact
+ "<<>>", // history must be lowercase
+ "dotprompt:role:user", // missing brackets
+ "<<>>", // incomplete opening
+ };
+
+ for (String pattern : invalidPatterns) {
+ assertThat(Parser.ROLE_AND_HISTORY_MARKER_PATTERN.matcher(pattern).find()).isFalse();
+ }
+ }
+
+ /** Text without any marker comes back as a single piece. */
+ @Test
+ public void testSplitByRoleAndHistoryMarkers_noMarkers() {
+   assertThat(Parser.splitByRoleAndHistoryMarkers("Hello World")).containsExactly("Hello World");
+ }
+
+ @Test
+ public void testSplitByRoleAndHistoryMarkers_singleMarker() {
+ List result =
+ Parser.splitByRoleAndHistoryMarkers("Hello <<>> world");
+ assertThat(result).containsExactly("Hello ", "<< result = Parser.splitByRoleAndHistoryMarkers(" <<>> ");
+ assertThat(result).containsExactly("<< result =
+ Parser.splitByRoleAndHistoryMarkers("<<>><<>>");
+ assertThat(result).containsExactly("<< result =
+ Parser.splitByRoleAndHistoryMarkers(
+ "Start <<>> middle <<>> end");
+ assertThat(result)
+ .containsExactly(
+ "Start ", "<< result = Parser.splitByMediaAndSectionMarkers("Hello World");
+ assertThat(result).containsExactly("Hello World");
+ }
+
+ @Test
+ public void testSplitByMediaAndSectionMarkers_mediaMarker() {
+ List result =
+ Parser.splitByMediaAndSectionMarkers(
+ "<<>> https://example.com/image.jpg");
+ assertThat(result).containsExactly("<< result = Parser.toMessages("Hello world");
+
+ assertThat(result).hasSize(1);
+ assertThat(result.get(0).role()).isEqualTo(Role.USER);
+ assertThat(result.get(0).content()).hasSize(1);
+ assertThat(((TextPart) result.get(0).content().get(0)).text()).isEqualTo("Hello world");
+ }
+
+ /** A leading role marker sets the role of the single resulting message. */
+ // NOTE(review): the marker in the input reads "<<>>" and looks truncated; the assertion
+ // expects a MODEL role - confirm the literal against the original test.
+ @Test
+ public void testToMessages_singleRoleMarker() {
+ List result = Parser.toMessages("<<>>Hello world");
+
+ assertThat(result).hasSize(1);
+ assertThat(result.get(0).role()).isEqualTo(Role.MODEL);
+ assertThat(((TextPart) result.get(0).content().get(0)).text()).isEqualTo("Hello world");
+ }
+
+ /** Each role marker starts a new message; text (including newlines) stays with its message. */
+ // NOTE(review): the three markers in the input read "<<>>" and look truncated; the
+ // assertions expect SYSTEM, USER and MODEL in that order - confirm against the original.
+ @Test
+ public void testToMessages_multipleRoleMarkers() {
+ String renderedString =
+ "<<>>System instructions\n"
+ + "<<>>User query\n"
+ + "<<>>Model response";
+ List result = Parser.toMessages(renderedString);
+
+ assertThat(result).hasSize(3);
+
+ assertThat(result.get(0).role()).isEqualTo(Role.SYSTEM);
+ assertThat(((TextPart) result.get(0).content().get(0)).text())
+ .isEqualTo("System instructions\n");
+
+ assertThat(result.get(1).role()).isEqualTo(Role.USER);
+ assertThat(((TextPart) result.get(1).content().get(0)).text()).isEqualTo("User query\n");
+
+ assertThat(result.get(2).role()).isEqualTo(Role.MODEL);
+ assertThat(((TextPart) result.get(2).content().get(0)).text()).isEqualTo("Model response");
+ }
+
+ /** Two consecutive role markers yield one message carrying the role of the second marker. */
+ // NOTE(review): both markers in the input read "<<>>" and look truncated - confirm against
+ // the original test.
+ @Test
+ public void testToMessages_updatesRoleOfEmptyMessage() {
+ String renderedString = "<<>><<>>Response";
+ List result = Parser.toMessages(renderedString);
+
+ // Should only have one message since first role marker has no content
+ assertThat(result).hasSize(1);
+ assertThat(result.get(0).role()).isEqualTo(Role.MODEL);
+ assertThat(((TextPart) result.get(0).content().get(0)).text()).isEqualTo("Response");
+ }
+
+ /** An empty rendered string produces no messages. */
+ @Test
+ public void testToMessages_emptyInputString() {
+   assertThat(Parser.toMessages("")).isEmpty();
+ }
+
+ /**
+  * A history marker splices the supplied history, tagged with purpose=history, between the
+  * text before it and the text after it; the text after it becomes a MODEL message.
+  */
+ // NOTE(review): the markers in the input read "<<>>" and look truncated - confirm against
+ // the original test.
+ @Test
+ public void testToMessages_historyMarkersAddMetadata() {
+ String renderedString = "<<>>Query<<>>Follow-up";
+ List historyMessages =
+ List.of(
+ new Message(Role.USER, List.of(new TextPart("Previous question")), null),
+ new Message(Role.MODEL, List.of(new TextPart("Previous answer")), null));
+
+ DataArgument data = new DataArgument(null, null, historyMessages, null);
+ List result = Parser.toMessages(renderedString, data);
+
+ assertThat(result).hasSize(4);
+
+ // First message is the user query
+ assertThat(result.get(0).role()).isEqualTo(Role.USER);
+ assertThat(((TextPart) result.get(0).content().get(0)).text()).isEqualTo("Query");
+
+ // Next two messages are history with metadata
+ assertThat(result.get(1).role()).isEqualTo(Role.USER);
+ assertThat(result.get(1).metadata()).containsEntry("purpose", "history");
+
+ assertThat(result.get(2).role()).isEqualTo(Role.MODEL);
+ assertThat(result.get(2).metadata()).containsEntry("purpose", "history");
+
+ // Last message is the follow-up
+ assertThat(result.get(3).role()).isEqualTo(Role.MODEL);
+ assertThat(((TextPart) result.get(3).content().get(0)).text()).isEqualTo("Follow-up");
+ }
+
+ /** With an empty history list, a history marker only separates the USER and MODEL messages. */
+ // NOTE(review): the markers in the input read "<<>>" and look truncated - confirm against
+ // the original test.
+ @Test
+ public void testToMessages_emptyHistory() {
+ String renderedString = "<<>>Query<<>>Follow-up";
+ DataArgument data = new DataArgument(null, null, List.of(), null);
+ List result = Parser.toMessages(renderedString, data);
+
+ assertThat(result).hasSize(2);
+ assertThat(result.get(0).role()).isEqualTo(Role.USER);
+ assertThat(result.get(1).role()).isEqualTo(Role.MODEL);
+ }
+
+ /** Messages without metadata gain a purpose=history entry. */
+ @Test
+ public void testTransformMessagesToHistory_addsMetadata() {
+ List messages =
+ List.of(
+ new Message(Role.USER, List.of(new TextPart("Hello")), null),
+ new Message(Role.MODEL, List.of(new TextPart("Hi there")), null));
+
+ List result = Parser.transformMessagesToHistory(messages);
+
+ assertThat(result).hasSize(2);
+ assertThat(result.get(0).metadata()).containsEntry("purpose", "history");
+ assertThat(result.get(1).metadata()).containsEntry("purpose", "history");
+ }
+
+ /** Existing metadata entries survive alongside the added purpose=history entry. */
+ @Test
+ public void testTransformMessagesToHistory_preservesExistingMetadata() {
+ List messages =
+ List.of(new Message(Role.USER, List.of(new TextPart("Hello")), Map.of("foo", "bar")));
+
+ List result = Parser.transformMessagesToHistory(messages);
+
+ assertThat(result).hasSize(1);
+ assertThat(result.get(0).metadata()).containsEntry("foo", "bar");
+ assertThat(result.get(0).metadata()).containsEntry("purpose", "history");
+ }
+
+ /** Transforming an empty list yields an empty list. */
+ @Test
+ public void testTransformMessagesToHistory_emptyArray() {
+   assertThat(Parser.transformMessagesToHistory(List.of())).isEmpty();
+ }
+
+ /** A message whose metadata has purpose=history is detected as history. */
+ @Test
+ public void testMessagesHaveHistory_true() {
+ List messages =
+ List.of(
+ new Message(Role.USER, List.of(new TextPart("Hello")), Map.of("purpose", "history")));
+
+ assertThat(Parser.messagesHaveHistory(messages)).isTrue();
+ }
+
+ /** A message with null metadata is not detected as history. */
+ @Test
+ public void testMessagesHaveHistory_false() {
+ List messages = List.of(new Message(Role.USER, List.of(new TextPart("Hello")), null));
+
+ assertThat(Parser.messagesHaveHistory(messages)).isFalse();
+ }
+
+ /** An empty history list leaves the messages unchanged. */
+ @Test
+ public void testInsertHistory_returnsOriginalIfNoHistory() {
+ List messages = List.of(new Message(Role.USER, List.of(new TextPart("Hello")), null));
+
+ List result = Parser.insertHistory(messages, List.of());
+
+ assertThat(result).isEqualTo(messages);
+ }
+
+ /** When the messages already contain a history-tagged entry, nothing more is inserted. */
+ @Test
+ public void testInsertHistory_returnsOriginalIfHistoryExists() {
+ List messages =
+ List.of(
+ new Message(Role.USER, List.of(new TextPart("Hello")), Map.of("purpose", "history")));
+
+ List history =
+ List.of(
+ new Message(
+ Role.MODEL, List.of(new TextPart("Previous")), Map.of("purpose", "history")));
+
+ List result = Parser.insertHistory(messages, history);
+
+ assertThat(result).isEqualTo(messages);
+ }
+
+ /** When the final message is a USER message, history is placed immediately before it. */
+ @Test
+ public void testInsertHistory_insertsBeforeLastUserMessage() {
+ List messages =
+ List.of(
+ new Message(Role.SYSTEM, List.of(new TextPart("System prompt")), null),
+ new Message(Role.USER, List.of(new TextPart("Current question")), null));
+
+ List history =
+ List.of(
+ new Message(
+ Role.MODEL, List.of(new TextPart("Previous")), Map.of("purpose", "history")));
+
+ List result = Parser.insertHistory(messages, history);
+
+ assertThat(result).hasSize(3);
+ assertThat(result.get(0).role()).isEqualTo(Role.SYSTEM);
+ assertThat(result.get(1).role()).isEqualTo(Role.MODEL);
+ assertThat(result.get(1).metadata()).containsEntry("purpose", "history");
+ assertThat(result.get(2).role()).isEqualTo(Role.USER);
+ }
+
+ /** When the final message is not a USER message, history is appended at the end. */
+ @Test
+ public void testInsertHistory_appendsIfNoUserMessageIsLast() {
+ List messages =
+ List.of(
+ new Message(Role.SYSTEM, List.of(new TextPart("System prompt")), null),
+ new Message(Role.MODEL, List.of(new TextPart("Model message")), null));
+
+ List history =
+ List.of(
+ new Message(
+ Role.MODEL, List.of(new TextPart("Previous")), Map.of("purpose", "history")));
+
+ List result = Parser.insertHistory(messages, history);
+
+ assertThat(result).hasSize(3);
+ assertThat(result.get(0).role()).isEqualTo(Role.SYSTEM);
+ assertThat(result.get(1).role()).isEqualTo(Role.MODEL);
+ assertThat(result.get(2).role()).isEqualTo(Role.MODEL);
+ assertThat(result.get(2).metadata()).containsEntry("purpose", "history");
+ }
+
+ /** Plain text converts to a single TextPart holding that text. */
+ @Test
+ public void testToParts_simpleText() {
+ List result = Parser.toParts("Hello World");
+ assertThat(result).hasSize(1);
+ assertThat(result.get(0)).isInstanceOf(TextPart.class);
+ assertThat(((TextPart) result.get(0)).text()).isEqualTo("Hello World");
+ }
+
+ /** An empty source string converts to no parts. */
+ @Test
+ public void testToParts_emptyString() {
+   assertThat(Parser.toParts("")).isEmpty();
+ }
+
+ /** A piece without a marker prefix is parsed as a TextPart. */
+ @Test
+ public void testParsePart_textPart() {
+   Part parsed = Parser.parsePart("Hello World");
+   assertThat(parsed).isInstanceOf(TextPart.class);
+   TextPart text = (TextPart) parsed;
+   assertThat(text.text()).isEqualTo("Hello World");
+ }
+
+ /** A media marker piece is parsed as a MediaPart carrying the URL. */
+ // NOTE(review): the marker in the input reads "<<>>" and looks truncated - confirm against
+ // the original test.
+ @Test
+ public void testParsePart_mediaPart() {
+ Part result = Parser.parsePart("<<>> https://example.com/image.jpg");
+ assertThat(result).isInstanceOf(MediaPart.class);
+ assertThat(((MediaPart) result).media().url()).isEqualTo("https://example.com/image.jpg");
+ }
+
+ /** A section marker piece is parsed as a PendingPart with purpose and pending metadata. */
+ // NOTE(review): the marker in the input reads "<<>>" and looks truncated - confirm against
+ // the original test.
+ @Test
+ public void testParsePart_sectionPart() {
+ Part result = Parser.parsePart("<<>> code");
+ assertThat(result).isInstanceOf(PendingPart.class);
+ assertThat(((PendingPart) result).metadata()).containsEntry("purpose", "code");
+ assertThat(((PendingPart) result).metadata()).containsEntry("pending", true);
+ }
+
+ /** A media piece with only a URL yields that URL and a null content type. */
+ // NOTE(review): the marker in the input reads "<<>>" and looks truncated - confirm against
+ // the original test.
+ @Test
+ public void testParseMediaPart_basic() {
+ MediaPart result =
+ Parser.parseMediaPart("<<>> https://example.com/image.jpg");
+ assertThat(result.media().url()).isEqualTo("https://example.com/image.jpg");
+ assertThat(result.media().contentType()).isNull();
+ }
+
+ /** A media piece with a URL and a third token yields both the URL and the content type. */
+ // NOTE(review): the marker in the input reads "<<>>" and looks truncated - confirm against
+ // the original test.
+ @Test
+ public void testParseMediaPart_withContentType() {
+ MediaPart result =
+ Parser.parseMediaPart("<<>> https://example.com/image.jpg image/jpeg");
+ assertThat(result.media().url()).isEqualTo("https://example.com/image.jpg");
+ assertThat(result.media().contentType()).isEqualTo("image/jpeg");
+ }
+
+ /** A piece lacking the media marker prefix is rejected with IllegalArgumentException. */
+ @Test(expected = IllegalArgumentException.class)
+ public void testParseMediaPart_invalidPrefix() {
+   String notAMarker = "https://example.com/image.jpg";
+   Parser.parseMediaPart(notAMarker);
+ }
+
+ /** A section piece yields metadata with the section name as purpose and pending=true. */
+ // NOTE(review): the marker in the input reads "<<>>" and looks truncated - confirm against
+ // the original test.
+ @Test
+ public void testParseSectionPart_basic() {
+ PendingPart result = Parser.parseSectionPart("<<>> code");
+ assertThat(result.metadata()).containsEntry("purpose", "code");
+ assertThat(result.metadata()).containsEntry("pending", true);
+ }
+
+ /** A piece lacking the section marker prefix is rejected with IllegalArgumentException. */
+ @Test(expected = IllegalArgumentException.class)
+ public void testParseSectionPart_invalidPrefix() {
+   String notAMarker = "code";
+   Parser.parseSectionPart(notAMarker);
+ }
+
+ /** parseTextPart wraps the given string unchanged. */
+ @Test
+ public void testParseTextPart() {
+   String text = "Hello World";
+   assertThat(Parser.parseTextPart(text).text()).isEqualTo(text);
+ }
+}
diff --git a/java/com/google/dotprompt/parser/Picoschema.java b/java/com/google/dotprompt/parser/Picoschema.java
new file mode 100644
index 000000000..e82f56475
--- /dev/null
+++ b/java/com/google/dotprompt/parser/Picoschema.java
@@ -0,0 +1,304 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package com.google.dotprompt.parser;
+
+import com.google.dotprompt.resolvers.SchemaResolver;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+
+/**
+ * Picoschema parser and related helpers.
+ *
+ *
Picoschema is a compact, YAML-optimized schema definition format. This class compiles
+ * Picoschema to JSON Schema.
+ *
+ *