Azure
diff --git a/‎sdk/python/foundation-models/system/reinforcement-learning/reinforcement-learning.ipynb‎
Lines changed: 759 additions & 747 deletions b/‎sdk/python/foundation-models/system/reinforcement-learning/reinforcement-learning.ipynb‎
Lines changed: 759 additions & 747 deletions
diff --git a/‎sdk/python/foundation-models/system/reinforcement-learning/requirements.txt‎
Lines changed: 8 additions & 4 deletions b/‎sdk/python/foundation-models/system/reinforcement-learning/requirements.txt‎
Lines changed: 8 additions & 4 deletions
diff --git a/‎sdk/python/foundation-models/system/reinforcement-learning/scripts/dataset.py‎
Lines changed: 51 additions & 25 deletions b/‎sdk/python/foundation-models/system/reinforcement-learning/scripts/dataset.py‎
Lines changed: 51 additions & 25 deletions
diff --git a/‎sdk/python/foundation-models/system/reinforcement-learning/scripts/deployment.py‎
Lines changed: 31 additions & 27 deletions b/‎sdk/python/foundation-models/system/reinforcement-learning/scripts/deployment.py‎
Lines changed: 31 additions & 27 deletions
diff --git a/‎sdk/python/foundation-models/system/reinforcement-learning/scripts/evaluation.py‎
Lines changed: 16 additions & 8 deletions b/‎sdk/python/foundation-models/system/reinforcement-learning/scripts/evaluation.py‎
Lines changed: 16 additions & 8 deletions
@@ -1,4 +1,8 @@
-azure-ai-ml
-azure-identity
-huggingface_hub
-matplotlib
+# Requires Python 3.12 or above
+azure-ai-ml==1.30.0
+azure-identity==1.25.1
+azureml-mlflow==1.60.0.post1
+huggingface-hub==1.1.5
+matplotlib==3.10.7
+mlflow==2.22.2
+ipykernel
@@ -29,12 +29,16 @@ def download_finqa_dataset(src: str, target_dir: str = "data/raw"):
     with TemporaryDirectory() as tmpdir:
         print(f"Cloning raw FinQA dataset to {tmpdir} ...")
         subprocess.run(["git", "clone", src, tmpdir], check=True)
+        os.makedirs(target_dir, exist_ok=True)
         print("Converting FinQA dataset to jsonl format ...")
         dataset_dir = os.path.join(tmpdir, "dataset")
-        for file_name in os.listdir(dataset_dir):
-            target_file_name = file_name.split(".")[0] + ".jsonl"
-            os.makedirs(target_dir, exist_ok=True)
-            convert_to_jsonl(current_path=os.path.join(dataset_dir, file_name), target_path=os.path.join(target_dir, target_file_name))
+        filenames = ["train.json", "dev.json", "test.json"]
+        for filename in filenames:
+            target_file_name = filename.split(".")[0] + ".jsonl"
+            convert_to_jsonl(
+                current_path=os.path.join(dataset_dir, filename),
+                target_path=os.path.join(target_dir, target_file_name),
+            )
 
 
 def convert_to_jsonl(current_path: str, target_path: str):
@@ -46,8 +50,10 @@ def convert_to_jsonl(current_path: str, target_path: str):
     print(f"Converted {current_path} to {target_path}.")
 
 
-def prepare_finqa_dataset(ml_client: MLClient, data_dir: str = "data", register_datasets: bool = False) -> tuple[str, str, str]:
-    """Prepare the FinQA dataset for training and evaluation."""   
+def prepare_finqa_dataset(
+    ml_client: MLClient, data_dir: str = "data", register_datasets: bool = False
+) -> tuple[str, str, str]:
+    """Prepare the FinQA dataset for training and evaluation."""
     # VERL finetuning relies on acceptable data sources for reward modeling and evaluation
     data_source = "openai/gsm8k"
 
@@ -68,30 +74,42 @@ def format_list_to_string(data_list: list):
         return "\n".join(str(item) for item in data_list)
 
     def format_table(table_list: list):
-            """Format table data as string"""
-            if not table_list:
-                return ""
-            table_str = "\nTable:\n"
-            for row in table_list:
-                if isinstance(row, list):
-                    table_str += " | ".join(str(cell) for cell in row) + "\n"
-                else:
-                    table_str += str(row) + "\n"
-            return table_str
+        """Format table data as string"""
+        if not table_list:
+            return ""
+        table_str = "\nTable:\n"
+        for row in table_list:
+            if isinstance(row, list):
+                table_str += " | ".join(str(cell) for cell in row) + "\n"
+            else:
+                table_str += str(row) + "\n"
+        return table_str
 
     def map_fn(example: pd.Series, idx: int, split: str):
         """Map function to transform each example into desired format."""
         pre_instruction = "Please answer the following financial question based on the context provided."
-        post_instruction = 'Let\'s think step by step and output the final answer after "####".'
+        post_instruction = (
+            'Let\'s think step by step and output the final answer after "####".'
+        )
         qa = example.get("qa", {})
         question = qa.get("question", "")
-        answer = qa.get('answer', qa.get('exe_ans', ''))
-        gold_evidence = "\n".join(qa.get('gold_inds', {}).values())
+        answer = qa.get("answer", qa.get("exe_ans", ""))
+        gold_evidence = "\n".join(qa.get("gold_inds", {}).values())
         pre_text = format_list_to_string(example.get("pre_text", []))
         post_text = format_list_to_string(example.get("post_text", []))
-        table = format_table(example.get('table', [])).strip()
+        table = format_table(example.get("table", [])).strip()
         # Build prompt content according to specified schema
-        prompt_content = "\n\n".join([pre_instruction, "Context: " + pre_text, gold_evidence, post_text, table, "Question: " + question, post_instruction])
+        prompt_content = "\n\n".join(
+            [
+                pre_instruction,
+                "Context: " + pre_text,
+                gold_evidence,
+                post_text,
+                table,
+                "Question: " + question,
+                post_instruction,
+            ]
+        )
         data = {
             "data_source": data_source,
             "prompt": [
@@ -117,9 +135,13 @@ def map_fn(example: pd.Series, idx: int, split: str):
     valid_dataset = pd.read_json(valid_dataset_path, lines=True)
 
     # map datasets
-    train_dataset = train_dataset.apply(lambda x: map_fn(x, x.name, split="train"), axis=1)
+    train_dataset = train_dataset.apply(
+        lambda x: map_fn(x, x.name, split="train"), axis=1
+    )
     test_dataset = test_dataset.apply(lambda x: map_fn(x, x.name, split="test"), axis=1)
-    valid_dataset = valid_dataset.apply(lambda x: map_fn(x, x.name, split="valid"), axis=1)
+    valid_dataset = valid_dataset.apply(
+        lambda x: map_fn(x, x.name, split="valid"), axis=1
+    )
 
     # save locally as jsonl
     train_dataset_path = os.path.join(data_dir, "train.jsonl")
@@ -134,7 +156,11 @@ def map_fn(example: pd.Series, idx: int, split: str):
         train_data = register_dataset(ml_client, "finqa_train", train_dataset_path)
         test_data = register_dataset(ml_client, "finqa_test", test_dataset_path)
         valid_data = register_dataset(ml_client, "finqa_valid", valid_dataset_path)
-        if (train_data and train_data.id) and (test_data and test_data.id) and (valid_data and valid_data.id):
+        if (
+            (train_data and train_data.id)
+            and (test_data and test_data.id)
+            and (valid_data and valid_data.id)
+        ):
             return train_data.id, test_data.id, valid_data.id
-    
+
     return train_dataset_path, test_dataset_path, valid_dataset_path
@@ -15,51 +15,53 @@
 
 def get_default_probe_settings() -> ProbeSettings:
     """Get default probe settings for deployments."""
-    return ProbeSettings(                                     # Probes are APIs exposed by the deployment which informs the frameworktraffic
-        initial_delay=1400,                                   # if the deployment is healthy and ready to receive 
+    return ProbeSettings(  # Probes are APIs exposed by the deployment which inform the framework
+        initial_delay=1400,  # if the deployment is healthy and ready to receive traffic
         period=30,
         timeout=2,
         success_threshold=1,
-        failure_threshold=30
+        failure_threshold=30,
     )
 
 
 def get_default_request_settings() -> OnlineRequestSettings:
     """Get default request settings for deployments."""
-    return OnlineRequestSettings(                            # Online request setting which controls timeout and concurrent request per instance
+    return OnlineRequestSettings(  # Online request settings which control the timeout and concurrent requests per instance
         request_timeout_ms=90000,
         max_concurrent_requests_per_instance=4,
     )
 
 
 def create_managed_deployment(
     ml_client: MLClient,
-    model_asset_id: str,                                                    # Asset ID of the model to deploy
-    instance_type: str,                                                     # Supported instance type for managed deployment
-    environment_asset_id: Optional[str] = None,                                              # Asset ID of the serving engine to use
+    model_asset_id: str,  # Asset ID of the model to deploy
+    instance_type: str,  # Supported instance type for managed deployment
+    environment_asset_id: Optional[str] = None,  # Asset ID of the serving engine to use
     endpoint_name: Optional[str] = None,
     endpoint_description: str = "Sample endpoint",
     endpoint_tags: dict = {},
     deployment_name: Optional[str] = None,
     deployment_env_vars: dict = {},
 ) -> str:
     """Create a managed deployment."""
-    guid = str(uuid.uuid4())[:8]                                      # Unique suffix to avoid name collisions
+    guid = str(uuid.uuid4())[:8]  # Unique suffix to avoid name collisions
     endpoint_name = endpoint_name or f"rl-endpoint"
-    endpoint_name = f"{endpoint_name}-{guid}"                         # Unique names prevent collisions and allow parallel experiments
+    endpoint_name = f"{endpoint_name}-{guid}"  # Unique names prevent collisions and allow parallel experiments
     deployment_name = deployment_name or "default"
 
-    endpoint = ManagedOnlineEndpoint(                              # Use AzureML endpoint abstraction for traffic management and auth
+    endpoint = ManagedOnlineEndpoint(  # Use AzureML endpoint abstraction for traffic management and auth
         name=endpoint_name,
         auth_mode="key",
         description=endpoint_description,
         tags=endpoint_tags,
     )
 
     print(f"Creating endpoint: {endpoint_name}")
-    ml_client.online_endpoints.begin_create_or_update(endpoint).wait()  # Using there the endpoint object to trigger actual endpoint in AML workspace.
+    ml_client.online_endpoints.begin_create_or_update(
+        endpoint
+    ).wait()  # Use the endpoint object to trigger creation of the actual endpoint in the AML workspace.
 
-    deployment = ManagedOnlineDeployment(                            # Use deployment abstraction for scaling, versioning, and isolation
+    deployment = ManagedOnlineDeployment(  # Use deployment abstraction for scaling, versioning, and isolation
         name=deployment_name,
         endpoint_name=endpoint_name,
         model=model_asset_id,
@@ -72,8 +74,8 @@ def create_managed_deployment(
         request_settings=get_default_request_settings(),
     )
 
-    print(f"Creating deployment (15-20 min)...")                        #                       
-    ml_client.online_deployments.begin_create_or_update(deployment).wait()  
+    print(f"Creating deployment (15-20 min)...")  #
+    ml_client.online_deployments.begin_create_or_update(deployment).wait()
 
     # Route all traffic to new deployment for immediate use
     endpoint.traffic = {deployment_name: 100}
@@ -86,10 +88,10 @@ def create_managed_deployment(
 
 def create_kubernetes_deployment(
     ml_client: MLClient,
-    model_asset_id: str,                                                    # Asset ID of the model to deploy
-    environment_asset_id: str,                                              # Asset ID of the serving engine to use
-    instance_type: str,                                                     # Kubernetes supports partial node usage granular upto the GPU level
-    compute_name: str,                                                      # Name of the compute which will be use for endpoint creation
+    model_asset_id: str,  # Asset ID of the model to deploy
+    environment_asset_id: str,  # Asset ID of the serving engine to use
+    instance_type: str,  # Kubernetes supports partial node usage, granular up to the GPU level
+    compute_name: str,  # Name of the compute which will be used for endpoint creation
     endpoint_name: Optional[str] = None,
     endpoint_description: str = "Sample endpoint",
     endpoint_tags: dict = {},
@@ -98,15 +100,15 @@ def create_kubernetes_deployment(
     model_mount_path: str = "/var/model-mount",
 ) -> str:
     """Create endpoint using Kubernetes."""
-                                                                    
+
     print("🌐 Creating endpoint...")
 
-    guid = str(uuid.uuid4())[:8]                                      # Unique suffix to avoid name collisions
+    guid = str(uuid.uuid4())[:8]  # Unique suffix to avoid name collisions
     endpoint_name = endpoint_name or f"rl-endpoint"
-    endpoint_name = f"{endpoint_name}-{guid}"                         # Unique names prevent collisions and allow parallel experiments
+    endpoint_name = f"{endpoint_name}-{guid}"  # Unique names prevent collisions and allow parallel experiments
     deployment_name = deployment_name or "default"
 
-    endpoint = KubernetesOnlineEndpoint(                              # Use AzureML endpoint abstraction for traffic management and auth
+    endpoint = KubernetesOnlineEndpoint(  # Use AzureML endpoint abstraction for traffic management and auth
         name=endpoint_name,
         auth_mode="key",
         compute=compute_name,
@@ -115,9 +117,11 @@ def create_kubernetes_deployment(
     )
 
     print(f"Creating endpoint: {endpoint_name}")
-    ml_client.online_endpoints.begin_create_or_update(endpoint).wait()  # Using there the endpoint object to trigger actual endpoint in AML workspace.
+    ml_client.online_endpoints.begin_create_or_update(
+        endpoint
+    ).wait()  # Use the endpoint object to trigger creation of the actual endpoint in the AML workspace.
 
-    deployment = KubernetesOnlineDeployment(                            # Use deployment abstraction for scaling, versioning, and isolation
+    deployment = KubernetesOnlineDeployment(  # Use deployment abstraction for scaling, versioning, and isolation
         name=deployment_name,
         endpoint_name=endpoint_name,
         model=model_asset_id,
@@ -131,8 +135,8 @@ def create_kubernetes_deployment(
         request_settings=get_default_request_settings(),
     )
 
-    print(f"Creating deployment (15-20 min)...")                        #                       
-    ml_client.online_deployments.begin_create_or_update(deployment).wait()  
+    print(f"Creating deployment (15-20 min)...")  #
+    ml_client.online_deployments.begin_create_or_update(deployment).wait()
 
     # Route all traffic to new deployment for immediate use
     endpoint.traffic = {deployment_name: 100}
@@ -165,7 +169,7 @@ def test_deployment(ml_client, endpoint_name):
 Context: A company has revenue of $1,000,000 and expenses of $750,000.
 
 Question: What is the profit margin as a percentage?
-Let's think step by step and put final answer after ####."""
+Let's think step by step and put final answer after ####.""",
             }
         ],
         "max_tokens": 512,
 
@@ -5,7 +5,8 @@
 from azure.ai.ml.entities import Job
 from scripts.run import monitor_run
 
-class EvaluationPipeline():
+
+class EvaluationPipeline:
     """Run Evaluation"""
 
     DEFAULT_CONFIGS = {
@@ -23,8 +24,7 @@ def __init__(self, ml_client: MLClient, registry_ml_client: MLClient):
         self.guid = str(uuid.uuid4())[:8]
         self._ml_client = ml_client
         self._eval_pipeline_component = registry_ml_client.components.get(
-            name="pipeline_model_evaluation",
-            label="latest"
+            name="pipeline_model_evaluation", label="latest"
         )
 
     def create_evaluate_pipeline(
@@ -35,7 +35,7 @@ def create_evaluate_pipeline(
         validation_dataset_path: Input,
         base_model_path: Optional[Input] = None,
         instance_type: Optional[str] = None,
-        config = {},
+        config={},
     ) -> Job:
         """Create and submit evaluation pipeline job using registry component."""
 
@@ -52,7 +52,7 @@ def create_pipeline():
                 checkpoint_base_path_1=model_dir_1,
                 checkpoint_base_path_2=model_dir_2,
                 validation_file=validation_dataset_path,
-                **self.DEFAULT_CONFIGS
+                **self.DEFAULT_CONFIGS,
             )
             return {"evaluation_results": eval_pipeline.outputs.evaluation_results}
 
@@ -68,7 +68,9 @@ def create_pipeline():
         # Submit job
         print("✓ Submitting Model Evaluation Pipeline ...")
         pipeline_object.display_name = f"evaluate-model-{self.guid}"
-        eval_run = self._ml_client.jobs.create_or_update(pipeline_object, experiment_name="evaluate-model")
+        eval_run = self._ml_client.jobs.create_or_update(
+            pipeline_object, experiment_name="evaluate-model"
+        )
 
         print(f"✓ Job submitted: {eval_run.name}")
         print(f"📊 Studio URL: {eval_run.studio_url}")
@@ -93,8 +95,14 @@ def run_evaluation_pipeline(
 
     grpo_model_input = Input(type=AssetTypes.URI_FOLDER, path=grpo_model_dir)
     rlpp_model_input = Input(type=AssetTypes.URI_FOLDER, path=rlpp_model_dir)
-    base_model_input = Input(type=AssetTypes.URI_FOLDER, path=base_model_path) if isinstance(base_model_path, str) else base_model_path
-    validation_dataset_input = Input(type=AssetTypes.URI_FILE, path=validation_dataset_path)
+    base_model_input = (
+        Input(type=AssetTypes.URI_FOLDER, path=base_model_path)
+        if isinstance(base_model_path, str)
+        else base_model_path
+    )
+    validation_dataset_input = Input(
+        type=AssetTypes.URI_FILE, path=validation_dataset_path
+    )
 
     eval_job = pipeline.create_evaluate_pipeline(
         compute=compute_cluster,