DQN training with 2.44.0, gymnasium.Env, and an action mask giving an error #51700

Open
bnemetchek2 opened this issue Mar 26, 2025 · 0 comments
I'm attempting to use the new API stack but am getting a runtime error: AttributeError: 'NoneType' object has no attribute 'build_encoder'

I don't see an example of this scenario (a gymnasium env with an action mask under DQN) and am hoping someone can point me in the right direction.

import os
import ray
import torch
from ray.rllib.algorithms.algorithm_config import AlgorithmConfig
from ray.rllib.algorithms.dqn import DQN
from ray.tune.logger import pretty_print
from ray.tune.registry import register_env
from pofc_env import POFCEnvironment

# Directory for saving models
MODEL_DIR = os.path.join(os.path.dirname(__file__), "../models")
os.makedirs(MODEL_DIR, exist_ok=True)

# Register our custom environment
register_env("pofc_env", lambda config: POFCEnvironment(config))


def train_pofc_agent():
    # Initialize Ray
    if os.environ.get("RAY_ADDRESS"):
        ray.init(ignore_reinit_error=True, address="auto")
    else:
        ray.init(ignore_reinit_error=True)

    # Configure DQN with new API settings
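    # NOTE: this uses the generic AlgorithmConfig; RLlib's examples typically
    # start DQN from DQNConfig, which carries DQN's default RLModule spec.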
    config = (
        AlgorithmConfig()
        .environment(
            env="pofc_env",
            env_config={
                "server_address": "localhost:50051",
                "player_names": ["Player1", "Player2"],
                "is_training": True
            },
            action_mask_key="action_mask"
        )
        .training(
            train_batch_size_per_learner=64,  # New API parameter
            gamma=0.99,
            lr=0.001,
            num_epochs=1,
            # Configure model for dictionary observations
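            # NOTE: on the new API stack, model settings normally go through
            # .rl_module(model_config=...) rather than this old-stack dict.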
            model={
                "fcnet_hiddens": [64, 64]
            }
        )
        .env_runners(
            num_env_runners=2,
            num_envs_per_env_runner=4
        )
        .api_stack(
            enable_rl_module_and_learner=True,
            enable_env_runner_and_connector_v2=True
        )
        .resources(num_gpus=1 if torch.cuda.is_available() else 0)
    )

    # Build the algorithm
    algo = DQN(config=config)
   
    # Training loop
    num_iterations = 50
    for i in range(num_iterations):
        print(f"Training iteration {i+1}/{num_iterations}")
        result = algo.train()
        print(pretty_print(result))
        
        if (i + 1) % 50 == 0:
            checkpoint_dir = algo.save(MODEL_DIR)
            print(f"Checkpoint saved at {checkpoint_dir}")
        
        if (i + 1) % 5 == 0:
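            # NOTE: as written, this reads and re-writes the same policy's
            # state (a no-op); a real self-play swap would copy the state
            # between two different policy/module IDs.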
            print("Updating opponent policy with current policy")
            policy1_state = algo.get_policy().get_state()
            algo.get_policy().set_state(policy1_state)
    
    checkpoint_dir = algo.save(MODEL_DIR)
    print(f"Final model saved at {checkpoint_dir}")
    
    algo.stop()
    ray.shutdown()

if __name__ == "__main__":
    train_pofc_agent()
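
For reference, pofc_env isn't included above, but since the config sets action_mask_key="action_mask", POFCEnvironment presumably exposes a Dict observation space with an action_mask entry alongside the real observations. A minimal sketch of that shape (hypothetical sizes and names, not the actual implementation):

import gymnasium as gym
import numpy as np
from gymnasium import spaces

class MaskedEnvSketch(gym.Env):
    # Hypothetical stand-in for POFCEnvironment; only the Dict observation
    # layout that action_mask_key="action_mask" expects is shown here.
    def __init__(self, config=None):
        self.num_actions = 4  # assumed size; the real env defines its own
        self.action_space = spaces.Discrete(self.num_actions)
        self.observation_space = spaces.Dict({
            # 1.0 = action currently legal, 0.0 = action masked out
            "action_mask": spaces.Box(0.0, 1.0, (self.num_actions,), np.float32),
            "observations": spaces.Box(-1.0, 1.0, (8,), np.float32),
        })

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)
        return self._obs(), {}

    def step(self, action):
        # Dummy transition; the real env advances the game state.
        return self._obs(), 0.0, False, False, {}

    def _obs(self):
        return {
            "action_mask": np.ones(self.num_actions, dtype=np.float32),
            "observations": np.zeros(8, dtype=np.float32),
        }

The mask marks which discrete actions are legal in the current state; the agent should only pick actions whose mask entry is 1.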
