Skip to content

Commit 13a0eb6

Browse files
committed
Merge branch 'david-cpu-only-mode-1846' into david-cpu-only-llm
2 parents 66d263c + 32fb3b8 commit 13a0eb6

File tree

62 files changed

+592
-310
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+592
-310
lines changed

.github/workflows/ci_pipe.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,8 @@ jobs:
137137
test:
138138
name: Test
139139
runs-on: linux-amd64-gpu-v100-latest-1
140-
timeout-minutes: 60
140+
# Consider lowering this back down to 60 minutes per https://github.com/nv-morpheus/Morpheus/issues/1948
141+
timeout-minutes: 90
141142
container:
142143
credentials:
143144
username: '$oauthtoken'

docs/source/conf.py

+1
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@
199199
r'^http://$',
200200
r'^https://$',
201201
r'https://(platform\.)?openai.com',
202+
r'https://code.visualstudio.com'
202203
]
203204

204205
# Add any paths that contain templates here, relative to this directory.

examples/gnn_fraud_detection_pipeline/stages/model.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ def load_model(model_dir: str,
407407
n_layers=hyperparameters['n_layers'],
408408
embedding_size=hyperparameters['embedding_size'],
409409
target=hyperparameters['target_node']).to(device)
410-
model.load_state_dict(torch.load(os.path.join(model_dir, 'model.pt')))
410+
model.load_state_dict(torch.load(os.path.join(model_dir, 'model.pt'), weights_only=False))
411411

412412
return model, graph, hyperparameters
413413

examples/llm/common/utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import logging
1616

1717
import pymilvus
18-
from langchain.embeddings import HuggingFaceEmbeddings # pylint: disable=no-name-in-module
18+
from langchain_community.embeddings import HuggingFaceEmbeddings
1919

2020
from morpheus_llm.llm.services.llm_service import LLMService
2121
from morpheus_llm.llm.services.nemo_llm_service import NeMoLLMService

examples/llm/vdb_upload/langchain.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616

1717
# pylint: disable=no-name-in-module
1818
from langchain.document_loaders.rss import RSSFeedLoader
19-
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
2019
from langchain.text_splitter import RecursiveCharacterTextSplitter
2120
from langchain.vectorstores.milvus import Milvus
21+
from langchain_community.embeddings import HuggingFaceEmbeddings
2222

2323
from examples.llm.vdb_upload.vdb_utils import DEFAULT_RSS_URLS
2424
from morpheus.utils.logging_timer import log_time

examples/llm/vdb_upload/module/content_extractor_module.py

+4-7
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@
2929
import pypdfium2 as libpdfium
3030
from docx import Document
3131
from langchain.text_splitter import RecursiveCharacterTextSplitter
32-
from pydantic import BaseModel # pylint: disable=no-name-in-module
32+
from pydantic import BaseModel
33+
from pydantic import ConfigDict
3334
from pydantic import Field
3435
from pydantic import ValidationError
3536
from pydantic import field_validator
@@ -43,9 +44,7 @@ class CSVConverterSchema(BaseModel):
4344
chunk_overlap: int = 102 # Example default value
4445
chunk_size: int = 1024
4546
text_column_names: List[str]
46-
47-
class Config:
48-
extra = "forbid"
47+
model_config = ConfigDict(extra='forbid')
4948

5049

5150
class ContentExtractorSchema(BaseModel):
@@ -54,6 +53,7 @@ class ContentExtractorSchema(BaseModel):
5453
chunk_size: int = 512
5554
converters_meta: Dict[str, Dict] = Field(default_factory=dict)
5655
num_threads: int = 10
56+
model_config = ConfigDict(extra='forbid')
5757

5858
@field_validator('converters_meta', mode="before")
5959
@classmethod
@@ -66,9 +66,6 @@ def val_converters_meta(cls, to_validate: Dict[str, Dict]) -> Dict[str, Dict]:
6666
validated_meta[key] = value
6767
return validated_meta
6868

69-
class Config:
70-
extra = "forbid"
71-
7269

7370
logger = logging.getLogger(__name__)
7471

examples/llm/vdb_upload/module/file_source_pipe.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import mrc
2222
from pydantic import BaseModel
23+
from pydantic import ConfigDict
2324
from pydantic import Field
2425
from pydantic import ValidationError
2526

@@ -48,9 +49,7 @@ class FileSourcePipeSchema(BaseModel):
4849
vdb_resource_name: str
4950
watch: bool = False # Flag to watch file changes
5051
watch_interval: float = -5.0 # Interval to watch file changes
51-
52-
class Config:
53-
extra = "forbid"
52+
model_config = ConfigDict(extra='forbid')
5453

5554

5655
FileSourcePipeLoaderFactory = ModuleLoaderFactory("file_source_pipe", "morpheus_examples_llm", FileSourcePipeSchema)

examples/llm/vdb_upload/module/rss_source_pipe.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@
2020

2121
import mrc
2222
from pydantic import BaseModel
23+
from pydantic import ConfigDict
2324
from pydantic import Field
2425
from pydantic import ValidationError
25-
from pydantic import validator
26+
from pydantic import field_validator
2627

2728
from morpheus.modules.general.monitor import MonitorLoaderFactory
2829
from morpheus.modules.input.rss_source import RSSSourceLoaderFactory
@@ -52,8 +53,9 @@ class RSSSourcePipeSchema(BaseModel):
5253
strip_markup: bool = True
5354
vdb_resource_name: str
5455
web_scraper_config: Optional[Dict[Any, Any]] = None
56+
model_config = ConfigDict(extra='forbid')
5557

56-
@validator('feed_input', pre=True)
58+
@field_validator('feed_input')
5759
def validate_feed_input(cls, to_validate): # pylint: disable=no-self-argument
5860
if isinstance(to_validate, str):
5961
return [to_validate]
@@ -63,9 +65,6 @@ def validate_feed_input(cls, to_validate): # pylint: disable=no-self-argument
6365

6466
raise ValueError('feed_input must be a string or a list of strings')
6567

66-
class Config:
67-
extra = "forbid"
68-
6968

7069
RSSSourcePipeLoaderFactory = ModuleLoaderFactory("rss_source_pipe", "morpheus_examples_llm", RSSSourcePipeSchema)
7170

examples/llm/vdb_upload/module/schema_transform.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import mrc
2121
import mrc.core.operators as ops
2222
from pydantic import BaseModel
23+
from pydantic import ConfigDict
2324
from pydantic import Field
2425
from pydantic import ValidationError
2526

@@ -39,16 +40,12 @@ class ColumnTransformSchema(BaseModel):
3940
dtype: str
4041
op_type: str
4142
from_: Optional[str] = Field(None, alias="from")
42-
43-
class Config:
44-
extra = "forbid"
43+
model_config = ConfigDict(extra='forbid')
4544

4645

4746
class SchemaTransformSchema(BaseModel):
4847
schema_transform_config: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
49-
50-
class Config:
51-
extra = "forbid"
48+
model_config = ConfigDict(extra='forbid')
5249

5350

5451
SchemaTransformLoaderFactory = ModuleLoaderFactory("schema_transform", "morpheus_examples_llm", SchemaTransformSchema)

examples/llm/vdb_upload/module/vdb_resource_tagging_module.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import mrc
1818
from pydantic import BaseModel
19+
from pydantic import ConfigDict
1920
from pydantic import ValidationError
2021

2122
from morpheus.messages import ControlMessage
@@ -27,9 +28,7 @@
2728

2829
class VDBResourceTaggingSchema(BaseModel):
2930
vdb_resource_name: str
30-
31-
class Config:
32-
extra = "forbid"
31+
model_config = ConfigDict(extra='forbid')
3332

3433

3534
VDBResourceTaggingLoaderFactory = ModuleLoaderFactory("vdb_resource_tagging",

examples/llm/vdb_upload/module/web_scraper_module.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
import requests_cache
2323
from bs4 import BeautifulSoup
2424
from langchain.text_splitter import RecursiveCharacterTextSplitter
25-
from pydantic import BaseModel # pylint: disable=no-name-in-module
25+
from pydantic import BaseModel
26+
from pydantic import ConfigDict
2627
from pydantic import ValidationError
2728

2829
import cudf
@@ -41,9 +42,7 @@ class WebScraperSchema(BaseModel):
4142
enable_cache: bool = False
4243
cache_path: str = "./.cache/http/RSSDownloadStage.sqlite"
4344
cache_dir: str = "./.cache/llm/rss"
44-
45-
class Config:
46-
extra = "forbid"
45+
model_config = ConfigDict(extra='forbid')
4746

4847

4948
WebScraperLoaderFactory = ModuleLoaderFactory("web_scraper", "morpheus_examples_llm", WebScraperSchema)

examples/ransomware_detection/stages/create_features.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@
2020

2121
import cudf
2222

23-
from common.data_models import FeatureConfig # pylint: disable=no-name-in-module
24-
from common.feature_extractor import FeatureExtractor # pylint: disable=no-name-in-module
2523
from morpheus.cli.register_stage import register_stage
2624
from morpheus.config import Config
2725
from morpheus.config import PipelineModes
@@ -30,6 +28,9 @@
3028
from morpheus.pipeline.control_message_stage import ControlMessageStage
3129
from morpheus.pipeline.preallocator_mixin import PreallocatorMixin
3230

31+
from common.data_models import FeatureConfig # pylint: disable=no-name-in-module # isort: skip
32+
from common.feature_extractor import FeatureExtractor # pylint: disable=no-name-in-module # isort: skip
33+
3334

3435
@register_stage("create-features", modes=[PipelineModes.FIL])
3536
class CreateFeaturesRWStage(PreallocatorMixin, ControlMessageStage):

examples/ransomware_detection/stages/preprocessing.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import mrc
1919
import pandas as pd
2020

21-
from common.data_models import SnapshotData # pylint: disable=no-name-in-module
2221
from morpheus.cli.register_stage import register_stage
2322
from morpheus.common import TypeId
2423
from morpheus.config import Config
@@ -27,6 +26,8 @@
2726
from morpheus.messages import InferenceMemoryFIL
2827
from morpheus.stages.preprocess.preprocess_base_stage import PreprocessBaseStage
2928

29+
from common.data_models import SnapshotData # pylint: disable=no-name-in-module #isort:skip
30+
3031

3132
@register_stage("ransomware-preprocess", modes=[PipelineModes.FIL])
3233
class PreprocessingRWStage(PreprocessBaseStage):

pyproject.toml

+3-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ filterwarnings = [
4242

4343
testpaths = ["tests"]
4444

45-
addopts = "--benchmark-disable"
45+
# Don't run the benchmarks by default, don't search for tests in the tests/_utils directory which will trigger false
46+
# alarms
47+
addopts = "--benchmark-disable --ignore=tests/_utils"
4648

4749
asyncio_mode = "auto"
4850

python/morpheus/morpheus/controllers/file_to_df_controller.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from morpheus.utils.column_info import PreparedDFInfo
3131
from morpheus.utils.column_info import process_dataframe
3232
from morpheus.utils.downloader import Downloader
33+
from morpheus.utils.downloader import DownloadMethods
3334

3435
logger = logging.getLogger(__name__)
3536

@@ -103,6 +104,9 @@ class FileToDFController:
103104
Directory where cache will be stored.
104105
timestamp_column_name : str
105106
Name of the timestamp column.
107+
download_method : typing.Union[DownloadMethods, str], optional, default = DownloadMethods.DASK_THREAD
108+
The download method to use, if the `MORPHEUS_FILE_DOWNLOAD_TYPE` environment variable is set, it takes
109+
presedence.
106110
"""
107111

108112
def __init__(self,
@@ -111,7 +115,8 @@ def __init__(self,
111115
file_type: FileTypes,
112116
parser_kwargs: dict,
113117
cache_dir: str,
114-
timestamp_column_name: str):
118+
timestamp_column_name: str,
119+
download_method: typing.Union[DownloadMethods, str] = DownloadMethods.DASK_THREAD):
115120

116121
self._schema = schema
117122
self._file_type = file_type
@@ -120,7 +125,7 @@ def __init__(self,
120125
self._cache_dir = os.path.join(cache_dir, "file_cache")
121126
self._timestamp_column_name = timestamp_column_name
122127

123-
self._downloader = Downloader()
128+
self._downloader = Downloader(download_method=download_method)
124129

125130
def _get_or_create_dataframe_from_batch(
126131
self, file_object_batch: typing.Tuple[fsspec.core.OpenFiles, int]) -> typing.Tuple[pd.DataFrame, bool]:

python/morpheus/morpheus/controllers/monitor_controller.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@
2828

2929
logger = logging.getLogger(__name__)
3030

31-
SupportedTypes = typing.Union[DataFrameType, MessageMeta, ControlMessage, list]
32-
3331

3432
class MonitorController:
3533
"""
@@ -59,6 +57,7 @@ class MonitorController:
5957
Custom implementation of tqdm if required.
6058
"""
6159

60+
SupportedTypes = typing.Union[DataFrameType, MessageMeta, ControlMessage, list]
6261
controller_count: int = 0
6362

6463
def __init__(self,

python/morpheus/morpheus/controllers/rss_controller.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@
2828
import requests_cache
2929

3030
from morpheus.messages import MessageMeta
31+
from morpheus.utils.type_aliases import DataFrameModule
3132
from morpheus.utils.type_aliases import DataFrameType
32-
from morpheus.utils.type_aliases import DataFrameTypeStr
3333
from morpheus.utils.type_utils import get_df_class
3434

3535
logger = logging.getLogger(__name__)
@@ -107,7 +107,7 @@ def __init__(self,
107107
stop_after: int = 0,
108108
interval_secs: float = 600,
109109
should_stop_fn: Callable[[], bool] = None,
110-
df_type: DataFrameTypeStr = "cudf"):
110+
df_type: DataFrameModule = "cudf"):
111111
if IMPORT_EXCEPTION is not None:
112112
raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION
113113

0 commit comments

Comments
 (0)