-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathembedder.py
More file actions
98 lines (77 loc) · 2.93 KB
/
embedder.py
File metadata and controls
98 lines (77 loc) · 2.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""Embedding generation for CivicAurAI — text + multimodal (video clips)."""
from __future__ import annotations
import logging
import vertexai
from vertexai.language_models import TextEmbeddingModel
from vertexai.vision_models import Image, MultiModalEmbeddingModel, Video
from config import config
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Lazily populated process-wide state: nothing below is touched at import time.
# Set to True by _ensure_init() once vertexai.init() has been called.
_initialized = False
# Cached text embedding model; filled in on first use by _get_text_model().
_text_model = None
# Cached multimodal embedding model; filled in on first use by _get_mm_model().
_mm_model = None
def _ensure_init() -> None:
    """Initialise the Vertex AI SDK once per process.

    The first call runs ``vertexai.init`` with the project and region taken
    from ``config`` and then records that fact in ``_initialized``; later
    calls return immediately.
    """
    global _initialized
    if _initialized:
        return
    vertexai.init(project=config.gcp_project_id, location=config.gcp_region)
    _initialized = True
def _get_text_model() -> TextEmbeddingModel:
    """Return the cached text embedding model, loading it on first use.

    On a cache miss the SDK is initialised via ``_ensure_init`` and the model
    named by ``config.text_embedding_model`` is loaded and stored in the
    module-level ``_text_model``.
    """
    global _text_model
    if _text_model is not None:
        return _text_model
    _ensure_init()
    _text_model = TextEmbeddingModel.from_pretrained(config.text_embedding_model)
    return _text_model
def _get_mm_model() -> MultiModalEmbeddingModel:
    """Return the cached multimodal embedding model, loading it on first use.

    On a cache miss the SDK is initialised via ``_ensure_init`` and the model
    named by ``config.multimodal_embedding_model`` is loaded and stored in the
    module-level ``_mm_model``.
    """
    global _mm_model
    if _mm_model is not None:
        return _mm_model
    _ensure_init()
    _mm_model = MultiModalEmbeddingModel.from_pretrained(
        config.multimodal_embedding_model
    )
    return _mm_model
def embed_text(text: str) -> list[float] | None:
    """Generate an embedding vector for a single piece of text.

    The model is whichever one ``config.text_embedding_model`` names.
    NOTE(review): the previous docstring stated text-embedding-005 with
    768 dimensions; neither is enforced here, so confirm against config.

    Args:
        text: The text to embed; sent to the model as a one-element batch.

    Returns:
        The embedding values for ``text``, or None when embeddings are
        disabled in config or when any step of the request fails (failures
        are logged as warnings, never raised).
    """
    if not config.embeddings_enabled:
        return None
    try:
        response = _get_text_model().get_embeddings([text])
        values = response[0].values
        logger.info("Generated text embedding (%d dims)", len(values))
    except Exception as exc:
        # Best-effort by design: callers get None instead of an exception.
        logger.warning("Text embedding failed: %s", exc)
        return None
    return values
def embed_image(gcs_uri: str) -> list[float] | None:
    """Generate a multimodal embedding vector for an image.

    The model is whichever one ``config.multimodal_embedding_model`` names.
    NOTE(review): the previous docstring stated multimodalembedding@001 with
    1408 dimensions; neither is enforced here, so confirm against config.

    Args:
        gcs_uri: Location of the image, handed to ``Image.load_from_file``
            (documented by the original author as a gs:// URI).

    Returns:
        The image embedding, or None when embeddings are disabled in config
        or when any step fails (failures are logged as warnings, never
        raised).
    """
    if not config.embeddings_enabled:
        return None
    try:
        mm_model = _get_mm_model()
        picture = Image.load_from_file(gcs_uri)
        response = mm_model.get_embeddings(image=picture)
        image_vector = response.image_embedding
        logger.info("Generated image embedding (%d dims)", len(image_vector))
    except Exception as exc:
        # Best-effort by design: callers get None instead of an exception.
        logger.warning("Image embedding failed: %s", exc)
        return None
    return image_vector
def embed_video_clip(gcs_uri: str) -> list[float] | None:
    """Generate a multimodal embedding vector for a video clip.

    The model is whichever one ``config.multimodal_embedding_model`` names.
    NOTE(review): the previous docstring stated multimodalembedding@001 with
    1408 dimensions; neither is enforced here, so confirm against config.

    The SDK returns one embedding per video segment. This function returns a
    single vector, so it uses the first segment's embedding; clips long
    enough to be split into several segments are represented by their first
    segment only (a message is logged when that happens).

    Args:
        gcs_uri: Location of the clip, handed to ``Video.load_from_file``
            (documented by the original author as a gs:// URI).

    Returns:
        The first segment's embedding, or None when embeddings are disabled
        in config, when the response contains no segment embeddings, or when
        any step fails (failures are logged as warnings, never raised).
    """
    if not config.embeddings_enabled:
        return None
    try:
        model = _get_mm_model()
        video = Video.load_from_file(gcs_uri)
        response = model.get_embeddings(video=video)
        # The response exposes `video_embeddings` (a list with one entry per
        # segment), not a single `video_embedding`. Reading the singular name
        # raised AttributeError, which the handler below swallowed, so this
        # function previously always returned None.
        segments = response.video_embeddings
        if not segments:
            logger.warning(
                "Video clip embedding failed: %s", "no segment embeddings returned"
            )
            return None
        if len(segments) > 1:
            logger.info(
                "Video clip produced %d segment embeddings; using the first",
                len(segments),
            )
        vector = segments[0].embedding
        logger.info("Generated video clip embedding (%d dims)", len(vector))
        return vector
    except Exception as e:
        # Best-effort by design: callers get None instead of an exception.
        logger.warning("Video clip embedding failed: %s", e)
        return None