-
Notifications
You must be signed in to change notification settings - Fork 872
Description
Hi, I used dimensionality reduction techniques and saved the model.
I can load it, but it doesn't predict topics for a new dataset:
# Training-side snippet: configure the UMAP and HDBSCAN components, build a
# BERTopic model around them, and save that model to disk.
# NOTE(review): `language`, `repo_path` and `location` are defined outside this
# snippet, and no fit/fit_transform call is shown before the save; presumably
# it was omitted from the report. Confirm.

# UMAP settings: 15 neighbours, 5 output components, cosine metric.
umap_model = UMAP(
    n_neighbors=15,
    n_components=5,
    min_dist=0.0,
    metric='cosine',
)

# HDBSCAN settings: clusters of at least 15 points, euclidean metric, 'eom'
# cluster selection, with prediction data requested at construction time.
hdbscan_model = HDBSCAN(
    min_cluster_size=15,
    metric='euclidean',
    cluster_selection_method='eom',
    prediction_data=True,
)

# BERTopic is handed both custom sub-models, asked for per-topic
# probabilities, and asked for 50 topics via nr_topics.
topic_model = BERTopic(
    umap_model=umap_model,
    hdbscan_model=hdbscan_model,
    calculate_probabilities=True,
    language=language,
    nr_topics=50,
)

# The model is written to <repo_path>/<location>/<location>_model. No
# serialization argument is passed, so the library's default format is used.
model_save_path = os.path.join(repo_path, location, f'{location}_model')
topic_model.save(model_save_path)
print(f"Model saved to {model_save_path}")
# Inference-side snippet: reload the saved model and run it on new documents.
# The final transform call is the one reported to raise the AttributeError.

# Locations of the saved model and of the JSON-lines file to predict on.
model_path = "/content/drive/MyDrive/istanbul-crm-topic-modeling/istanbul/istanbul/istanbul_model"
data_to_predict_path = "/content/drive/MyDrive/istanbul-crm-topic-modeling/istanbul/stratified_sample_20K.json"

model = BERTopic.load(model_path)

# The file is read as one JSON record per line.
df_to_predict = pd.read_json(
    data_to_predict_path,
    orient="records",
    lines=True,
)

# Pull the text column out as a plain Python list; swap in your own column
# name if your data uses a different one.
docs_to_predict = df_to_predict['Başvuru Açıklaması'].tolist()

topics, probabilities = model.transform(docs_to_predict)
AttributeError: 'tuple' object has no attribute 'shape'