1
1
from django .db import models
2
2
from django .utils .text import slugify
3
- import requests
3
+ from . utils import get_summary_from_url , translate_to_korean , categorize_summary
4
4
import readtime
5
- from langchain .chains .summarize import load_summarize_chain
6
- from langchain_openai import ChatOpenAI
7
- from langchain_community .document_loaders .web_base import WebBaseLoader
8
- from langchain_core .prompts import ChatPromptTemplate
9
- from langchain_core .output_parsers import StrOutputParser
10
- from langchain_core .documents import Document
11
5
import os
12
6
13
7
@@ -51,7 +45,9 @@ def __str__(self):
51
45
return self .title or self .url
52
46
53
47
def calculate_reading_time (self , full_text : str ):
54
- """Calculates reading time based on the provided text."""
48
+ """
49
+ Calculates reading time based on the provided text.
50
+ """
55
51
if full_text :
56
52
try :
57
53
result = readtime .of_text (full_text )
@@ -70,34 +66,10 @@ def fetch_and_summarize(self) -> str:
70
66
if not self .url :
71
67
return "Error: No URL provided."
72
68
73
- full_content_text = "" # Variable to hold the full text
74
-
75
69
try :
76
- # --- Step 1: Load Content ---
77
- loader = WebBaseLoader (self .url )
78
- docs = loader .load () # Load documents
79
-
80
- if not docs or not docs [0 ].page_content :
81
- return "Error: No content could be loaded from the URL."
82
-
83
- full_content_text = docs [0 ].page_content # Store full text
70
+ summary_text = get_summary_from_url (self .url )
84
71
85
- if not self .title and docs [0 ].metadata .get ('title' ):
86
- self .title = docs [0 ].metadata .get ('title' )
87
-
88
- # --- Step 2: Calculate Reading Time (on full content) ---
89
- self .calculate_reading_time (full_content_text ) # Call updated method
90
-
91
- # --- Step 3: Generate Summary ---
92
- api_key = os .getenv ("OPENAI_API_KEY" )
93
- if not api_key :
94
- self .save (update_fields = ['title' , 'reading_time_minutes' , 'updated_at' ]) # Save what we have
95
- return "Error: OpenAI API key not found. Title/Reading Time saved."
96
-
97
- llm_summarize = ChatOpenAI (api_key = api_key , model_name = "gpt-4o" , temperature = 0.2 )
98
- chain_summarize = load_summarize_chain (llm_summarize , chain_type = "map_reduce" )
99
- summary_result = chain_summarize .invoke (docs )
100
- summary_text = summary_result .get ('output_text' , '' )
72
+ self .calculate_reading_time (summary_text ) # Call updated method
101
73
102
74
if not summary_text :
103
75
self .summary = ""
@@ -107,70 +79,46 @@ def fetch_and_summarize(self) -> str:
107
79
108
80
self .summary = summary_text # Set summary
109
81
110
- # === Integrate category assignment ===
111
82
categorization_status = "Categorization skipped (no summary)."
112
83
if self .summary : # Only categorize if summary was successful
113
84
categorization_status = self .assign_categories () # Call the revised method
114
85
print (f"Categorization status for article { self .id } : { categorization_status } " )
115
- # === End Integration ===
116
86
117
- # --- Step 4: Translate Summary (immediately after generation) ---
118
87
translation_status = self .translate_summary_to_korean () # Call translation
119
88
print (f"Translation status for article { self .id } : { translation_status } " )
120
89
translation_failed = "Error" in translation_status
121
90
122
- # === Adjust final save ===
123
- # ManyToMany fields are saved via .add()/.clear() within assign_categories.
124
- # Do NOT include 'categories' in update_fields.
125
91
self .save (update_fields = [
126
92
'title' ,
127
93
'summary' ,
128
94
'summary_ko' ,
129
95
'reading_time_minutes' ,
130
96
'updated_at'
131
- # 'categories' is NOT saved here
132
97
])
133
- # === End Adjust save ===
134
98
135
- # Update final message to include categorization status
136
99
translation_failed = "Error" in translation_status # Re-evaluate this variable if needed
137
- categorization_failed = "Error" in categorization_status or "Warning" in categorization_status
138
100
139
101
final_message = "Fetch, Read Time, Summary completed."
140
102
final_message += " Translation failed." if translation_failed else " Translation completed."
141
103
final_message += f" { categorization_status } " # Include categorization status message
142
104
return final_message
143
105
144
- except requests .exceptions .RequestException as e :
145
- return f"Error fetching URL: { str (e )} "
146
106
except ImportError as e :
147
107
return f"Error with required libraries: { str (e )} "
148
108
except Exception as e :
149
109
print (f"Unexpected error during fetch/summarize/translate for { self .id } : { e } " )
150
- # Optionally try saving minimal info on unexpected error:
151
- # self.save(update_fields=['title', 'reading_time_minutes', 'summary', 'summary_ko', 'updated_at'])
152
110
return f"Unexpected error processing article: { str (e )} "
153
111
154
112
def translate_summary_to_korean (self ):
155
- """Translates the summary to Korean using the OpenAI API via Langchain."""
113
+ """
114
+ Translates the summary to Korean using the OpenAI API via Langchain.
115
+ """
156
116
if not self .summary :
157
117
self .summary_ko = ""
158
118
return "No summary to translate."
159
119
160
- api_key = os .getenv ("OPENAI_API_KEY" )
161
- if not api_key :
162
- return "Error: OpenAI API key not found."
163
-
164
120
try :
165
- llm = ChatOpenAI (api_key = api_key , model_name = "gpt-4o" , temperature = 0.2 )
166
- prompt = ChatPromptTemplate .from_messages ([
167
- ("system" , "You are a helpful assistant that translates English text to Korean." ),
168
- ("user" , "Please translate the following English text accurately to Korean:\n \n {english_text}" )
169
- ])
170
- parser = StrOutputParser ()
171
- chain = prompt | llm | parser
172
-
173
- translated_text = chain .invoke ({"english_text" : self .summary })
121
+ translated_text = translate_to_korean (self .summary )
174
122
175
123
self .summary_ko = translated_text .strip () if translated_text else ""
176
124
self .save (update_fields = ['summary_ko' , 'updated_at' ])
@@ -188,55 +136,31 @@ def assign_categories(self):
188
136
self .categories .clear () # Clear existing categories if no summary
189
137
return "Error: No summary available to categorize."
190
138
191
- api_key = os .getenv ("OPENAI_API_KEY" )
192
- if not api_key :
193
- return "Error: OpenAI API key not found for categorization."
194
-
195
139
try :
196
- # Ensure defined categories exist in the DB (or create them)
197
140
defined_category_names = [
198
141
'Web Development' , 'MLOps' , 'Large Language Models' ,
199
142
'Data Science' , 'AI General' , 'Software Engineering' , 'Other'
200
143
]
201
- # Use get_or_create to simplify existence check and creation
202
144
category_objects = []
203
145
created_names = []
204
146
for name in defined_category_names :
205
147
cat , created = Category .objects .get_or_create (name = name )
206
148
category_objects .append (cat )
207
149
if created :
208
150
created_names .append (name )
209
- # Optionally save slug immediately if auto-generated
210
151
cat .save ()
152
+
211
153
if created_names :
212
154
print (f"Ensured categories exist. Created new: { created_names } " )
213
155
214
- # Prepare for LLM call
215
- llm = ChatOpenAI (api_key = api_key , model_name = "gpt-4o" , temperature = 0.1 )
216
- category_list_str = ", " .join ([f"'{ name } '" for name in defined_category_names ])
217
-
218
- prompt = ChatPromptTemplate .from_messages ([
219
- ("system" , f"You are a helpful assistant that categorizes technical articles based on their summary. "
220
- f"Assign one or more relevant categories from the following list: { category_list_str } . "
221
- f"Respond with ONLY the category names, separated by commas (e.g., 'Web Development, Large Language Models'). "
222
- f"If none fit well, respond with 'Other'." ),
223
- ("user" , "Please categorize the following article summary:\n \n {summary_text}" )
224
- ])
225
- parser = StrOutputParser ()
226
- chain = prompt | llm | parser
227
-
228
- response_text = chain .invoke ({"summary_text" : self .summary }).strip ()
229
-
230
- # Parse LLM response and get Category objects
156
+ response_text = categorize_summary (self .summary , defined_category_names ).replace ("'" , "" ).replace ('"' , "" )
231
157
assigned_category_names = [name .strip () for name in response_text .split (',' ) if name .strip ()]
232
158
233
- # Validate against our defined list and get actual Category objects from the DB
234
159
valid_categories = Category .objects .filter (name__in = assigned_category_names ).filter (name__in = defined_category_names )
235
160
valid_category_names = list (valid_categories .values_list ('name' , flat = True ))
236
161
237
162
print (f"LLM suggested: { assigned_category_names } , Validated & Found: { valid_category_names } " )
238
163
239
- # Update the article's categories relationship
240
164
self .categories .clear () # Remove old associations first
241
165
if valid_categories :
242
166
self .categories .add (* valid_categories ) # Add the new set using the splat operator
0 commit comments