10
10
11
11
from utils import read_gitignore , is_ignored , find_projects
12
12
13
# Directory names that are skipped by name (VCS metadata, caches, IDE
# settings, virtual environments and build artefacts).
IGNORED_DIRS = {
    '.git',
    '__pycache__',
    '.idea',
    '.vscode',
    '.venv',
    '.eggs',
}
14
+
13
15
14
16
def analyze_project_structure (directory , task_queue ):
15
17
ignored_paths = read_gitignore (directory )
@@ -50,14 +52,30 @@ def analyze_project_structure(directory, task_queue):
50
52
51
53
52
54
53
def parse_python_files(projects_dir, export=True, max_files=5000, max_depth=6):
    """Collect statistics about the Python files found under *projects_dir*.

    The tree is walked top-down. Every directory that directly contains at
    least one ``.py`` file becomes one entry, named after its path relative
    to *projects_dir* (``"ROOT"`` for *projects_dir* itself).

    Args:
        projects_dir: Directory to scan.
        export: When true, write ``project_stats.csv`` and
            ``project_stats.html`` into the current working directory.
        max_files: Upper bound on the number of ``.py`` files inspected;
            scanning stops once it is reached. ``None`` disables the limit.
        max_depth: Directories whose relative path contains more than this
            many path separators are neither inspected nor descended into.

    Returns:
        A list of dicts with the keys ``name``, ``stack`` (sorted top-level
        imported module names), ``dirs``, ``date`` (earliest ``getctime`` of
        the entry's files as a pandas timestamp) and ``py_count``. An empty
        list is returned when no ``.py`` file was found.
    """
    import ast
    import datetime
    import os

    import pandas as pd

    # Function-local skip list; it deliberately differs from (and does not
    # touch) any module-level constant of a similar name.
    ignored_dirs = {'.git', '__pycache__', '.idea', '.vscode', 'venv', '.venv', 'env', '.env', '.mypy_cache'}

    project_stats = {}
    scanned_files = 0
    limit_reached = False

    for root, dirs, files in os.walk(projects_dir):
        # Prune in place so os.walk never enters ignored directories.
        dirs[:] = [d for d in dirs if d not in ignored_dirs]

        # Depth is the number of separators in the relative path, so both
        # "." and its direct children have depth 0.
        rel_root = os.path.relpath(root, projects_dir)
        depth = rel_root.count(os.sep)
        if depth > max_depth:
            dirs[:] = []
            continue
        if rel_root != "." and depth >= max_depth:
            # Every child would exceed the limit: do not descend at all
            # instead of walking the subtree only to skip each directory.
            dirs[:] = []

        py_files = [f for f in files if f.endswith(".py")]
        if not py_files:
            continue

        if max_files is not None and scanned_files >= max_files:
            limit_reached = True
            break

        project_name = rel_root.replace(os.sep, " / ") if rel_root != "." else "ROOT"
        stats = project_stats.setdefault(project_name, {
            "py_count": 0,
            "libs": set(),
            "created": None,
            "dirs": set(),
        })

        # The relative directory is the same for every file of this entry.
        if rel_root != ".":
            stats["dirs"].add(rel_root)

        for file in py_files:
            if max_files is not None and scanned_files >= max_files:
                limit_reached = True
                break

            file_path = os.path.join(root, file)
            scanned_files += 1
            stats["py_count"] += 1

            # Track the earliest timestamp; an unreadable file is still
            # counted but contributes no date.
            try:
                creation_date = datetime.datetime.fromtimestamp(os.path.getctime(file_path))
            except OSError:
                creation_date = None
            if creation_date is not None and (stats["created"] is None or creation_date < stats["created"]):
                stats["created"] = creation_date

            # Best effort: files that cannot be read or parsed contribute no
            # imports.
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    tree = ast.parse(f.read(), filename=file_path)
            except (OSError, SyntaxError, ValueError, RecursionError):
                continue

            for sub_node in ast.walk(tree):
                if isinstance(sub_node, ast.Import):
                    for alias in sub_node.names:
                        stats["libs"].add(alias.name.split('.')[0])
                elif isinstance(sub_node, ast.ImportFrom) and sub_node.module:
                    stats["libs"].add(sub_node.module.split('.')[0])

        print(f"[✓] {project_name} — {len(py_files)} файлов")

        if limit_reached:
            break

    if limit_reached:
        print(f"⚠ Превышен лимит {max_files} файлов. Анализ остановлен.")

    # Final assembly
    result = []
    for proj, data in project_stats.items():
        date_str = data["created"].strftime("%Y-%m-%d %H:%M:%S") if data["created"] else None
        result.append({
            "name": proj,
            "stack": sorted(data["libs"]),
            "dirs": sorted(data["dirs"]),
            "date": date_str,
            "py_count": data["py_count"],
        })

    if not result:
        print("⚠ Не найдено проектов с .py файлами.")
        return []

    df = pd.DataFrame(result)
    df["date"] = pd.to_datetime(df["date"], errors="coerce")

    if export:
        df.to_csv("project_stats.csv", index=False, encoding="utf-8-sig")
        df.to_html("project_stats.html", index=False)

    print("==== Итог ====")
    print(df[["name", "date"]])
    return df.to_dict("records")
154
+
155
+
156
+
115
157
116
158
117
159
def open_stats_window(root, project_data: list[dict]):
    """Open a window summarising statistics for the given projects.

    The window shows a textual summary, a bar chart of the number of
    projects per month and a list of imported libraries with the number of
    projects using each one. When there is nothing to show, a single
    explanatory label is displayed instead.

    Args:
        root: Parent Tk widget for the new top-level window.
        project_data: One dict per project. ``date`` is required for the
            statistics; ``py_count`` and ``stack`` (a list of library names)
            are used when present.
    """
    from itertools import chain

    stats_win = tk.Toplevel(root)
    stats_win.title("📊 Статистика по проектам")
    stats_win.geometry("1000x700")

    def show_message(text):
        # Used for the "nothing to display" cases.
        tk.Label(stats_win, text=text, font=("Arial", 12)).pack(pady=20)

    if not project_data:
        show_message("❌ Нет данных для отображения.")
        return

    df = pd.DataFrame(project_data)

    if df.empty or 'date' not in df.columns:
        show_message("❌ Недостаточно данных для статистики.")
        return

    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    # Copy so that adding the 'month' column below modifies an independent
    # frame rather than a possible view of the unfiltered one.
    df = df.dropna(subset=['date']).copy()

    if df.empty:
        show_message("❌ Все даты повреждены или отсутствуют.")
        return

    df['month'] = df['date'].dt.to_period('M')

    # === Summary header ===
    summary_frame = tk.Frame(stats_win)
    summary_frame.pack(pady=10)

    total_projects = len(df)
    # Records produced by other callers may lack the file counter.
    total_files = df['py_count'].sum() if 'py_count' in df.columns else 0
    earliest = df['date'].min().strftime("%Y-%m-%d")
    latest = df['date'].max().strftime("%Y-%m-%d")
    unique_months = df['month'].nunique()

    summary_text = (
        f"📦 Всего проектов: {total_projects} 🧠 Всего .py файлов: {total_files}\n"
        f"📆 Период: {earliest} → {latest} 📊 Уникальных месяцев: {unique_months}"
    )
    tk.Label(summary_frame, text=summary_text, font=("Arial", 11), justify="left").pack()

    # === Projects-per-month chart ===
    chart_frame = tk.Frame(stats_win)
    chart_frame.pack(fill="both", expand=False, pady=5)

    fig, ax = plt.subplots(figsize=(8, 4))
    df_by_month = df.groupby('month').size().sort_index()
    df_by_month.plot(kind='bar', ax=ax, color='skyblue', edgecolor='black')

    ax.set_title('📅 Количество проектов по месяцам', fontsize=14)
    ax.set_ylabel('Проекты', fontsize=12)
    ax.set_xlabel('Месяц', fontsize=12)
    ax.grid(True, axis='y', linestyle='--', alpha=0.5)
    fig.tight_layout()

    canvas = FigureCanvasTkAgg(fig, master=chart_frame)
    canvas.draw()
    canvas.get_tk_widget().pack()

    def release_figure(event):
        # pyplot keeps every figure it created until it is closed explicitly,
        # so release it together with the window. <Destroy> is also
        # delivered for child widgets, hence the widget check.
        if event.widget is stats_win:
            plt.close(fig)

    stats_win.bind("<Destroy>", release_figure)

    # === Library usage ===
    tech_frame = tk.LabelFrame(stats_win, text="📚 Используемые библиотеки", font=("Arial", 12))
    tech_frame.pack(fill="both", expand=True, padx=10, pady=10)

    if 'stack' in df.columns:
        # Flatten in linear time and ignore cells that are not collections
        # (e.g. NaN for records that have no 'stack' key).
        all_stacks = list(chain.from_iterable(
            stack for stack in df['stack'] if isinstance(stack, (list, tuple, set))
        ))
        stack_series = pd.Series(all_stacks, dtype=object).value_counts()
        if not stack_series.empty:
            stack_text = "\n".join(f"• {lib:<20} — {count}" for lib, count in stack_series.items())
        else:
            stack_text = "Нет данных о библиотеках."
    else:
        stack_text = "Нет данных по стеку технологий."

    tk.Message(tech_frame, text=stack_text, font=("Courier New", 10), width=900, anchor="w", justify="left").pack()

    # === Close button ===
    tk.Button(stats_win, text="Закрыть", command=stats_win.destroy).pack(pady=10)
235
+
0 commit comments