6
6
from .audio_url_decoder import decode_audio_url
7
7
from .exceptions import AccessDenied
8
8
9
- RE_AUDIO = re .compile (r'audio[-\d]+_\d+_audios\d+' )
9
+ RE_AUDIO_ID = re .compile (r'audio(-?\d+)_(\d+)' )
10
+ RE_ALBUM_ID = re .compile (r'act=audio_playlist(-?\d+)_(\d+)' )
10
11
12
+ TRACKS_PER_USER_PAGE = 50
13
+ TRACKS_PER_ALBUM_PAGE = 100
14
+ ALBUMS_PER_USER_PAGE = 100
11
15
12
- class VkAudio :
16
+
17
+ class VkAudio (object ):
13
18
14
19
__slots__ = ('_vk' , 'user_id' )
15
20
16
21
def __init__ (self , vk ):
17
- self .user_id = vk .get_api ().users .get ()[0 ]['id' ]
22
+ """
23
+
24
+ :type vk: vk_api.VkApi
25
+ """
26
+ self .user_id = vk .method ('users.get' )[0 ]['id' ]
18
27
self ._vk = vk
19
28
20
- def get (self , owner_id = None , album_id = None , offset = 0 ):
21
- """ Получить список аудиозаписей пользователя
29
+ def get_iter (self , owner_id = None , album_id = None ):
30
+ """ Получить список аудиозаписей пользователя (по частям)
22
31
23
32
:param owner_id: ID владельца (отрицательные значения для групп)
24
- :param album_id: ID альбома (отрицательные значения для групп)
25
- :param offset: смещение
33
+ :param album_id: ID альбома
26
34
"""
27
35
28
- if owner_id is None and album_id is None :
29
- raise TypeError (
30
- 'get() missing 1 required argument: album_id or owner_id'
31
- )
32
- elif owner_id is not None and album_id is not None :
33
- raise TypeError ('get() too many arguments' )
36
+ if owner_id is None :
37
+ owner_id = self .user_id
34
38
35
39
if album_id is not None :
36
- url = 'https://m.vk.com/audio?act=audio_playlist{}' .format (album_id )
40
+ url = 'https://m.vk.com/audio?act=audio_playlist{}_{}' .format (
41
+ owner_id , album_id
42
+ )
43
+ offset_diff = TRACKS_PER_ALBUM_PAGE
37
44
else :
38
45
url = 'https://m.vk.com/audios{}' .format (owner_id )
46
+ offset_diff = TRACKS_PER_USER_PAGE
47
+
48
+ offset = 0
49
+ while True :
50
+ response = self ._vk .http .get (
51
+ url ,
52
+ params = {
53
+ 'offset' : offset
54
+ },
55
+ allow_redirects = False
56
+ )
39
57
40
- response = self ._vk .http .get (
41
- url ,
42
- params = {
43
- 'offset' : offset
44
- },
45
- allow_redirects = False
46
- )
58
+ if not response .text :
59
+ raise AccessDenied (
60
+ 'You don\' t have permissions to browse user\' s audio'
61
+ )
47
62
48
- if not response .text :
49
- raise AccessDenied (
50
- 'You don\' t have permissions to browse user\' s audio'
51
- )
63
+ tracks = scrap_data (response .text , self .user_id )
52
64
53
- return scrap_data (response .text , self .user_id )
65
+ if not tracks :
66
+ break
54
67
55
- def get_albums (self , owner_id , offset = 0 ):
56
- """ Получить список альбомов пользователя
68
+ for i in tracks :
69
+ yield i
70
+
71
+ offset += offset_diff
72
+
73
+ def get (self , owner_id = None , album_id = None ):
74
+ """ Получить список аудиозаписей пользователя
57
75
58
76
:param owner_id: ID владельца (отрицательные значения для групп)
59
- :param offset: смещение
77
+ :param album_id: ID альбома
60
78
"""
61
79
62
- response = self ._vk .http .get (
63
- 'https://m.vk.com/audio?act=audio_playlists{}' .format (owner_id ),
64
- params = {
65
- 'offset' : offset
66
- },
67
- allow_redirects = False
68
- )
80
+ return list (self .get_iter (owner_id , album_id ))
69
81
70
- if not response .text :
71
- raise AccessDenied (
72
- 'You don\' t have permissions to browse {}\' s albums' .format (
82
+ def get_albums_iter (self , owner_id = None ):
83
+ """ Получить список альбомов пользователя (по частям)
84
+
85
+ :param owner_id: ID владельца (отрицательные значения для групп)
86
+ """
87
+
88
+ if owner_id is None :
89
+ owner_id = self .user_id
90
+
91
+ offset = 0
92
+
93
+ while True :
94
+ response = self ._vk .http .get (
95
+ 'https://m.vk.com/audio?act=audio_playlists{}' .format (
73
96
owner_id
74
- )
97
+ ),
98
+ params = {
99
+ 'offset' : offset
100
+ },
101
+ allow_redirects = False
75
102
)
76
103
77
- return scrap_albums (response .text )
104
+ if not response .text :
105
+ raise AccessDenied (
106
+ 'You don\' t have permissions to browse {}\' s albums' .format (
107
+ owner_id
108
+ )
109
+ )
110
+
111
+ albums = scrap_albums (response .text )
112
+
113
+ if not albums :
114
+ break
115
+
116
+ for i in albums :
117
+ yield i
78
118
79
- def search_user (self , owner_id , q = '' ):
119
+ offset += ALBUMS_PER_USER_PAGE
120
+
121
+ def get_albums (self , owner_id = None ):
122
+ """ Получить список альбомов пользователя
123
+
124
+ :param owner_id: ID владельца (отрицательные значения для групп)
125
+ """
126
+
127
+ return list (self .get_albums_iter (owner_id ))
128
+
129
+ def search_user (self , owner_id = None , q = '' ):
80
130
""" Искать по аудиозаписям пользователя
81
131
82
132
:param owner_id: ID владельца (отрицательные значения для групп)
83
133
:param q: запрос
84
134
"""
85
135
136
+ if owner_id is None :
137
+ owner_id = self .user_id
138
+
86
139
response = self ._vk .http .get (
87
140
'https://m.vk.com/audio' ,
88
141
params = {
@@ -101,7 +154,7 @@ def search_user(self, owner_id, q=''):
101
154
102
155
return [
103
156
i for i in scrap_data (response .text , self .user_id )
104
- if RE_AUDIO . search ( i ['id' ])
157
+ if i ['owner_id' ] == owner_id
105
158
]
106
159
107
160
def search (self , q = '' , offset = 0 ):
@@ -128,26 +181,30 @@ def scrap_data(html, user_id):
128
181
129
182
soup = BeautifulSoup (html , 'html.parser' )
130
183
tracks = []
184
+
131
185
for audio in soup .find_all ('div' , {'class' : 'audio_item' }):
132
- if 'audio_item_disabled' in audio ["class" ]:
133
- # TODO: implement getting data of unavailable track
186
+ if 'audio_item_disabled' in audio ['class' ]:
134
187
continue
135
188
136
- artist = audio .select ('.ai_artist' )[0 ].text
137
- title = audio .select ('.ai_title' )[0 ].text
138
- duration = audio .select ('.ai_dur' )[0 ]['data-dur' ]
139
- track_id = audio ['id' ]
140
- link = audio .select ('.ai_body' )[0 ].input ['value' ]
189
+ artist = audio .select_one ('.ai_artist' ).text
190
+ title = audio .select_one ('.ai_title' ).text
191
+ duration = int (audio .select_one ('.ai_dur' )['data-dur' ])
192
+ full_id = tuple (
193
+ int (i ) for i in RE_AUDIO_ID .search (audio ['id' ]).groups ()
194
+ )
195
+ link = audio .select_one ('.ai_body' ).input ['value' ]
141
196
142
197
if 'audio_api_unavailable' in link :
143
198
link = decode_audio_url (link , user_id )
144
199
145
200
tracks .append ({
201
+ 'id' : full_id [1 ],
202
+ 'owner_id' : full_id [0 ],
203
+ 'url' : link ,
204
+
146
205
'artist' : artist ,
147
206
'title' : title ,
148
- 'dur' : duration ,
149
- 'id' : track_id ,
150
- 'url' : link
207
+ 'duration' : duration ,
151
208
})
152
209
153
210
return tracks
@@ -158,15 +215,24 @@ def scrap_albums(html):
158
215
159
216
soup = BeautifulSoup (html , 'html.parser' )
160
217
albums = []
218
+
161
219
for album in soup .find_all ('div' , {'class' : 'audioPlaylistsPage__item' }):
162
- link = album .select ('.audioPlaylistsPage__itemLink' )[0 ]['href' ]
220
+
221
+ link = album .select_one ('.audioPlaylistsPage__itemLink' )['href' ]
222
+ full_id = tuple (int (i ) for i in RE_ALBUM_ID .search (link ).groups ())
223
+
224
+ stats_text = album .select_one ('.audioPlaylistsPage__stats' ).text
225
+ plays = int (stats_text .split (maxsplit = 1 )[0 ])
163
226
164
227
albums .append ({
165
- 'artist' : album .select ('.audioPlaylistsPage__author' )[0 ].text ,
166
- 'title' : album .select ('.audioPlaylistsPage__title' )[0 ].text ,
167
- 'plays' : album .select ('.audioPlaylistsPage__stats' )[0 ].text ,
168
- 'id' : album ['class' ][1 ][25 :],
169
- 'url' : 'https://m.vk.com/audio?act=audio_playlist{}' .format (link )
228
+ 'id' : full_id [1 ],
229
+ 'owner_id' : full_id [0 ],
230
+ 'url' : 'https://m.vk.com/audio?act=audio_playlist{}_{}' .format (
231
+ * full_id
232
+ ),
233
+
234
+ 'title' : album .select_one ('.audioPlaylistsPage__title' ).text ,
235
+ 'plays' : plays
170
236
})
171
237
172
238
return albums
0 commit comments