-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdemo_spider1.py
More file actions
36 lines (25 loc) · 878 Bytes
/
demo_spider1.py
File metadata and controls
36 lines (25 loc) · 878 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#coding:utf-8
# Download the song list + song names
# BeautifulSoup
# http://www.crummy.com/software/BeautifulSoup/bs3/documentation.zh.html
import re,urllib,urllib2
from BeautifulSoup import BeautifulSOAP
# Fetch the page at `url`, collect every <a> tag whose href contains
# "/song/<digits>", and print an absolute link plus the tag's text for each.
# (The URL path suggests this is an artist's top-songs page on xiami.com.)
base_url = 'http://www.xiami.com'
url = base_url + '/artist/top/id/1234'

# Send a browser-like User-Agent header along with the request.
headers = {'User-Agent': "Mozilla/5.0 (Windows NT 5.1; rv:27.0) Gecko/20100101 Firefox/27.0"}
req = urllib2.Request(url=url, headers=headers)

content = urllib2.urlopen(req)
try:
    # The parser consumes the whole response while building the tree, so the
    # connection can be released as soon as the constructor returns.
    # NOTE(review): BeautifulSOAP is the SOAP/XML-oriented parser class; for an
    # HTML page the BeautifulSoup class is presumably what was intended - confirm.
    soup = BeautifulSOAP(content, fromEncoding="gb18030")
finally:
    # urllib2 responses are not context managers on Python 2; close explicitly.
    content.close()

# Anchors whose href contains "/song/" followed by at least one digit.
songlist = soup.findAll('a', {'href': re.compile(r'/song/\d+')})

for song in songlist:
    # The matched hrefs are site-relative ("/song/..."), so prefix scheme + host
    # to print a usable absolute URL.
    song_url = base_url + song.get('href')
    # Single-argument print(...) gives the same "url text" output on Python 2
    # and also parses on Python 3. song.string is None when the anchor holds
    # nested markup rather than plain text, in which case "None" is printed.
    print("%s %s" % (song_url, song.string))

print("end")