File tree 1 file changed +36
-0
lines changed
Web scraping for book names
1 file changed +36
-0
lines changed Original file line number Diff line number Diff line change
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import pandas as pd
4
+
5
+ # a function for scraping content from url
6
def scrape_url(url, timeout=10):
    """Download *url* and return its HTML parsed into a ``BeautifulSoup`` tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body parsed with the built-in ``html.parser``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without an explicit timeout a stalled server would block the script forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than parsing an error page as real content.
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')
11
+
12
+
13
# Quick check: fetch the Mystery category listing and dump the parsed page.
url = 'https://books.toscrape.com/catalogue/category/books/mystery_3/index.html'
mystery_page = scrape_url(url)
print(mystery_page)
15
+
16
+
17
+ # extracting data from the content
18
def _parse_book(article):
    """Build the title/price/star record for one ``product_pod`` article tag.

    Missing elements yield ``None`` for the corresponding field instead of
    raising, so one malformed entry does not abort the whole crawl.
    """
    heading = article.find('h3')
    link = heading.find('a') if heading is not None else None
    # NOTE(review): the link text on listing pages appears to be shortened for
    # long names; the link's ``title`` attribute is used when present - confirm
    # against the site markup. Falls back to the visible heading text.
    if link is not None and link.get('title'):
        title = link['title'].strip()
    elif heading is not None:
        title = heading.get_text(strip=True)
    else:
        title = None

    # Search inside this article, not the whole page, so each book gets its
    # own price rather than the first price found on the page.
    price_element = article.find('p', class_='price_color')
    price = price_element.get_text(strip=True) if price_element is not None else None

    # The rating is taken from the second CSS class of the star-rating tag.
    star_element = article.find('p', class_='star-rating')
    star_classes = star_element.get('class', []) if star_element is not None else []
    star = star_classes[1] if len(star_classes) > 1 else None

    # The " Price" key (with its leading space) is kept exactly as before so
    # existing consumers of the DataFrame column names keep working.
    return {"title": title, " Price": price, "Star": star}


# Walk the paginated catalogue (pages 1 to 50) and collect one record per book.
data1 = []
for i in range(1, 51):
    url = f'https://books.toscrape.com/catalogue/page-{i}.html'
    soup = scrape_url(url)
    ol = soup.find('ol')
    if ol is None:
        # No product list on this page, so there is nothing to collect.
        continue
    data1.extend(
        _parse_book(article)
        for article in ol.find_all('article', class_='product_pod')
    )

# Keep the records in a DataFrame so they are easy to manipulate and preprocess.
df = pd.DataFrame(data1)
You can’t perform that action at this time.
0 commit comments