Skip to content

Commit 9cfe5fa

Browse files
committed
scraping project added
1 parent ede8640 commit 9cfe5fa

File tree

1 file changed

+36
-0
lines changed

1 file changed

+36
-0
lines changed
+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import requests
2+
from bs4 import BeautifulSoup
3+
import pandas as pd
4+
5+
# Helper that downloads a page and returns its parsed HTML.
6+
def scrape_url(url, timeout=30):
    """Download *url* and return its HTML parsed as a ``BeautifulSoup`` tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were
    # real content.
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')
11+
12+
13+
url = 'https://books.toscrape.com/catalogue/category/books/mystery_3/index.html'
print(scrape_url(url))


# Collect title, price and star rating for every book listed on catalogue
# pages 1 through 50.
data1 = []
for i in range(1, 51):
    url = f'https://books.toscrape.com/catalogue/page-{i}.html'
    # Reuse the helper instead of repeating the fetch-and-parse steps inline.
    soup = scrape_url(url)
    ol = soup.find('ol')
    if ol is None:
        # Page has no listing container, so there is nothing to collect.
        continue
    articles = ol.find_all('article', class_='product_pod')

    for article in articles:
        title_element = article.find('h3')
        title = title_element.get_text(strip=True) if title_element else None

        # Search inside this article: looking the price up on the whole page
        # returns the first book's price for every row.
        price_element = article.find('p', class_='price_color')
        price = price_element.get_text(strip=True) if price_element else None

        # The rating is taken from the second CSS class of the article's
        # first <p>; presumably that tag is the star-rating marker - verify
        # against the page markup.
        star_element = article.find('p')
        star_classes = star_element.get('class', []) if star_element else []
        star = star_classes[1] if len(star_classes) > 1 else None

        # NOTE(review): the " Price" key has a leading space; kept as-is so
        # the resulting column name does not change.
        data1.append({"title": title, " Price": price, "Star": star})

# Store the rows in a DataFrame so they are easy to manipulate and preprocess.
df = pd.DataFrame(data1)

0 commit comments

Comments
 (0)