Data Science – 2 [web scraping]

Data science using Python

In the following example, we retrieve the live world population figure by web scraping with the BeautifulSoup package.

  1. Live Population record
#data science with Python - world's population stat
import bs4 
import pandas as pd 
import requests 

url = 'https://www.worldometers.info/world-population/#:~:text=Population%20in%20the%20world%20is,it%20was%20at%20around%202%25.'

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors instead of parsing an error page.
result = requests.get(url, timeout=10)
result.raise_for_status()

soup = bs4.BeautifulSoup(result.text, 'lxml')
pop = soup.find_all(
    'table',
    class_='table table-striped table-bordered table-hover table-condensed table-list',
)

# Collect the text of the first <strong> element found in each matching table.
data = []
for table in pop:
    strong = table.find('strong')
    # find() returns None when the table has no <strong>, and .string is None
    # when the tag does not wrap a single string; skip both cases rather than
    # crash or store None.
    if strong is None or strong.string is None:
        continue
    data.append(strong.string)

# dict.fromkeys() drops duplicates while keeping first-seen order.
mylist = list(dict.fromkeys(data))
print("World's Population:", mylist, "in Cr")

Output of the above code is :

World’s Population: [‘7,794,798,739’] in Cr

2. Retrieve the live currency rate (1 US$ in INR) and the live BSE SENSEX price using web scraping.

# US$ vs INR - live scraping & live share market
import bs4 
import pandas as pd 
import requests 
# Class attribute value used to locate the quote container on both pages.
QUOTE_DIV_CLASS = 'My(6px) Pos(r) smartphone_Mt(6px)'


def _fetch_quote(quote_url):
    """Return the quote text scraped from *quote_url*.

    Downloads the page, looks at every <div> whose class attribute equals
    QUOTE_DIV_CLASS, and takes the text of the first <span> inside each one.
    The collected values are joined with ', ' (normally there is just one).

    Returns an empty string when no matching element is found.
    Raises requests.RequestException on network errors, timeouts, or a
    non-2xx HTTP status.
    """
    response = requests.get(quote_url, timeout=10)
    response.raise_for_status()
    page = bs4.BeautifulSoup(response.text, 'lxml')

    values = []
    for div in page.find_all('div', class_=QUOTE_DIV_CLASS):
        span = div.find('span')
        # find() returns None when the div has no <span>, and .string is None
        # when the span does not wrap a single string; skip both cases.
        if span is None or span.string is None:
            continue
        values.append(str(span.string))
    # Join the plain strings instead of slicing the list repr, so the result
    # carries no stray quote characters.
    return ', '.join(values)


url = 'https://in.finance.yahoo.com/quote/INR=X?p=INR=X'
res = _fetch_quote(url)
print("1 US$ equal to  :", res, 'INR')

# share market live
url2 = 'https://in.finance.yahoo.com/quote/%5EBSESN?p=^BSESN'
res2 = _fetch_quote(url2)
print("BSE Market Price   :", res2, 'INR')

# Store the BSE value in a CSV file (plain text, not an Excel workbook).
# The with-block closes the file even if write() raises.
with open('sharemarket.csv', 'w') as f:
    f.write(res2)
print('Stored succesfully')

# Show both scraped values in a small Tk window.
# Import the module under a short alias rather than with a wildcard, so the
# script namespace is not flooded with every tkinter name.
import tkinter as tk

root = tk.Tk()
root.title('Currency and live market price')
root.configure(bg='gold')

t1 = '1 US$ equal to :' + res
w1 = tk.Label(root, text=t1, bg='blue', fg='white', font='sans 14')

t2 = 'BSE Market Price :' + res2
w2 = tk.Label(root, text=t2, bg='magenta', fg='white', font='sans 14')

w3 = tk.Label(root, text='Currency and live market price', bg='green', fg='white', font='sans 16')

# Pack order sets the top-to-bottom layout: heading first, then the two values.
w3.pack(fill='both')
w1.pack(fill='both')
w2.pack(fill='both')

# Blocks until the window is closed.
root.mainloop()

Output of the above program is:

3. Check the titles of multiple websites using web scraping

import requests
from bs4 import BeautifulSoup

urls = ["https://www.amazon.in", "http://www.msn.com"]

# Print the <title> of each page; one failing site must not stop the others.
for url in urls:
    try:
        # A timeout keeps a stalled server from hanging the whole loop.
        r = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print("Could not fetch %s: %s" % (url, exc))
        continue
    soup = BeautifulSoup(r.text, "html.parser")
    # soup.title is None when the document has no <title> element.
    title = soup.title
    print("Title: %s" % (title.text if title is not None else "<no title>"))
    

Output of the above program is :

Title: Online Shopping site in India: Shop Online for Mobiles, Books, Watches, Shoes and More – Amazon.in
Title: MSN India | Breaking News, Entertainment, Latest Videos, Outlook

 

Share
Share
Scroll to Top