Monday 29 May 2017

Python script to list your most visited websites

Hi friends,

In this post, I'll share a Python script that determines your ten most visited websites from your Chrome browsing history. The script given below performs the following four tasks, which are implemented using four functions:

  • Finding the location of the Chrome history database
  • Executing the query on the database to get the URLs visited and associated information
  • Counting the visits per website from the results of the above query
  • Plotting the ten most frequently visited websites

import sqlite3
import os
import operator
from collections import OrderedDict
import pylab as plt

# Function to extract the domain name from a URL
def parseUrl(url):
    """Extract the domain name from a URL.

    The domain is the text between the first ``//`` and the next ``/``,
    with a single leading ``www.`` removed. Anything else in that part of
    the URL (for example a port number) is kept as-is.

    Args:
        url: URL string such as ``"https://www.example.com/page"``.

    Returns:
        The domain name (e.g. ``"example.com"``), or ``None`` when the URL
        contains no ``//`` separator. In that case ``"Error in URL"`` is
        printed.
    """
    try:
        afterScheme = url.split('//', 1)[1]
    except IndexError:
        print("Error in URL")
        return None
    domainName = afterScheme.split('/', 1)[0]
    # Strip only a leading "www." -- str.replace would also remove the
    # sequence from the middle of a host such as "mywww.example.com".
    if domainName.startswith("www."):
        domainName = domainName[len("www."):]
    return domainName

# Function to return the history database location
def getHistoryFile():
    """Return the path of Chrome's history database for the current user.

    The path points at the ``history`` file inside the ``Default`` Chrome
    profile under the user's ``AppData/Local`` directory (Windows layout).
    The path is only built, not checked for existence.

    Returns:
        The full path to the history database file as a string.
    """
    # Build the path from components instead of a backslash-laden literal:
    # sequences such as "\A" or "\L" are invalid string escapes.
    profileDir = os.path.join(
        os.path.expanduser('~'),
        'AppData', 'Local', 'Google', 'Chrome', 'User Data', 'Default',
    )
    return os.path.join(profileDir, 'history')

# Function to query on the database file
def queryHistoryFile(historyFile):
    """Read the visited URLs from a Chrome history database.

    Args:
        historyFile: Path to the SQLite history database file.

    Returns:
        A list of ``(url, visit_count)`` tuples. The ``urls`` table is
        joined to the ``visits`` table, so a URL appears once for every
        matching row in ``visits``.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    query = "SELECT urls.url, urls.visit_count FROM urls, visits WHERE urls.id = visits.url;"
    connection = sqlite3.connect(historyFile)
    try:
        cursor = connection.cursor()
        cursor.execute(query)
        return cursor.fetchall()
    finally:
        # Always release the database handle, even when the query fails
        connection.close()

# Function to count and plot the results barplot
def plotResults(results):
    """Count visits per domain and show a bar chart of the most visited.

    Each row in ``results`` counts as one visit to the domain of its URL.
    At most the ten domains with the highest counts are plotted, most
    visited first.

    Args:
        results: Iterable of ``(url, visit_count)`` rows. Only the URL is
            used; the ``visit_count`` value is ignored.
    """
    sitesCount = {}
    for url, _visitCount in results:
        domain = parseUrl(url)
        if domain is None:
            # parseUrl found no domain; do not chart a bogus "None" site
            continue
        sitesCount[domain] = sitesCount.get(domain, 0) + 1

    # Sort in descending order of visits and keep at most the top 10
    topSites = sorted(sitesCount.items(), key=operator.itemgetter(1), reverse=True)[:10]
    xLabels = [site for site, _ in topSites]
    counts = [visits for _, visits in topSites]
    # Size the x positions to the data: with fewer than ten distinct sites
    # a fixed range(1, 11) would not match the number of bars and labels.
    index = list(range(1, len(topSites) + 1))

    # Plot the results
    plt.bar(index, counts, align='center')
    plt.xticks(index, xLabels)
    plt.show()

# Code execution starts here: locate the Chrome history database, read the
# visited URLs from it, then count and plot them.
historyFile = getHistoryFile()
queryResults = queryHistoryFile(historyFile)
plotResults(queryResults)

Note: The script requires the Chrome browser to be closed during execution, because Chrome keeps its history database locked while it is running.
Share:

2 comments:

  1. Hello admin can we direct run this in python compiler or we need to do some extra things?

    ReplyDelete
    Replies
    1. You just need Python to run the above script.

      Delete