Hi friends,
In this post, I'll share a Python script that determines the top 10 websites you visit most often in your Chrome browser. The Python script given below performs the following four tasks, which are implemented across four functions:
- Finding the location of the Chrome history database
- Executing the query on the database to get the URLs visited and associated information
- Counting the visits per website from the results of the above query
- Plotting the ten most frequently visited websites
import sqlite3
import os
import operator
from collections import OrderedDict
import pylab as plt
# Function to extract the domain name from a URL
def parseUrl(url):
    """Return the host part of ``url`` without a leading ``www.``.

    The host is taken as the text between the first ``//`` and the next
    ``/`` that follows it.

    Args:
        url: URL string such as ``"https://www.example.com/page"``.

    Returns:
        The domain name (e.g. ``"example.com"``), or ``None`` when ``url``
        contains no ``//`` separator. In that case ``"Error in URL"`` is
        printed.
    """
    try:
        afterScheme = url.split('//')[1]
    except IndexError:
        print("Error in URL")
        return None

    domainName = afterScheme.split('/', 1)[0]
    # Strip "www." only when it is a prefix; a blanket replace() would also
    # mangle hosts that merely contain it (e.g. "mywww.example.com").
    if domainName.startswith("www."):
        domainName = domainName[len("www."):]
    return domainName
# Function to return the history database location
def getHistoryFile():
    """Return the path of the Chrome history database for the current user.

    The path points at the ``history`` file inside Chrome's ``Default``
    profile directory under the user's home directory
    (``AppData/Local/Google/Chrome/User Data/Default``).

    Returns:
        Path string of the history database file.

    Raises:
        FileNotFoundError: If the profile directory does not exist.
    """
    # Build the path from components instead of a backslash literal: the
    # literal relied on invalid escape sequences ("\A", "\L", "\G", ...).
    profileDir = os.path.join(
        os.path.expanduser('~'),
        "AppData", "Local", "Google", "Chrome", "User Data", "Default",
    )
    # Fail early with a clear message when the profile directory is missing.
    if not os.path.isdir(profileDir):
        raise FileNotFoundError(
            "Chrome profile directory not found: " + profileDir
        )
    return os.path.join(profileDir, 'history')
# Function to query on the database file
def queryHistoryFile(historyFile):
    """Fetch the visited URLs from a Chrome history database.

    Joins the ``urls`` and ``visits`` tables, so the result contains one row
    per entry in ``visits`` that references a URL.

    Args:
        historyFile: Path of the SQLite history database.

    Returns:
        List of ``(url, visit_count)`` tuples.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    query = "SELECT urls.url, urls.visit_count FROM urls, visits WHERE urls.id = visits.url;"
    connection = sqlite3.connect(historyFile)
    try:
        cursor = connection.cursor()
        cursor.execute(query)
        return cursor.fetchall()
    finally:
        # Always release the handle so the database file is not left locked.
        connection.close()
# Function to count and plot the results barplot
def plotResults(results):
    """Count visits per domain and show a bar chart of the top ten.

    Args:
        results: Iterable of ``(url, visit_count)`` rows. Each row is
            counted as one visit to the URL's domain; the ``visit_count``
            value itself is not used.
    """
    sitesCount = {}
    # Count the occurrences of each domain among the visited URLs.
    for url, _visitCount in results:
        domain = parseUrl(url)
        if domain is None:
            # parseUrl could not extract a domain; do not plot a "None" site.
            continue
        sitesCount[domain] = sitesCount.get(domain, 0) + 1

    # Sort in descending order of visits and keep at most ten entries.
    topSites = sorted(
        sitesCount.items(), key=operator.itemgetter(1), reverse=True
    )[:10]
    if not topSites:
        print("No visited websites to plot")
        return

    # Size the x positions from the data: a fixed range(1, 11) breaks the
    # plot whenever fewer than ten distinct domains were visited.
    index = list(range(1, len(topSites) + 1))
    xLabels = [domain for domain, _ in topSites]
    counts = [visits for _, visits in topSites]

    # Plot the results.
    plt.bar(index, counts, align='center')
    plt.xticks(index, xLabels)
    plt.show()
#code execution starts here
# Locate the history database once (the duplicate lookup was redundant),
# query it, then count and plot the visited sites.
historyFile = getHistoryFile()
queryResults = queryHistoryFile(historyFile)
plotResults(queryResults)
Note: The script requires the Chrome browser to be closed during execution, because Chrome keeps its history database locked while it is running.