added mysql support (in theory)
This commit is contained in:
parent
635a9f7739
commit
c958e44632
|
@ -1,26 +1,48 @@
|
|||
import os
|
||||
import sqlite3
|
||||
|
||||
from cfg import config
|
||||
if(not config["use_sqlite"]):
|
||||
import pymysql
|
||||
|
||||
from proxy import fetch_proxies
|
||||
|
||||
def get_cache(directory, name):
|
||||
cache_file = os.path.join(directory, "{}.sqlite".format(name))
|
||||
if(not os.path.exists(cache_file)):
|
||||
with open(cache_file, "w") as fin:
|
||||
pass
|
||||
if(config["use_sqlite"]):
|
||||
cache_file = os.path.join(directory, "{}.sqlite".format(name))
|
||||
if(not os.path.exists(cache_file)):
|
||||
with open(cache_file, "w") as fin:
|
||||
pass
|
||||
db = sqlite3.connect(cache_file)
|
||||
|
||||
cursor = db.cursor()
|
||||
|
||||
cursor.execute("CREATE TABLE proxies(proxy TEXT, lasttime_could_not_be_used DECIMAL)")
|
||||
cursor.execute("CREATE TABLE links(source TEXT, destination TEXT)")
|
||||
cursor.execute("CREATE TABLE dijkstra_helper(name TEXT UNIQUE, value INT)")
|
||||
cursor.execute("CREATE TABLE failed_to_fetch(title TEXT, depth INT)")
|
||||
|
||||
db.commit()
|
||||
db = sqlite3.connect(cache_file)
|
||||
fetch_proxies(db)
|
||||
return db
|
||||
db = pymysql.connect(
|
||||
host=config["mysql_server"]
|
||||
, user=config["mysql_user"]
|
||||
, password=config["mysql_password"]
|
||||
, db=config["mysql_database"]
|
||||
, charset="utf8")
|
||||
|
||||
cursor = db.cursor()
|
||||
cursor = db.cursor()
|
||||
|
||||
cursor.execute("CREATE TABLE proxies(proxy TEXT, lasttime_could_not_be_used DECIMAL)")
|
||||
cursor.execute("CREATE TABLE links(source TEXT, destination TEXT)")
|
||||
cursor.execute("CREATE TABLE dijkstra_helper(name TEXT UNIQUE, value INT)")
|
||||
cursor.execute("CREATE TABLE failed_to_fetch(title TEXT, depth INT)")
|
||||
cursor.execute("CREATE TABLE IF NOT EXISTS proxies(proxy varchar(100), lasttime_could_not_be_used DECIMAL)")
|
||||
cursor.execute("CREATE TABLE IF NOT EXISTS links(source varchar(50), destination varchar(50))")
|
||||
cursor.execute("CREATE TABLE IF NOT EXISTS dijkstra_helper(name varchar(50) UNIQUE, value INT)")
|
||||
cursor.execute("CREATE TABLE IF NOT EXISTS failed_to_fetch(title varchar(50), depth INT)")
|
||||
|
||||
db.commit()
|
||||
db = sqlite3.connect(cache_file)
|
||||
fetch_proxies(db)
|
||||
db.commit()
|
||||
return db
|
||||
|
||||
|
||||
def clear_cache_data(connection):
|
||||
cursor = connection.cursor()
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
from collections import deque
|
||||
|
||||
from cfg import config
|
||||
|
||||
def can_reach(title, connection):
|
||||
cursor = connection.cursor()
|
||||
cursor.execute("SELECT COUNT(destination) FROM links WHERE destination=?", (title, ))
|
||||
|
@ -15,12 +17,20 @@ def shortest_path(center, title, connection):
|
|||
path = deque()
|
||||
while(current_title != center):
|
||||
path.append(current_title)
|
||||
cursor.execute('''SELECT links.source
|
||||
FROM links
|
||||
LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.name
|
||||
WHERE links.destination=:title
|
||||
ORDER BY dijkstra_helper.value ASC
|
||||
LIMIT 1''', {"title": current_title})
|
||||
if(config["use_sqlite"]):
|
||||
cursor.execute('''SELECT links.source
|
||||
FROM links
|
||||
LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.name
|
||||
WHERE links.destination=:title
|
||||
ORDER BY dijkstra_helper.value ASC
|
||||
LIMIT 1''', {"title": current_title})
|
||||
else:
|
||||
cursor.execute('''SELECT links.source
|
||||
FROM links
|
||||
LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.name
|
||||
WHERE links.destination=:title
|
||||
SORT BY dijkstra_helper.value ASC
|
||||
LIMIT 1''', {"title": current_title})
|
||||
current_title = cursor.fetchone()[0]
|
||||
return list(reversed(path))
|
||||
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
from collections import deque
|
||||
|
||||
from cfg import config
|
||||
|
||||
def prepare_dijkstra(connection):
|
||||
cursor = connection.cursor()
|
||||
|
@ -5,7 +8,10 @@ def prepare_dijkstra(connection):
|
|||
SELECT destination FROM links
|
||||
''')
|
||||
|
||||
cursor.execute("UPDATE dijkstra_helper SET value=1e1000")
|
||||
if(config["use_sqlite"]):
|
||||
cursor.execute("UPDATE dijkstra_helper SET value=1e1000")
|
||||
else:
|
||||
cursor.execute("UPDATE dijkstra_helper SET value=2147483647")
|
||||
connection.commit()
|
||||
|
||||
def dijkstra_one(title, value, connection):
|
||||
|
@ -35,9 +41,11 @@ def recursive_dijkstra(titles, value, connection):
|
|||
if(not titles):
|
||||
return
|
||||
|
||||
todos = deque()
|
||||
for title in titles:
|
||||
todos = dijkstra_one(title, value, connection)
|
||||
recursive_dijkstra(todos, value + 1, connection)
|
||||
todos.extend(dijkstra_one(title, value, connection))
|
||||
|
||||
recursive_dijkstra(todos, value + 1, connection)
|
||||
|
||||
|
||||
def dijkstra(title, connection):
|
||||
|
|
|
@ -6,6 +6,7 @@ from cache import get_cache
|
|||
from receive import receive_links, receive_link_graph
|
||||
|
||||
from dijkstra import prepare_dijkstra, dijkstra
|
||||
from connectivity import shortest_path
|
||||
|
||||
cache = get_cache("./cache/", "Angela_Merkel")
|
||||
receive_link_graph("Angela_Merkel", cache, 2)
|
||||
|
@ -14,6 +15,7 @@ cursor = cache.cursor()
|
|||
cursor.execute("SELECT COUNT(source) FROM links")
|
||||
print(cursor.fetchall())
|
||||
|
||||
prepare_dijkstra(cache)
|
||||
dijkstra("Angela_Merkel", cache)
|
||||
#prepare_dijkstra(cache)
|
||||
#dijkstra("Angela_Merkel", cache)
|
||||
|
||||
print(shortest_path("Angela_Merkel", "Germany", cache))
|
||||
|
|
|
@ -8,6 +8,8 @@ import logging
|
|||
import time
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from cfg import config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class NoMoreProxiesException(Exception):
|
||||
|
@ -16,7 +18,10 @@ class NoMoreProxiesException(Exception):
|
|||
def get_data_with_proxy(url, conn_object, visit_first=None):
|
||||
cursor = conn_object.cursor()
|
||||
# Assume that table name is proxies
|
||||
cursor.execute('''SELECT proxy, lasttime_could_not_be_used FROM proxies ORDER BY lasttime_could_not_be_used ASC''')
|
||||
if(config["use_sqlite"]):
|
||||
cursor.execute('''SELECT proxy, lasttime_could_not_be_used FROM proxies ORDER BY lasttime_could_not_be_used ASC''')
|
||||
else:
|
||||
cursor.execute('''SELECT proxy, lasttime_could_not_be_used FROM proxies SORT BY lasttime_could_not_be_used ASC''')
|
||||
headers = {}
|
||||
for i, lasttime_could_not_be_used in cursor:
|
||||
session = requests.Session()
|
||||
|
|
Loading…
Reference in New Issue
Block a user