basically a final touch

This commit is contained in:
Daniel Knüttel 2019-02-25 13:07:52 +01:00
parent e7aeaf94fa
commit fdd5fbd888
5 changed files with 51 additions and 32 deletions

View File

@@ -16,6 +16,13 @@ class counter_of_calls(object):
def f(x): def f(x):
return x + 6 return x + 6
@counter_of_calls
def g(x):
return x + 7
if( __name__ == "__main__"): if( __name__ == "__main__"):
print(f(3)) print(f(3))
print(f(5)) print(f(5))
print(g(3))
print(g(5))

View File

@@ -1,3 +1,4 @@
import numpy as np
def bisec(f, a, b, eps, nmax): def bisec(f, a, b, eps, nmax):
""" """
@@ -52,7 +53,10 @@ if( __name__ == "__main__"):
f2 = lambda x: x**3 f2 = lambda x: x**3
f3 = lambda x: -x + 1 f3 = lambda x: -x + 1
f4 = lambda x: -x**3 + 1 f4 = lambda x: -x**3 + 1
f4 = lambda x: (x - 2)*np.exp(-x**2)
fs = [f1, f2, f3, f4] fs = [f1, f2, f3, f4]
for f in fs: for f in fs:
print(bisec(f, -12, 10, 0.001, 100)) print(bisec(f, -12, 10, 0.0000001, 100))
print(bisec(f4, 1.2, 2.4, 0.001, 100))

View File

@@ -4,10 +4,6 @@ import matplotlib.pyplot as plt
f = lambda x: x*np.sin(7*x)*np.exp(-(x - 2)**2) f = lambda x: x*np.sin(7*x)*np.exp(-(x - 2)**2)
x0 = f(np.arange(-20, 20, 0.1)).min()
m = minimize(f, x0, method="CG")
print(m)
g = lambda x0: minimize(f, x0, method="CG").x g = lambda x0: minimize(f, x0, method="CG").x

View File

@@ -24,7 +24,7 @@ def get_data_with_proxy(url, conn_object, visit_first=None):
session = requests.Session() session = requests.Session()
session.proxies = { 'http': i} session.proxies = { 'http': i}
try: try:
response = session.get(url, headers=headers, timeout=3) response = session.get(url, headers=headers, timeout=1)
except Exception as e: except Exception as e:
if(isinstance(e, KeyboardInterrupt)): if(isinstance(e, KeyboardInterrupt)):
raise e raise e

View File

@@ -1,7 +1,10 @@
import logging import logging
from collections import deque
from cfg import config
from url import construct_url from url import construct_url
from proxy import get_data_with_proxy, NoMoreProxiesException from proxy import get_data_with_proxy, NoMoreProxiesException
from db_util import get_page_id from db_util import get_page_id, get_page_title
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -17,12 +20,14 @@ def ignore_title(title):
return True return True
return False return False
def _receive_links(title, connection): def _receive_links(page, connection):
title = get_page_title(page, connection)
url = construct_url(title) url = construct_url(title)
source = get_page_id(title, connection)
result = get_data_with_proxy(url, connection) result = get_data_with_proxy(url, connection)
# This is basically because we don't know the page ID. # This is basically because we don't know the page ID.
for k, page_data in result["query"]["pages"].items(): for k, page_data in result["query"]["pages"].items():
cursor = connection.cursor() cursor = connection.cursor()
@@ -31,45 +36,53 @@ def _receive_links(title, connection):
# avoid 1-loops # avoid 1-loops
if(destination_title == title): if(destination_title == title):
continue continue
if(ignore_title(title)): if(ignore_title(destination_title)):
continue continue
destination = get_page_id(destination_title, connection) destination = get_page_id(destination_title, connection)
cursor.execute("INSERT INTO links(source, destination) VALUES(%s, %s)", (source, destination)) cursor.execute("INSERT INTO links(source, destination) VALUES(%s, %s)", (page, destination))
yield destination_title yield destination
else: else:
for destination in page_data["links"]: for destination in page_data["links"]:
if(ignore_title(title)):
continue
destination_title = destination["title"].replace(" ", "_") destination_title = destination["title"].replace(" ", "_")
if(ignore_title(destination_title)):
continue
destination = get_page_id(destination_title, connection) destination = get_page_id(destination_title, connection)
cursor.execute("INSERT INTO links(source, destination) VALUES(%s, %s)", (source, destination)) cursor.execute("INSERT INTO links(source, destination) VALUES(%s, %s)", (page, destination))
yield destination_title yield destination
connection.commit() connection.commit()
def receive_links(title, connection): def receive_links(title, connection):
return list(_receive_links(title, connection)) return list(_receive_links(title, connection))
def receive_link_graph(title, connection, depth, fetch_missing=True): def receive_link_graph(title, connection, depth):
page = get_page_id(title, connection)
do_receive_link_graph(page, connection, depth, fetch_missing=True)
cursor = connection.cursor()
cursor.execute("SELECT COUNT(page) FROM failed_to_fetch")
if(cursor.fetchone()[0]):
do_receive_link_graph(page, connection, depth, fetch_missing=True)
def do_receive_link_graph(page, connection, depth, fetch_missing=False):
if(depth < 0):
# end of recursion
return
logger.info("do_receive_link_graph(%d, <connection>, %d)" % (page, depth))
cursor = connection.cursor() cursor = connection.cursor()
# Fetch the missing links. # Fetch the missing links.
if(fetch_missing): if(fetch_missing):
delete_cursor = connection.cursor() delete_cursor = connection.cursor()
cursor.execute('''SELECT pages.title, failed_to_fetch.depth, failed_to_fetch.page cursor.execute('''SELECT failed_to_fetch.depth, failed_to_fetch.page
FROM failed_to_fetch FROM failed_to_fetch
LEFT JOIN pages ON pages.page_id=failed_to_fetch.page''') ''')
for t, d, p in cursor: for d, p in cursor:
receive_link_graph(t, connection, d, fetch_missing=False) do_receive_link_graph(p, connection, d, fetch_missing=False)
delete_cursor.execute("DELETE FROM failed_to_fetch WHERE page=%s", (p,)) delete_cursor.execute("DELETE FROM failed_to_fetch WHERE page=%s", (p,))
if(depth < 0):
# end of recursion
return
page = get_page_id(title, connection)
cursor = connection.cursor() cursor = connection.cursor()
cursor.execute("SELECT COUNT(source) FROM links WHERE source=%s", (page,)) cursor.execute("SELECT COUNT(source) FROM links WHERE source=%s", (page,))
@@ -77,19 +90,18 @@ def receive_link_graph(title, connection, depth, fetch_missing=True):
# we fetched that title already # we fetched that title already
return return
logger.info("fetching links for {}".format(title)) logger.info("fetching links for {}".format(page))
for link in _receive_links(title, connection): for link in _receive_links(page, connection):
try: try:
receive_link_graph(link, connection, depth - 1) do_receive_link_graph(link, connection, depth - 1)
except NoMoreProxiesException as e: except NoMoreProxiesException as e:
logger.exception("All proxies are blocked") logger.exception("All proxies are blocked")
# Wikipedia blocked all our proxies. # Wikipedia blocked all our proxies.
# Retry later, so we have to store our list that is still to fetch. # Retry later, so we have to store our list that is still to fetch.
cursor = connection.cursor() cursor = connection.cursor()
failed_page = get_page_id(link, connection) cursor.execute("INSERT INTO failed_to_fetch(page, depth) VALUES(%s, %s)", (link, depth - 1))
cursor.execute("INSERT INTO failed_to_fetch(page, depth) VALUES(%s, %s)", (failed_page, depth - 1))
connection.commit() connection.commit()