diff --git a/ex_35.py b/ex_35.py
index 5e7990c..ca81966 100644
--- a/ex_35.py
+++ b/ex_35.py
@@ -16,6 +16,13 @@ class counter_of_calls(object):
 def f(x):
     return x + 6
 
+@counter_of_calls
+def g(x):
+    return x + 7
+
 if( __name__ == "__main__"):
     print(f(3))
     print(f(5))
+
+    print(g(3))
+    print(g(5))
diff --git a/ex_46.py b/ex_46.py
index 552fb03..dddc609 100644
--- a/ex_46.py
+++ b/ex_46.py
@@ -1,3 +1,4 @@
+import numpy as np
 def bisec(f, a, b, eps, nmax):
     """
 
@@ -52,7 +53,10 @@ if( __name__ == "__main__"):
     f2 = lambda x: x**3
     f3 = lambda x: -x + 1
     f4 = lambda x: -x**3 + 1
+    f4 = lambda x: (x - 2)*np.exp(-x**2)
 
     fs = [f1, f2, f3, f4]
     for f in fs:
-        print(bisec(f, -12, 10, 0.001, 100))
+        print(bisec(f, -12, 10, 0.0000001, 100))
+
+print(bisec(f4, 1.2, 2.4, 0.001, 100))
diff --git a/ex_47.py b/ex_47.py
index f752279..e57220a 100644
--- a/ex_47.py
+++ b/ex_47.py
@@ -4,10 +4,6 @@ import matplotlib.pyplot as plt
 
 f = lambda x: x*np.sin(7*x)*np.exp(-(x - 2)**2)
 
-x0 = f(np.arange(-20, 20, 0.1)).min()
-
-m = minimize(f, x0, method="CG")
-print(m)
 
 g = lambda x0: minimize(f, x0, method="CG").x
 
diff --git a/exam/ex01/proxy.py b/exam/ex01/proxy.py
index 049a791..a305b86 100644
--- a/exam/ex01/proxy.py
+++ b/exam/ex01/proxy.py
@@ -24,7 +24,7 @@ def get_data_with_proxy(url, conn_object, visit_first=None):
         session = requests.Session()
         session.proxies = { 'http': i}
         try:
-            response = session.get(url, headers=headers, timeout=3)
+            response = session.get(url, headers=headers, timeout=1)
         except Exception as e:
             if(isinstance(e, KeyboardInterrupt)):
                 raise e
diff --git a/exam/ex01/receive.py b/exam/ex01/receive.py
index 81958ec..1cecea6 100644
--- a/exam/ex01/receive.py
+++ b/exam/ex01/receive.py
@@ -1,7 +1,10 @@
 import logging
+from collections import deque
+
+from cfg import config
 
 from url import construct_url
 from proxy import get_data_with_proxy, NoMoreProxiesException
-from db_util import get_page_id
+from db_util import get_page_id, get_page_title
 
 logger = logging.getLogger(__name__)
@@ -17,12 +20,14 @@ def ignore_title(title):
             return True
     return False
 
 
-def _receive_links(title, connection):
+def _receive_links(page, connection):
+    title = get_page_title(page, connection)
     url = construct_url(title)
-    source = get_page_id(title, connection)
-
     result = get_data_with_proxy(url, connection)
 
+
+
+    # This is basically because we don't know the page ID.
     for k, page_data in result["query"]["pages"].items():
         cursor = connection.cursor()
@@ -31,45 +36,53 @@ def _receive_links(title, connection):
                 # avoid 1-loops
                 if(destination_title == title):
                     continue
-                if(ignore_title(title)):
+                if(ignore_title(destination_title)):
                     continue
                 destination = get_page_id(destination_title, connection)
-                cursor.execute("INSERT INTO links(source, destination) VALUES(%s, %s)", (source, destination))
-                yield destination_title
+                cursor.execute("INSERT INTO links(source, destination) VALUES(%s, %s)", (page, destination))
+                yield destination
         else:
             for destination in page_data["links"]:
-                if(ignore_title(title)):
-                    continue
                 destination_title = destination["title"].replace(" ", "_")
+                if(ignore_title(destination_title)):
+                    continue
                 destination = get_page_id(destination_title, connection)
-                cursor.execute("INSERT INTO links(source, destination) VALUES(%s, %s)", (source, destination))
-                yield destination_title
+                cursor.execute("INSERT INTO links(source, destination) VALUES(%s, %s)", (page, destination))
+                yield destination
 
     connection.commit()
 
 
 def receive_links(title, connection):
     return list(_receive_links(title, connection))
 
 
-def receive_link_graph(title, connection, depth, fetch_missing=True):
+def receive_link_graph(title, connection, depth):
+    page = get_page_id(title, connection)
+    do_receive_link_graph(page, connection, depth, fetch_missing=True)
+
+    cursor = connection.cursor()
+    cursor.execute("SELECT COUNT(page) FROM failed_to_fetch")
+    if(cursor.fetchone()[0]):
+        do_receive_link_graph(page, connection, depth, fetch_missing=True)
+
+def do_receive_link_graph(page, connection, depth, fetch_missing=False):
+    if(depth < 0):
+        # end of recursion
+        return
+    logger.info("do_receive_link_graph(%d, , %d)" % (page, depth))
     cursor = connection.cursor()
 
     # Fetch the missing links.
     if(fetch_missing):
         delete_cursor = connection.cursor()
-        cursor.execute('''SELECT pages.title, failed_to_fetch.depth, failed_to_fetch.page
-                FROM failed_to_fetch
-                LEFT JOIN pages ON pages.page_id=failed_to_fetch.page''')
-        for t, d, p in cursor:
-            receive_link_graph(t, connection, d, fetch_missing=False)
+        cursor.execute('''SELECT failed_to_fetch.depth, failed_to_fetch.page
+                FROM failed_to_fetch
+                ''')
+        for d, p in cursor:
+            do_receive_link_graph(p, connection, d, fetch_missing=False)
             delete_cursor.execute("DELETE FROM failed_to_fetch WHERE page=%s", (p,))
 
-    if(depth < 0):
-        # end of recursion
-        return
-
-    page = get_page_id(title, connection)
 
     cursor = connection.cursor()
     cursor.execute("SELECT COUNT(source) FROM links WHERE source=%s", (page,))
@@ -77,19 +90,18 @@ def receive_link_graph(title, connection, depth, fetch_missing=True):
         # we fetched that title already
         return
 
-    logger.info("fetching links for {}".format(title))
+    logger.info("fetching links for {}".format(page))
 
-    for link in _receive_links(title, connection):
+    for link in _receive_links(page, connection):
         try:
-            receive_link_graph(link, connection, depth - 1)
+            do_receive_link_graph(link, connection, depth - 1)
         except NoMoreProxiesException as e:
             logger.exception("All proxies are blocked")
             # Wikipedia blocked all our proxies.
             # Retry later, so we have to store our list that is still to fetch.
             cursor = connection.cursor()
-            failed_page = get_page_id(link, connection)
-            cursor.execute("INSERT INTO failed_to_fetch(page, depth) VALUES(%s, %s)", (failed_page, depth - 1))
+            cursor.execute("INSERT INTO failed_to_fetch(page, depth) VALUES(%s, %s)", (link, depth - 1))
 
             connection.commit()
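
The reworked crawler is now driven through receive_link_graph(title, connection, depth): it resolves the page id once via get_page_id(), walks the graph with do_receive_link_graph(), and afterwards re-runs anything still recorded in failed_to_fetch. A minimal driver sketch under assumptions: the connection parameters are placeholders, and mysql.connector merely stands in for whatever DB-API driver the project's cfg/db_util actually set up (the %s placeholders used above fit that style).

    # Hypothetical driver; connection details are placeholders, not part of the patch.
    import logging

    import mysql.connector  # assumption: any DB-API driver using %s placeholders works the same way

    from receive import receive_link_graph

    logging.basicConfig(level=logging.INFO)

    connection = mysql.connector.connect(
        host="localhost", user="wiki", password="wiki", database="wikidb"
    )

    # Resolve the start page, fetch its link graph two levels deep, and retry
    # any pages that ended up in failed_to_fetch because all proxies were blocked.
    receive_link_graph("Python_(programming_language)", connection, 2)

    connection.close()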