From e72a1a118dcecf6d460983e9feb86d3c349930b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= Date: Fri, 11 Oct 2019 11:46:27 +0200 Subject: [PATCH] initial tests --- .../graph_storage/array_sorted_list/setup.py | 3 + .../array_sorted_list/sorted_list.c | 375 ++++++++++++++++++ tests/graph_storage/array_sorted_list/test.py | 9 + tests/graph_storage/plot_all_array.py | 96 +++++ tests/graph_storage/plot_dok.py | 96 +++++ tests/graph_storage/plot_networkx.py | 90 +++++ tests/graph_storage/requirements.tx | 3 + 7 files changed, 672 insertions(+) create mode 100644 tests/graph_storage/array_sorted_list/setup.py create mode 100644 tests/graph_storage/array_sorted_list/sorted_list.c create mode 100644 tests/graph_storage/array_sorted_list/test.py create mode 100644 tests/graph_storage/plot_all_array.py create mode 100644 tests/graph_storage/plot_dok.py create mode 100644 tests/graph_storage/plot_networkx.py create mode 100644 tests/graph_storage/requirements.tx diff --git a/tests/graph_storage/array_sorted_list/setup.py b/tests/graph_storage/array_sorted_list/setup.py new file mode 100644 index 0000000..da19a2e --- /dev/null +++ b/tests/graph_storage/array_sorted_list/setup.py @@ -0,0 +1,3 @@ +from distutils.core import setup, Extension +setup(name="sorted_list", version="1.0", + ext_modules=[Extension("sorted_list", ["sorted_list.c"], extra_compile_args=["-g"])]) diff --git a/tests/graph_storage/array_sorted_list/sorted_list.c b/tests/graph_storage/array_sorted_list/sorted_list.c new file mode 100644 index 0000000..2614784 --- /dev/null +++ b/tests/graph_storage/array_sorted_list/sorted_list.c @@ -0,0 +1,375 @@ +#ifdef C_TEST +#define npy_intp int64_t +#else +#include +#include +//#include +#include +#endif +#include + +typedef struct ll_node_s +{ + struct ll_node_s * next; + npy_intp value; +} ll_node_t; + +void +ll_recursively_delete_list(ll_node_t * list) +{ + ll_node_t * next_node; + while(list) + { + next_node = list->next; + free(list); + list = 
next_node; + } +} + +ll_node_t * +ll_node_t_new(ll_node_t * next, npy_intp value) +{ + ll_node_t * node = malloc(sizeof(ll_node_t)); + if(!node) + { + return NULL; + } + node->next = next; + node->value = value; + return node; +} + +int +ll_insert_value(ll_node_t ** list, npy_intp value) +{ + ll_node_t * current_node; + ll_node_t * last_node; + ll_node_t * new_node; + + if(!*list) + { + *list = ll_node_t_new(NULL, value); + if(*list) + { + return 0; + } + return 1; + } + + current_node = *list; + last_node = *list; + while(current_node && current_node->value < value) + { + last_node = current_node; + current_node = current_node->next; + + } + + if(current_node && current_node->value == value) + { + return 2; + } + + + new_node = ll_node_t_new(current_node, value); + if(!new_node) + { + return 1; + } + // This is the case, when we set the first element. + if(current_node == last_node) + { + *list = new_node; + return 0; + } + last_node->next = new_node; + return 0; +} + +int +ll_delete_value(ll_node_t ** list, npy_intp value) +{ + ll_node_t * current_node; + ll_node_t * last_node; + + current_node = *list; + last_node = *list; + + while(current_node && current_node->value < value) + { + last_node = current_node; + current_node = current_node->next; + } + + if(!current_node || current_node->value != value) + { + return 2; + } + + if(current_node == last_node) + { + *list = current_node->next; + } + + last_node->next = current_node->next; + free(current_node); + return 0; +} + +int +ll_has_value(ll_node_t * list, npy_intp value) +{ + while(list && list->value < value) + { + list = list->next; + } + + if(list && list->value == value) + { + return 1; + } + return 0; +} + +#ifdef C_TEST + +#include + +void +print_list(ll_node_t * list) +{ + while(list) + { + printf("%d->", list->value); + list = list->next; + } + printf("NULL\n"); +} + +int +main(void) +{ + ll_node_t * list = NULL; + int i; + + npy_intp array[15] = {14, 5, 3, 13, 11, 2, 6, 1, 12, 4, 9, 10, 0, 7, 8}; + + 
for(i = 0; i < 15; i++) + { + ll_insert_value(&list, array[i]); + } + + print_list(list); + + ll_delete_value(&list, 5); + print_list(list); + + ll_delete_value(&list, 0); + print_list(list); + + printf("HAS 2 %d\n", ll_has_value(list, 2)); + printf("HAS 5 %d\n", ll_has_value(list, 5)); + printf("HAS 20 %d\n", ll_has_value(list, 20)); + + + ll_recursively_delete_list(list); + return 0; +} + + +#else + +typedef struct +{ + PyObject_HEAD + ll_node_t ** lists; + npy_intp length; + +} ALL_Array; + +static int +ALL_Array_init(ALL_Array * self + , PyObject * args + , PyObject * kwds) +{ + static char * kwrds[] = {"length", NULL}; + npy_intp length; + + if(!PyArg_ParseTupleAndKeywords(args, kwds, "I", kwrds, &length)) + { + return -1; + } + + if(length <= 0) + { + PyErr_SetString(PyExc_ValueError, "length must be positive"); + return -1; + } + + self->lists = calloc(sizeof(ll_node_t), length); + self->length = length; + if(!self->lists) + { + PyErr_SetString(PyExc_MemoryError, "out of memory"); + return -1; + } + return 0; +} + +static PyObject * +ALL_Array_getitem(ALL_Array * self + , PyObject * args) +{ + // Somehow PyArg_ParseTuple does utter crap if I + // do not set i and j explicitly before calling PyArg_ParseTuple. 
+ npy_intp i = 0xdeadbeef, j = 0xdeadbeef, swp, result; + PyObject * result_obj; + + if(!PyArg_ParseTuple(args, "II", &i, &j)) + { + return NULL; + } + + if(i < j) + { + swp = i; + i = j; + j = swp; + } + + if(j < 0) + { + PyErr_SetString(PyExc_KeyError, "index must be positive"); + return NULL; + } + if(i >= self->length) + { + PyErr_SetString(PyExc_KeyError, "index out of range"); + return NULL; + } + + result = ll_has_value(self->lists[i], j); + result_obj = Py_BuildValue("i", result); + return result_obj; +} + +static PyObject * +ALL_Array_setitem(ALL_Array * self + , PyObject * args) +{ + npy_intp i = 0xdeadbeef, j = 0xdeadbeef, swp, value = 0xdeadbeef; + //npy_intp i, j, swp, value; + int result; + + if(!PyArg_ParseTuple(args, "IIp", &i, &j, &value)) + { + return NULL; + } + + + if(i < j) + { + swp = i; + i = j; + j = swp; + } + + if(i >= self->length) + { + PyErr_SetString(PyExc_KeyError, "index out of range"); + return NULL; + } + + if(value) + { + result = ll_insert_value(&(self->lists[i]), j); + if(result == 2) + { + PyErr_SetString(PyExc_ValueError, "element is already set"); + return NULL; + } + if(result == 1) + { + PyErr_SetString(PyExc_MemoryError, "failed to allocate new node"); + return NULL; + } + Py_RETURN_NONE; + } + else + { + result = ll_delete_value(&(self->lists[i]), j); + + if(result) + { + PyErr_SetString(PyExc_ValueError, "element is not set"); + return NULL; + } + Py_RETURN_NONE; + } +} + +static void +ALL_Array_dealloc(ALL_Array * self) +{ + int i; + for(i = 0; i < self->length; i++) + { + ll_recursively_delete_list(self->lists[i]); + } + free(self->lists); + Py_TYPE(self)->tp_free((PyObject *) self); +} + +static PyMemberDef ALL_Array_members[] = {{NULL}}; +static PyMethodDef ALL_Array_methods[] = { + {"setitem", (PyCFunction) ALL_Array_setitem, METH_VARARGS, "sets an item"} + , {"getitem", (PyCFunction) ALL_Array_getitem, METH_VARARGS, "gets an item"} + , {NULL} +}; + +static PyTypeObject ALL_ArrayType = { + 
PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "sorted_list.ALL_Array", + .tp_doc = "special type for graph representation", + .tp_basicsize = sizeof(ALL_Array), + .tp_itemsize = 0, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + .tp_new = PyType_GenericNew, + .tp_init = (initproc) ALL_Array_init, + .tp_dealloc = (destructor) ALL_Array_dealloc, + .tp_members = ALL_Array_members, + .tp_methods = ALL_Array_methods, +}; + +static PyModuleDef sorted_listmodule = { + PyModuleDef_HEAD_INIT, + .m_name = "sorted_list", + .m_doc = "module containing a special graph storage class", + .m_size = -1, +}; + +PyMODINIT_FUNC +PyInit_sorted_list(void) +{ + PyObject * m; + if(PyType_Ready(&ALL_ArrayType) < 0) + { + return NULL; + } + + m = PyModule_Create(&sorted_listmodule); + if(!m) + { + return NULL; + } + + Py_INCREF(&ALL_ArrayType); + if(PyModule_AddObject(m, "ALL_Array", (PyObject *) &ALL_ArrayType) < 0) + { + Py_DECREF(&ALL_ArrayType); + Py_DECREF(m); + return NULL; + } + return m; +} +#endif diff --git a/tests/graph_storage/array_sorted_list/test.py b/tests/graph_storage/array_sorted_list/test.py new file mode 100644 index 0000000..6d91fe6 --- /dev/null +++ b/tests/graph_storage/array_sorted_list/test.py @@ -0,0 +1,9 @@ +from sorted_list import ALL_Array +a = ALL_Array(1000) +print("constructed ALL_Array:", a) + +print("getting an item from array ...") +print(a.getitem(1, 100)) + +a.setitem(1, 100, True) +print(a.getitem(1, 100)) diff --git a/tests/graph_storage/plot_all_array.py b/tests/graph_storage/plot_all_array.py new file mode 100644 index 0000000..cf5fbb7 --- /dev/null +++ b/tests/graph_storage/plot_all_array.py @@ -0,0 +1,96 @@ +import timeit +import itertools +import sys +import random +import matplotlib.pyplot as plt +import matplotlib.ticker as mtick + +from sorted_list import ALL_Array + +plt.gca().yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2e')) +plt.gcf().set_figheight(10) +plt.gcf().set_figwidth(20) + +def construction(node_number): + return 
# Pre-generate the benchmark input: edges[k][l] is a tuple
# (edge list, node count) for edge_counts[k] and node_numbers[l].
edges = []
for c in edge_counts:
    m = []
    for e in node_numbers:
        # Sources are drawn from the lower half of the node range, targets
        # from the quarter directly above it; both are shuffled.
        edge_from = list(range(e // 2))
        edge_to = list(range(e // 2, e // 2 + e // 4))
        random.shuffle(edge_from)
        random.shuffle(edge_to)

        # Keep at most the first c pairs of the cartesian product.
        pairs = itertools.product(edge_from, edge_to)
        m.append((list(itertools.islice(pairs, c)), e))
    edges.append(m)
", end="", flush=True) + m.append(min(timeit.Timer(lambda: add_edges(*e)).repeat(repeat=repeat, number=number)) / number) + print("done") + results.append(m) + + +# remove construction time +results = [[(t - ct) / edge_count for t, ct in zip(times, result)] for times, edge_count in zip(results, edge_counts)] + +handles = [plt.plot(node_numbers, result, label="When adding {} edges".format(ec))[0] for result, ec in zip(results, edge_counts)] + +plt.legend(handles=handles) + +plt.title("Time to add one Edge in Seconds ({} loops, best out of {})".format(number, repeat)) +plt.xlabel("Number of Nodes") +plt.ylabel("Time in $s$") + +plt.savefig("runtime_all_array_graph_add_edges.png", dpi=400) +print("figure saved to runtime_all_array_graph_add_edges.png") diff --git a/tests/graph_storage/plot_dok.py b/tests/graph_storage/plot_dok.py new file mode 100644 index 0000000..b18d60f --- /dev/null +++ b/tests/graph_storage/plot_dok.py @@ -0,0 +1,96 @@ +import timeit +import itertools +import sys +import random +import matplotlib.pyplot as plt +import matplotlib.ticker as mtick + +from scipy.sparse import dok_matrix + + +plt.gca().yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2e')) +plt.gcf().set_figheight(10) +plt.gcf().set_figwidth(20) + +def construction(node_number): + return dok_matrix((node_number, node_number)) + +number = 100 +repeat = 100 +node_numbers = [100, 500, 1000, 1500, 2000, 3000, 4000, 6000, 8000, 10000, 20000, 50000, 100000, 500000, 1000000] + +result = list() + +for node_number in node_numbers: + print("running trial with", node_number, "nodes... 
", end="", flush=True) + timer = timeit.Timer(lambda: construction(node_number)) + result.append(min(timer.repeat(repeat=repeat, number=number)) / number) + print("done") + +h0, = plt.plot(node_numbers, result, "go-", label="Graph Construction Time in $s$") +plt.title("Graph Construction Time in Seconds ({} loops, best out of {})".format(number, repeat)) +plt.xlabel("Number of Nodes") +plt.ylabel("Time in $s$") +plt.legend(handles=[h0]) + +plt.savefig("runtime_dok_matrix_graph_construction.png", dpi=400) +print("figure saved to runtime_dok_matrix_graph_construction.png") + +plt.clf() +plt.gca().yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2e')) +plt.gcf().set_figheight(10) +plt.gcf().set_figwidth(20) + +edge_counts = [10, 20, 50, 100, 500, 1000] +edge_counts = [e * 10 for e in edge_counts] + +def add_edges(edges, node_number): + g = construction(node_number) + for edge in edges: + g[edge] = 1 + +#edges = [[(list(itertools.product(list(range(e//2, e//2 + e//4)), 2))[:c], e) +# for e in node_numbers] +# for c in edge_counts] + + +edges = [] +for c in edge_counts: + m = [] + for e in node_numbers: + edge_from = list(range(e//2)) + edge_to = list(range(e//2, e//2 + e//4)) + + random.shuffle(edge_from) + random.shuffle(edge_to) + + m.append(([i for i,_ in zip(itertools.product(edge_from, edge_to), range(c))], e)) + edges.append(m) + +#results = [[min(timeit.Timer(lambda: add_edges(*e)).repeat(repeat=repeat, number=number) for e in i] +# for i in edges] + +results = [] + + +for i,edge_count in zip(edges, edge_counts): + m = [] + for e,node_number in zip(i, node_numbers): + print("running trial with", node_number, "nodes;", edge_count, "edges ... 
# Remove the construction time (`result`, measured per node count earlier in
# this script) from every measurement and normalise to the time per edge.
results = [
    [(t - ct) / edge_count for t, ct in zip(times, result)]
    for times, edge_count in zip(results, edge_counts)
]

# One curve per edge count.
handles = [
    plt.plot(node_numbers, per_edge_times, label="When adding {} edges".format(ec))[0]
    for per_edge_times, ec in zip(results, edge_counts)
]

plt.legend(handles=handles)

plt.title("Time to add one Edge in Seconds ({} loops, best out of {})".format(number, repeat))
plt.xlabel("Number of Nodes")
plt.ylabel("Time in $s$")

# Save under the dok_matrix name. The figure used to be written to
# "runtime_all_array_graph_add_edges.png", which overwrote the plot of the
# ALL_Array benchmark and contradicted the message printed below.
add_edges_figure = "runtime_dok_matrix_graph_add_edges.png"
plt.savefig(add_edges_figure, dpi=400)
print("figure saved to", add_edges_figure)
", end="", flush=True) + timer = timeit.Timer(lambda: construction(node_number)) + result.append(min(timer.repeat(repeat=repeat, number=number)) / number) + print("done") + +h0, = plt.plot(node_numbers, result, "go-", label="Graph Construction Time in $s$") +plt.title("Graph Construction Time in Seconds ({} loops, best out of {})".format(number, repeat)) +plt.xlabel("Number of Nodes") +plt.ylabel("Time in $s$") +plt.legend(handles=[h0]) + +plt.savefig("runtime_networkx_graph_construction.png", dpi=400) +print("figure saved to runtime_networkx_graph_construction.png") + +plt.clf() +plt.gca().yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2e')) +plt.gcf().set_figheight(10) +plt.gcf().set_figwidth(20) + +edge_counts = [10, 20, 50, 100, 500, 1000] +edge_counts = [e * 10 for e in edge_counts] + +def add_edges(edges, node_number): + g = construction(node_number) + for edge in edges: + g.add_edge(*edge) + +#edges = [[(list(itertools.product(list(range(e//2, e//2 + e//4)), 2))[:c], e) +# for e in node_numbers] +# for c in edge_counts] +edges = [] +for c in edge_counts: + m = [] + for e in node_numbers: + m.append(([i for i,_ in zip(itertools.product(list(range(e//2)) + , list(range(e//2, e//2 + e//4))), range(c))], e)) + edges.append(m) + +#results = [[min(timeit.Timer(lambda: add_edges(*e)).repeat(repeat=repeat, number=number) for e in i] +# for i in edges] + +results = [] + +for i,edge_count in zip(edges, edge_counts): + m = [] + for e,node_number in zip(i, node_numbers): + print("running trial with", node_number, "nodes;", edge_count, "edges ... 
", end="", flush=True) + m.append(min(timeit.Timer(lambda: add_edges(*e)).repeat(repeat=repeat, number=number)) / number) + print("done") + results.append(m) + + +# remove construction time +results = [[(t - ct) / edge_count for t, ct in zip(times, result)] for times, edge_count in zip(results, edge_counts)] + +handles = [plt.plot(node_numbers, result, label="When adding {} edges".format(ec))[0] for result, ec in zip(results, edge_counts)] + +plt.legend(handles=handles) + +plt.title("Time to add one Edge in Seconds ({} loops, best out of {})".format(number, repeat)) +plt.xlabel("Number of Nodes") +plt.ylabel("Time in $s$") + +plt.savefig("runtime_networkx_graph_add_edges.png", dpi=400) +print("figure saved to runtime_networkx_graph_add_edges.png") diff --git a/tests/graph_storage/requirements.tx b/tests/graph_storage/requirements.tx new file mode 100644 index 0000000..1a0cbc8 --- /dev/null +++ b/tests/graph_storage/requirements.tx @@ -0,0 +1,3 @@ +#graph-tool +networkx +scipy