diff --git a/thesis/chapters/appendix.tex b/thesis/chapters/appendix.tex index b89aca8..ef0fd48 100644 --- a/thesis/chapters/appendix.tex +++ b/thesis/chapters/appendix.tex @@ -74,9 +74,12 @@ It has been mentioned several times that the implementation using implementation. To support this statement a simple benchmark can be used. The relatively simple Pauli $X$ is used, more complicated gates like $CX$ or $H$ have worse performance when implemented in \lstinline{python}. The performance -improvement when using the \lstinline{ufunc} is around $1.7$ in this tested +improvement when using the \lstinline{ufunc} is a factor of around $6.4$ in this tested case. One must however note that the tested \lstinline{python} code is not realistic and in a possible applications there would be a significant overhead. \lstinputlisting[title={Code to Benchmark \lstinline{ufunc} Gates against Python}, language=Python, breaklines=True]{extra_benchmark/benchmark.py} +When using \lstinline{result_py[0::2] = qm_state[1::2]} instead of the bit mask, the result is identical and +the performance improvement of the \lstinline{ufunc} is only a factor of around $1.7$. This method is, however, not +applicable to general act-qbits, for which the bit mask has to be used. 
diff --git a/thesis/extra_benchmark/benchmark.py b/thesis/extra_benchmark/benchmark.py index 36b1ef7..a0759a9 100644 --- a/thesis/extra_benchmark/benchmark.py +++ b/thesis/extra_benchmark/benchmark.py @@ -23,13 +23,15 @@ time_uf = timeit.repeat("result_uf = gate_uf(qm_state, cl_state)" , number=1_000_000 ) -time_py = timeit.repeat("result_py = np.zeros(2**10, dtype=np.cdouble);" - "result_py[0::2] = qm_state[1::2];" - "result_py[1::2] = qm_state[0::2];" - "cl_py = np.zeros(10, dtype=np.int8)" +time_py = timeit.repeat( + "result_py = np.zeros(2**10, dtype=np.cdouble);" + "result_py[~bit_mask] = qm_state[bit_mask];" + "result_py[bit_mask] = qm_state[~bit_mask];" + "cl_py = np.zeros(10, dtype=np.int8)" , setup="import numpy as np;" "qm_state = np.zeros(2**10, dtype=np.cdouble);" "qm_state[0] = 1;" + "bit_mask = np.array([1 if (i & (1 << 0)) else 0 for i in range(2**10)]).astype(bool)" , repeat=5 , number=1_000_000 ) @@ -38,10 +40,11 @@ print(" done") print("running test ...", end="", flush=True) result_uf, cl, m = gate_uf(qm_state, cl_state); +bit_mask = np.array([1 if (i & (1 << 0)) else 0 for i in range(2**10)]).astype(bool) result_py = np.zeros(2**10, dtype=np.cdouble) cl_py = np.zeros(10, dtype=np.int8) -result_py[0::2] = qm_state[1::2] -result_py[1::2] = qm_state[0::2] +result_py[~bit_mask] = qm_state[bit_mask] +result_py[bit_mask] = qm_state[~bit_mask] assert np.allclose(result_py, result_uf) print(" done")