added description of repo content, added missing experimental script

67c0aa0b · Maria Hartmann · 31d48f01 · 67c0aa0b · 67c0aa0b · 67c0aa0b
Commit 67c0aa0b authored 4 years ago by Maria Hartmann
--- a/README.md
+++ b/README.md
 This repository contains an implementation of smooth heap and pairing heap, as well as scripts to reproduce experimental findings.
 # Documentation
- TODO...
+### Heap implementations
+The top level contains the actual heap implementations used in experiments, as well as a low-level implementation of smooth heap.
+- `pairing_heap.py` 'Universal heap', bundles all variant implementations.
+- `pairing_heap_standard.py` Implements the standard pairing heap variant; used for sorting experiments.
+- `pairing_heap_l.py` Implements lazy-linking variant of standard pairing heap; used for experiments with Dijkstra's algorithm.
+- `smooth_heap.py` Implements the analytical variant of smooth heap; used for sorting experiments.
+- `smooth_heap_l.py` Implements slightly modified lazy-linking variant of smooth heap; used for experiments with Dijkstra's algorithm.
+- `smooth_heap.c` Sample implementation of smooth heap in C, not used in experiments.
+### Experimental scripts
+All scripts running experiments are located in the /scripts folder. Each script generates two .csv files of results, reporting average number of linking operations and comparisons, respectively.
+These files are stored in the /data folder. Plots of results are stored in the /plots folder.
+- `paper-permutations.py` Generates sample plots of classes of permutations used for the sorting tests. Images generated are saved to /plots folder.
+- `paper-sorting-loc.py` Performs sorting on random instances of the class of localized permutations.
+- `paper-sorting-sep.py` Performs sorting on random separable permutations.
+- `paper-sorting-subseq.py` Performs sorting on random permutations containing sorted subsequences.
+- `paper-sorting-uniform.py` Performs sorting on uniformly random permutations.
+- `paper-dijkstra-test.py` Performs Dijkstra's algorithm on randomly generated Erdős–Rényi graphs of fixed size and variable edge probability.
+- `paper-dijkstra-test2.py` Performs Dijkstra's algorithm on randomly generated k-regular graphs of variable size.
\ No newline at end of file
--- a/scripts/paper-dijkstra-test-new.py
+++ b/scripts/paper-dijkstra-test-new.py
--- a/scripts/paper-dijkstra-test2-new.py
+++ b/scripts/paper-dijkstra-test2-new.py
--- a/scripts/paper-sorting-loc-new.py
+++ b/scripts/paper-sorting-loc-new.py
--- a/scripts/paper-sorting-sep-new.py
+++ b/scripts/paper-sorting-sep-new.py
--- a/scripts/paper-sorting-subseq-new.py
+++ b/scripts/paper-sorting-subseq-new.py
--- a/scripts/paper-sorting-uniform.py
+++ b/scripts/paper-sorting-uniform.py
+#!/usr/bin/python3
+"""Experimental script comparing performance of pairing heap and smooth heap
+in 'sorting mode': n inserts followed by n delete-min operations.
+Input lists are shuffled lists of increasing length (2**1 up to 2**(MAXSIZE - 1)).
+Results are stored as .csv files in ../data folder and plots of results in ../plots"""
+import copy
+import csv
+import inspect
+import os
+import sys
+from random import shuffle
+import matplotlib.pyplot as plt
+# ensuring imports work: the parent folder of this script must be on sys.path
+# *before* the project modules below are imported, otherwise the path fix-up
+# comes too late to have any effect on them.
+current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parent_dir = os.path.dirname(current_dir)
+sys.path.insert(0, parent_dir)
+from node import Node  # noqa: E402  (must follow the sys.path adjustment)
+from pairing_heap import PairingHeap  # noqa: E402
+# Counting modes; COUNT_TYPE_BOTH is what this script hands to PairingHeap
+# and to export_results.
+COUNT_TYPE_BOTH = 0
+COUNT_TYPE_LINKS = -1
+COUNT_TYPE_COMPS = -2
+# Experiment dimensions.
+MAXSIZE = 18  # exclusive upper bound for the exponent p of the input sizes 2**p
+NUMBER_TESTS = 5  # number of tests to run
+# The following parameters are not referenced by the visible code of this script.
+TEST_SIZES = list(range(MAXSIZE))
+LIST_LEN = 10000  # number of elements in test list
+TEST_SIZE = 10000  # number of elements in test list
+STEP_SIZE = 100
+INCREMENT_LOC = 0.005
+INCREMENT_SUBSEQS = 100
+# Heap type key -> display name; the key is passed to PairingHeap as the heap type.
+TYPES = {0: "Pairing", 12: "Smooth"}
+MAX_TYPE_KEY = max(TYPES)  # per-type result lists are indexed directly by key
+# Plot colours per heap type key (line colour and min/max band colour).
+COLOURS = {0: 'xkcd:fire engine red', 12: 'xkcd:green'}
+SHADE_COLOURS = {0: '#fe4d4e', 12: '#58ab8e'}
+def isSorted(list0):
+    """Return True if list0 is in strictly increasing order.
+
+    Lists with fewer than two elements count as sorted; equal neighbours do not.
+    """
+    for left, right in zip(list0, list0[1:]):
+        if not left < right:
+            return False
+    return True
+def plot_avg_counts_uni(avgCounts):
+    """Plot operation counts per input size for every heap type in TYPES.
+
+    avgCounts is read as [avg comps, avg links, max comps, max links,
+    min comps, min links]; each of the six entries is a sequence with one
+    element per input size, and that element is indexed by heap type key.
+    Averages are drawn as lines, the min..max range as a shaded band.
+    The figure is saved to ../plots/paper-sorting-uniform-new.svg and then shown.
+    """
+    # colours from https://xkcd.com/color/rgb/
+    # markers from https://matplotlib.org/3.1.1/api/markers_api.html
+    MARKERS_COMP = {0: "o", 12: "^"}
+    MARKERS_LINK = {0: "o", 12: "D"}
+    # x axis covers sizes 2**4 .. 2**(MAXSIZE - 1); `window` picks the matching
+    # entries, assuming entry i belongs to size 2**(i + 1) as built by this
+    # script's main section.
+    sizes = [2 ** p for p in range(4, MAXSIZE)]
+    window = slice(3, MAXSIZE - 1)
+    # (avg index, max index, min index, line style, marker table, label suffix)
+    series = (
+        (0, 2, 4, "-", MARKERS_COMP, " comparisons"),
+        (1, 3, 5, "--", MARKERS_LINK, " links"),
+    )
+    plt.figure('avg number of operations by heap type')
+    for k in TYPES.keys():
+        for avgIdx, maxIdx, minIdx, lineStyle, markers, suffix in series:
+            avgVals = [acounts[k] for acounts in avgCounts[avgIdx]]
+            maxVals = [acounts[k] for acounts in avgCounts[maxIdx]]
+            minVals = [acounts[k] for acounts in avgCounts[minIdx]]
+            plt.plot(sizes, avgVals[window], color=COLOURS[k], linestyle=lineStyle,
+                     marker=markers[k], markerfacecolor=COLOURS[k], markersize=9, markeredgewidth=1,
+                     markeredgecolor='black', label=TYPES[k] + suffix)
+            plt.fill_between(sizes, minVals[window], maxVals[window],
+                             color=SHADE_COLOURS[k], alpha=.3)
+    # plt.title('Sorting random permutations', fontsize=25)
+    plt.xlabel('Input size', fontsize=26)
+    plt.ylabel('Avg. number of operations / size', fontsize=26)
+    plt.xticks(fontsize=20)
+    plt.yticks(fontsize=20)
+    plt.rc('legend', fontsize=26)  # using a size in points
+    plt.legend()
+    plt.grid(True)
+    fig = plt.gcf()  # get current figure
+    fig.set_size_inches(16, 18)  # fixed 16 x 18 inch canvas for the saved image
+    plt.savefig('../plots/paper-sorting-uniform-new.svg', bbox_inches='tight')  # bbox_inches removes extra white spaces
+    plt.legend(loc='best')
+    plt.show()
+def _write_counts_csv(path, params, rows, heap_types):
+    """Write one ';'-separated result file: two header rows, then one row per entry of rows."""
+    # make sure the target folder exists so results are not lost at the very end of a run
+    folder = os.path.dirname(path)
+    if folder:
+        os.makedirs(folder, exist_ok=True)
+    with open(path, 'w', newline='') as csvfile:
+        csvwriter = csv.writer(csvfile, delimiter=';', quotechar='|', quoting=csv.QUOTE_MINIMAL)
+        # first header row lists the heap names, the second their numeric keys
+        csvwriter.writerow(["randomness parameter value"] + list(heap_types.values()))
+        csvwriter.writerow(["randomness parameter value"] + list(heap_types.keys()))
+        for i, counts in enumerate(rows):
+            csvwriter.writerow([params[i]] + [counts[k] for k in heap_types.keys()])
+def export_results(params, results, countType, heapTypes, filename="dijkstra"):
+    """Export operation counts to .csv files in ../data.
+
+    params    -- parameter value of each experiment; params[i] labels row i
+    results   -- for COUNT_TYPE_BOTH a pair [comparison rows, link rows];
+                 otherwise a single list of rows. Every row is indexed by
+                 heap type key.
+    countType -- COUNT_TYPE_BOTH writes '<filename>-comps.csv' and
+                 '<filename>-links.csv'; COUNT_TYPE_LINKS writes only the
+                 links file; any other value writes only the comps file.
+    heapTypes -- mapping heap type key -> name, used for the header rows and
+                 the column order (falls back to the module-level TYPES when None)
+    filename  -- base name of the output files
+    """
+    # parse data as randomness parameter; counts per heap type
+    heap_types = TYPES if heapTypes is None else heapTypes
+    prefix = "../data/" + filename
+    if countType == COUNT_TYPE_BOTH:
+        _write_counts_csv(prefix + '-comps.csv', params, results[0], heap_types)
+        _write_counts_csv(prefix + '-links.csv', params, results[1], heap_types)
+    elif countType == COUNT_TYPE_LINKS:
+        _write_counts_csv(prefix + '-links.csv', params, results, heap_types)
+    else:
+        _write_counts_csv(prefix + '-comps.csv', params, results, heap_types)
+if __name__ == "__main__":
+    # Statistics collected per input size; every entry is a list indexed by heap type key.
+    avgLinksPerSize = []
+    avgCompsPerSize = []
+    maxLinksPerSize = []
+    maxCompsPerSize = []
+    minLinksPerSize = []
+    minCompsPerSize = []
+    # ----------uniformly random permutations---------------------
+    # parameter: input length, 2**1 up to 2**(MAXSIZE - 1)
+    params = [2 ** p for p in range(1, MAXSIZE)]
+    slots = MAX_TYPE_KEY + 1  # lists are indexed directly by heap type key
+    for x in params:
+        sortedInput = list(range(x))
+        avgCountsLinks = [0] * slots
+        avgCountsComps = [0] * slots
+        maxCountsLinks = [0] * slots
+        maxCountsComps = [0] * slots
+        # large sentinel, replaced by the first measurement of each heap type
+        minCountsLinks = [1000000000000] * slots
+        minCountsComps = [1000000000000] * slots
+        for zz in range(NUMBER_TESTS):
+            testInput = copy.copy(sortedInput)
+            shuffle(testInput)  # pseudo-random permutation in-place
+            # NOTE(review): the first element is overwritten with -1, so the input is
+            # no longer an exact permutation of range(x); purpose not evident from
+            # this script -- confirm it is intended.
+            testInput[0] = -1
+            for heapType in TYPES.keys():
+                linkCount = 0
+                compCount = 0
+                testOutput = []
+                heap = PairingHeap(heapType, COUNT_TYPE_BOTH)
+                heap.make_heap()
+                for element in testInput:
+                    # NOTE(review): whatever insert returns is not added to the
+                    # totals; only delete_min is counted -- confirm this is intended.
+                    heap.insert(Node(element))
+                for _ in range(len(testInput)):
+                    (minNode, cc, lc) = heap.delete_min()
+                    testOutput.append(minNode.key)
+                    compCount += cc
+                    linkCount += lc
+                if not isSorted(testOutput):  # sanity check
+                    raise Exception("Invalid result for {}".format(TYPES[heapType]))
+                # divide by size for visualization
+                linksPerElement = linkCount / x
+                compsPerElement = compCount / x
+                avgCountsLinks[heapType] += linksPerElement / NUMBER_TESTS
+                avgCountsComps[heapType] += compsPerElement / NUMBER_TESTS
+                maxCountsLinks[heapType] = max(maxCountsLinks[heapType], linksPerElement)
+                maxCountsComps[heapType] = max(maxCountsComps[heapType], compsPerElement)
+                minCountsLinks[heapType] = min(minCountsLinks[heapType], linksPerElement)
+                minCountsComps[heapType] = min(minCountsComps[heapType], compsPerElement)
+                print("[{}: {}, {}/{}] \t Links: {} \t Comps: {}".format(
+                    TYPES[heapType], x, zz + 1, NUMBER_TESTS, linkCount, compCount))
+        for heapType in TYPES.keys():
+            print("[{}: {}, avg] \t Links: {} \t Comps: {}".format(TYPES[heapType], x, avgCountsLinks[heapType],
+                                                                   avgCountsComps[heapType]))
+        avgLinksPerSize.append(avgCountsLinks)
+        avgCompsPerSize.append(avgCountsComps)
+        maxLinksPerSize.append(maxCountsLinks)
+        maxCompsPerSize.append(maxCountsComps)
+        minLinksPerSize.append(minCountsLinks)
+        minCompsPerSize.append(minCountsComps)
+    plot_avg_counts_uni(
+        [avgCompsPerSize, avgLinksPerSize, maxCompsPerSize, maxLinksPerSize, minCompsPerSize, minLinksPerSize])
+    export_results(params, [avgCompsPerSize, avgLinksPerSize], COUNT_TYPE_BOTH, TYPES, "sorting-uniform-new")