diff --git a/admin/sitegen-lib/dependencies.py b/admin/sitegen-lib/dependencies.py
new file mode 100644
--- /dev/null
+++ b/admin/sitegen-lib/dependencies.py
@@ -0,0 +1,25 @@
+"""
+The dependencies of an AFP entry are listed in its ROOT file and collected
+into a JSON dependencies file; this script reads that file and adds the AFP
+dependencies of each entry to the entry's JSON file.
+"""
+import json
+import os
+
+from write_file import write_file
+
+
+def add_dependencies(entries_dir, dependencies_file):
+    """For each entry in the entries directory, look up its dependencies and
+    add them to the entry's JSON file."""
+
+    with open(dependencies_file) as dep:
+        dependencies = json.load(dep)
+
+    for entry in os.listdir(entries_dir):
+        shortname = entry[:-3]
+        entry_deps = dependencies[shortname]
+        afp_deps = entry_deps["afp_deps"]
+
+        data = {"dependencies": afp_deps}
+        write_file(os.path.join(entries_dir, entry), data)
diff --git a/admin/sitegen-lib/keywords.py b/admin/sitegen-lib/keywords.py
new file mode 100644
--- /dev/null
+++ b/admin/sitegen-lib/keywords.py
@@ -0,0 +1,57 @@
+"""Generates a list of keywords for the search autocomplete. Each entry’s
+abstract is sanitised and then the keywords are extracted with the RAKE
+algorithm.
+"""
+import json
+import os
+import re
+from itertools import groupby
+
+import unidecode
+from rake_nltk import Rake
+import nltk
+
+nltk.download('stopwords')
+nltk.download('punkt')
+
+
+def generate_keywords(entries_dir):
+    """RAKE is used to extract the keywords from every abstract.
+
+    The top 8 keywords of each abstract are added to a list of all keywords,
+    and only the keywords that appear in two or more abstracts are kept.
+    Finally, plurals are removed."""
+
+    rake_object = Rake(max_length=2)
+
+    replacements = [
+        (r"\s+", " "),
+        (r"<.*?>", ""),
+        (r"[^\w\s/.()',-]", " "),
+        (r"\s+", " "),
+    ]
+
+    keywords = []
+
+    for entry in os.listdir(entries_dir):
+        with open(os.path.join(entries_dir, entry)) as json_file:
+            data = json.load(json_file)
+            text = data["abstract"]
+
+            for old, new in replacements:
+                text = re.sub(old, new, text)
+
+            text = unidecode.unidecode(text)
+
+            rake_object.extract_keywords_from_text(text)
+            keywords += rake_object.get_ranked_phrases()[:8]
+
+    # keep keywords that appear in 2 or more abstracts
+    keywords = [i for i, c in groupby(sorted(keywords)) if len(list(c)) > 1]
+
+    # remove plurals if we have the singular
+    for keyword in keywords:
+        if keyword + "s" in keywords:
+            keywords.remove(keyword + "s")
+
+    return [{"id": i, "keyword": x} for i, x in enumerate(keywords)]
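
As an aside (not part of the patch), a minimal sketch of how generate_keywords
behaves; the entry file names and abstracts are made up and the printed result
is only indicative:

import json
import os
import tempfile

from keywords import generate_keywords

with tempfile.TemporaryDirectory() as entries_dir:
    abstracts = {
        "Entry_A.md": "We verify a model checker for temporal logic.",
        "Entry_B.md": "A model checker for temporal logic, with machine-checked proofs.",
    }
    for name, abstract in abstracts.items():
        with open(os.path.join(entries_dir, name), "w") as f:
            json.dump({"abstract": abstract}, f)

    # only phrases extracted from two or more abstracts survive the filter, e.g.
    # [{'id': 0, 'keyword': 'model checker'}, {'id': 1, 'keyword': 'temporal logic'}]
    print(generate_keywords(entries_dir))
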
diff --git a/admin/sitegen-lib/related.py b/admin/sitegen-lib/related.py
new file mode 100644
--- /dev/null
+++ b/admin/sitegen-lib/related.py
@@ -0,0 +1,111 @@
+"""
+This script generates related entries, using three metrics:
+ * Sharing dependencies
+ * Sharing keywords
+ * Sharing topics
+
+These are weighted and used to find entries which are likely similar.
+
+The related entries are then added to the entry files to improve site navigation.
+"""
+import json
+import os
+
+from keywords import generate_keywords
+from write_file import write_file
+
+
+def add_related(entries_dir):
+    """
+    First, three dictionaries are created as follows:
+
+    dependencies = {"dependency": [list-of-entries, ...], ...}
+    keywords = {"keyword": [list-of-entries, ...], ...}
+    topics = {"topic": [list-of-entries, ...], ...}
+
+    Keywords that feature in more than 10 entries are dropped. Then
+    a dictionary is created with the relatedness scores between each
+    pair of entries. Finally, the top three related entries are chosen
+    for each entry.
+    """
+
+    keywords = {}
+
+    for obj in generate_keywords(entries_dir):
+        keywords[obj["keyword"]] = []
+
+
+    dependencies = {}
+    topics = {}
+    for entry in os.listdir(entries_dir):
+        shortname = entry[:-3]
+
+        with open(os.path.join(entries_dir, entry)) as file:
+            data = json.load(file)
+            if "dependencies" in data:
+                for dep in data["dependencies"]:
+                    if dep in dependencies:
+                        dependencies[dep].append(shortname)
+                    else:
+                        dependencies[dep] = [shortname]
+            if "topics" in data:
+                for topic in data["topics"]:
+                    if topic in topics:
+                        topics[topic].append(shortname)
+                    else:
+                        topics[topic] = [shortname]
+            for keyword in keywords.keys():
+                if keyword in data["abstract"].lower():
+                    keywords[keyword].append(shortname)
+
+    for keyword, values in list(keywords.items()):
+        if len(values) > 10:
+            keywords.pop(keyword)
+
+    related_entries = {}
+
+    for data_set, modifier in [(keywords, 1), (dependencies, 1.5), (topics, 0.5)]:
+        populate_related(data_set, related_entries, modifier)
+
+    for entry in related_entries:
+        for other, value in list(related_entries[entry].items()):
+            if value <= 2.5:
+                related_entries[entry].pop(other)
+
+    final_related = {}
+
+    for entry, scores in related_entries.items():
+        final_related[entry] = top_three(scores)
+
+    for entry, related in final_related.items():
+        if related:
+            data = {"related": related}
+            write_file(os.path.join(entries_dir, entry + ".md"), data)
+
+
+def populate_related(data, related, modifier=1):
+    """This is a heavily nested loop that populates the related dictionary.
+
+    For each of the categories, the list of entries associated with
+    each key is iterated over twice and, if the entries are not the
+    same, the modifier of that category is added to the relatedness
+    score between the two entries in the dictionary. As the loop
+    iterates twice over the value set, the resulting dictionary is
+    symmetric, i.e. the score for A->B will be equal to B->A.
+    """
+    for _, entries in data.items():
+        for key_entry in entries:
+            for value_entry in entries:
+                if value_entry != key_entry:
+                    if key_entry in related:
+                        if value_entry in related[key_entry]:
+                            related[key_entry][value_entry] += modifier
+                        else:
+                            related[key_entry][value_entry] = modifier
+                    else:
+                        related[key_entry] = {value_entry: modifier}
+
+
+def top_three(dictionary):
+    """Returns the highest three dictionary keys by value"""
+    return sorted(dictionary, key=dictionary.get, reverse=True)[:3]
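
To make the scoring in populate_related concrete, here is a small sketch with
made-up category dictionaries (the entry names A to D are hypothetical):

from related import populate_related, top_three

keywords = {"model checking": ["A", "B", "C"]}
dependencies = {"HOL-Library": ["A", "B"]}
topics = {"Logic": ["A", "B", "C", "D"]}

related = {}
for category, weight in [(keywords, 1), (dependencies, 1.5), (topics, 0.5)]:
    populate_related(category, related, weight)

# A and B share a keyword (1), a dependency (1.5) and a topic (0.5); A and C
# share a keyword and a topic; A and D only a topic.
print(related["A"])             # {'B': 3.0, 'C': 1.5, 'D': 0.5}
print(top_three(related["A"]))  # ['B', 'C', 'D']
# add_related additionally drops pairs scoring 2.5 or less, so only B would
# remain as related to A.
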
diff --git a/admin/sitegen-lib/statistics.py b/admin/sitegen-lib/statistics.py
new file mode 100644
--- /dev/null
+++ b/admin/sitegen-lib/statistics.py
@@ -0,0 +1,53 @@
+"""
+Most of the statistics for the site are generated by Hugo. This script
+generates other statistics, like the number of lines in the AFP, using the
+scripts from the current AFP.
+
+For this script to work, `return data` needs to be added at
+line 212 in templates.py
+"""
+
+import os
+
+import afpstats
+import metadata
+import templates
+from config import options
+from sitegen import associate_releases, parse, read_versions
+from write_file import write_file
+
+
+def add_statistics(base_dir, thys_dir, data_dir):
+    """Creates the necessary objects to generate the statistics,
+    then outputs them to the data directory"""
+    options.templates_dir = os.path.join(base_dir, "metadata", "templates")
+    options.dest_dir = data_dir
+
+    entries = parse(os.path.join(base_dir, "metadata", "metadata"))
+    versions = read_versions(os.path.join(base_dir, "metadata", "release-dates"))
+    associate_releases(entries, versions, os.path.join(base_dir, "metadata", "releases"))
+
+    deps_dict = metadata.empty_deps(entries)
+
+    afp_dict = afpstats.afp_dict(entries, thys_dir, deps_dict)
+    afp_dict.build_stats()
+    builder = templates.Builder(options, entries, afp_dict)
+
+    stats = builder.generate_statistics()
+
+    loc_articles = [article.loc for article in stats["articles_by_time"]]
+
+    all_articles = [a.name for a in stats["articles_by_time"]]
+
+    data = {
+        "num_lemmas": stats["num_lemmas"],
+        "num_loc": stats["num_loc"],
+        "articles_year": stats["articles_year"],
+        "loc_years": stats["loc_years"],
+        "author_years": stats["author_years"],
+        "author_years_cumulative": stats["author_years_cumulative"],
+        "loc_articles": loc_articles,
+        "all_articles": all_articles,
+    }
+
+    write_file(os.path.join(data_dir, "statistics.json"), data)
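
A possible invocation of add_statistics, assuming it is run from
admin/sitegen-lib of an AFP clone; the paths below are placeholders:

import os

from statistics import add_statistics  # the local module above, not the stdlib one

afp_root = os.path.expanduser("~/afp")  # hypothetical checkout location
add_statistics(afp_root,
               os.path.join(afp_root, "thys"),
               os.path.join(afp_root, "web", "data"))  # data_dir is a placeholder
# data_dir/statistics.json then contains num_lemmas, num_loc, the per-year
# series and the per-article line counts from the data dict above.
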
diff --git a/admin/sitegen-lib/templates.py b/admin/sitegen-lib/templates.py
--- a/admin/sitegen-lib/templates.py
+++ b/admin/sitegen-lib/templates.py
@@ -1,230 +1,231 @@
 from collections import OrderedDict
 from itertools import groupby
 import os
 import datetime
 
 from jinja2 import Environment, FileSystemLoader
 
 import terminal
 
 
 ### topics
 
 class Tree(object):
     def __init__(self):
         self.subtopics = OrderedDict()
         self.entries = []
 
     def add_topic(self, topic):
         if len(topic) > 0:
             if topic[0] not in self.subtopics:
                 tree = Tree()
                 self.subtopics[topic[0]] = tree
             else:
                 tree = self.subtopics[topic[0]]
             tree.add_topic(topic[1:])
 
     def add_to_topic(self, topic, entry):
         if len(topic) > 0:
             if topic[0] not in self.subtopics:
                 terminal.error(u"In entry {0}: unknown (sub)topic {1}".format(entry, topic), abort=True)
             else:
                 self.subtopics[topic[0]].add_to_topic(topic[1:], entry)
         else:
             self.entries.append(entry)
 
     def __str__(self):
         return self._to_str()
 
     def _to_str(self, indent=0):
         indent_str = ' ' * indent
         result = indent_str + str(self.entries) + "\n"
         for subtopic, tree in self.subtopics.items():
             result += indent_str
             result += subtopic
             result += "\n"
             result += tree._to_str(indent + 2)
         return result
 
 
 def read_topics(filename):
     tree = Tree()
     stack = []
     with open(filename) as f:
         for line in f:
             count = 0
             while line[count] == ' ':
                 count += 1
             if count % 2:
                 raise Exception(u"Illegal indentation at line '{0}'".format(line))
             level = count // 2
             if level <= len(stack):
                 stack = stack[0:level]
             else:
                 raise Exception(u"Illegal indentation at line '{0}'".format(line))
             stack.append(line[count:len(line)-1])
             tree.add_topic(stack)
     return tree
 
 
 # for topics page: group entries by topic
 def collect_topics(entries, metadata_dir):
     tree = read_topics(os.path.join(metadata_dir, "topics"))
     for entry, attributes in entries.items():
         for topic in attributes['topic']:
             tree.add_to_topic([s.strip() for s in topic.split('/')], entry)
     return tree
 
 
 class Builder():
     """Contains environment for building webpages from templates"""
 
     def __init__(self, options, entries, afp_entries):
         self.j2_env = Environment(loader=FileSystemLoader(options.templates_dir),
                                   trim_blocks=True)
         # pass functions to environment for use in templates
         self.prepare_env()
         self.options = options
         #TODO: use only afp_entries
         self.entries = entries
         self.afp_entries = afp_entries
 
     def prepare_env(self):
         def startswith(value, beginning):
             return value.startswith(beginning)
 
         def datetimeformat(value, format_str='%Y-%m-%d'):
             return value.strftime(format_str)
 
         def rfc822(value):
             # Locale could be something different than english, to prevent printing
             # non english months, we use this fix
             month = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split(" ")[value.month - 1]
             return value.strftime("%d " + month + " %Y %T %z")
 
         def split(value):
             return value.split()
 
         def short_month(value):
             return "jan feb mar apr may jun jul aug sep oct nov dec".split(" ")[value - 1]
 
         self.j2_env.filters['startswith'] = startswith
         self.j2_env.filters['datetimeformat'] = datetimeformat
         self.j2_env.filters['rfc822'] = rfc822
         self.j2_env.filters['split'] = split
         self.j2_env.filters['short_month'] = short_month
 
     def write_file(self, filename, template, values):
         # UTF-8 hack because of different string handling in python 2 vs 3
         with open(os.path.join(self.options.dest_dir, filename), 'wb') as f:
             f.write(template.render(values).encode('utf8'))
 
     def generate_standard(self, filename, template_name):
         template = self.j2_env.get_template(template_name)
         self.write_file(filename, template, {})
         terminal.success("Generated {}".format(filename))
 
     def generate_topics(self):
         tree = collect_topics(self.entries, self.options.metadata_dir)
         template = self.j2_env.get_template("topics.tpl")
         self.write_file("topics.html", template, {'tree': tree})
         terminal.success("Generated topics.html")
 
     def generate_index(self):
         data = {'is_devel': self.options.is_devel}
         by_year = groupby(sorted(self.afp_entries.values(),
                                  key=lambda e: (e.publish_date, e.name),
                                  reverse=True),
                           key=lambda e: e.publish_date.year)
         data['by_year'] = [(year, list(entries)) for year, entries in by_year]
         template = self.j2_env.get_template("index.tpl")
         self.write_file("index.html", template, data)
         terminal.success("Generated index.html")
 
     def generate_entries(self):
         counter = 0
         template = self.j2_env.get_template("entry.tpl")
         for name, entry in self.afp_entries.items():
             self.write_file(os.path.join("entries", name + ".html"), template,
                             {'entry': entry, 'is_devel': self.options.is_devel,
                              'ROOT_PATH': '../'})
             counter += 1
         for name, entry in self.afp_entries.no_index.items():
             self.write_file(os.path.join("entries", name + ".html"), template,
                             {'entry': entry, 'is_devel': self.options.is_devel,
                              'ROOT_PATH': '../'})
             counter += 1
         terminal.success("Generated html files for {:d} entries".format(counter))
 
     def generate_download(self):
         template = self.j2_env.get_template("download.tpl")
         self.write_file("download.html", template,
                         {'is_devel': self.options.is_devel})
         terminal.success("Generated download.html")
 
     def generate_statistics(self):
         #TODO: simplify with itertools
         # Count loc and articles per year
         articles_years = dict()
         loc_years = dict()
         for article in self.afp_entries.values():
             try:
                 articles_years[article.publish_date.year] += 1
                 loc_years[article.publish_date.year] += article.loc
             except KeyError:
                 articles_years[article.publish_date.year] = 1
                 loc_years[article.publish_date.year] = article.loc
         # Count new authors per year
         author_years = dict.fromkeys(articles_years.keys(), 0)
         for author in self.afp_entries.authors.values():
             first_year = min([e.publish_date.year for e in author.articles])
             try:
                 author_years[first_year] += 1
             except KeyError:
                 author_years[first_year] = 1
         # Build cumulative values
         author_years_cumulative = author_years.copy()
         for y in sorted(articles_years)[1:]:
             articles_years[y] += articles_years[y - 1]
             loc_years[y] += loc_years[y - 1]
             author_years_cumulative[y] += author_years_cumulative[y - 1]
         data = {'entries': self.afp_entries}
         data['num_lemmas'] = sum([a.lemmas for a in self.afp_entries.values()])
         data['num_loc'] = sum([a.loc for a in self.afp_entries.values()])
         data['years'] = sorted(articles_years)
         data['articles_year'] = [articles_years[y] for y in sorted(articles_years)]
         data['loc_years'] = [round(loc_years[y], -2) for y in sorted(loc_years)]
         data['author_years'] = [author_years[y] for y in sorted(author_years)]
         data['author_years_cumulative'] = [author_years_cumulative[y] for y in
                                            sorted(author_years_cumulative)]
         # Find 10 most imported entries, entries with the same number of
         # imports share one place.
         most_used = sorted([a for a in self.afp_entries.values()],
                            key=lambda x: (-len(x.used), x.name))
         # Show more than 10 articles but not more than necessary
         i = 0
         while (i < 10 or (i + 1 < len(most_used) and
                           len(most_used[i].used) == len(most_used[i + 1].used))):
             i += 1
         # Groupby iterators trigger some obscure bug in jinja2
         # https://github.com/pallets/jinja/issues/555
         # So don't use groupby iterator directly and convert to list of lists
         data['most_used'] = [(len_used, list(articles)) for (len_used, articles)
                              in groupby(most_used[:i + 1], key=lambda x: len(x.used))]
         data['articles_by_time'] = sorted(self.afp_entries.values(),
                                           key=lambda x: x.publish_date)
         data['articles_per_year'] = [(year, list(articles)) for (year, articles)
                                      in groupby(data['articles_by_time'],
                                                 key=lambda x: x.publish_date.year)]
         template = self.j2_env.get_template("statistics.tpl")
         self.write_file("statistics.html", template, data)
         terminal.success("Generated statistics.html")
+        return data
 
     def generate_status(self, build_data):
         template = self.j2_env.get_template("status.tpl")
         self.write_file("status.html", template,
                         {'entries': [self.afp_entries[e] for e in
                                      sorted(self.afp_entries)],
                          'build_data': build_data})
         terminal.success("Generated status.html")
 
     def generate_rss(self, num_entries):
         entries = sorted(self.afp_entries.values(),
                          key=lambda e: (e.publish_date, e.name),
                          reverse=True)
         template = self.j2_env.get_template("rss.tpl")
         self.write_file("rss.xml", template, {'entries': entries[:num_entries]})
         terminal.success("Generated rss.xml")
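
The tie handling in the "most used" table of generate_statistics is easy to
miss, so here is a sketch of the same cut-off loop applied to made-up import
counts instead of AFP entries:

# already sorted in descending order, as most_used is in generate_statistics
import_counts = [9, 8, 7, 7, 6, 6, 6, 5, 5, 4, 4, 4, 3]

i = 0
while (i < 10 or (i + 1 < len(import_counts)
                  and import_counts[i] == import_counts[i + 1])):
    i += 1

# the first i + 1 values are shown; both extra 4s are kept because they tie
# with the tenth value, so a group of equal counts is never split
print(import_counts[:i + 1])  # [9, 8, 7, 7, 6, 6, 6, 5, 5, 4, 4, 4]
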
diff --git a/admin/sitegen-lib/write_file.py b/admin/sitegen-lib/write_file.py
new file mode 100644
--- /dev/null
+++ b/admin/sitegen-lib/write_file.py
@@ -0,0 +1,18 @@
+import json
+import os
+
+
+def write_file(file, data, write=True, overwrite=False):
+    file_exists = os.path.isfile(file)
+
+    if file_exists and not overwrite:
+        with open(file) as r:
+            original_data = json.load(r)
+
+        data = {**original_data, **data}
+
+    # Write the file if `write` is set or if the file doesn't exist yet,
+    # i.e. skip writing only when `write` is False and the file already exists
+    if not file_exists or write:
+        with open(file, "w", encoding="utf-8") as w:
+            json.dump(data, w, ensure_ascii=False, indent=4)
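
Finally, a short sketch of write_file's merge semantics; the file name and
keys are hypothetical:

import json

from write_file import write_file

write_file("Example_Entry.md", {"dependencies": ["Dep_A"]})       # creates the file
write_file("Example_Entry.md", {"related": ["Entry_B"]})          # merged into the existing keys
write_file("Example_Entry.md", {"keywords": []}, overwrite=True)  # replaces the content entirely

with open("Example_Entry.md") as f:
    print(json.load(f))  # {'keywords': []}
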