diff --git a/anonymization/anonymization.py b/anonymization/anonymization.py index a2e49f0..f4bfecb 100644 --- a/anonymization/anonymization.py +++ b/anonymization/anonymization.py @@ -14,7 +14,9 @@ # # Copyright 2015-2017 by Claus Hunsen # Copyright 2021 by Thomas Bock +# Copyright 2026 by Thomas Bock # Copyright 2022 by Christian Hechtl +# Copyright 2025 by Maximilian Löffler # All Rights Reserved. """ This file is able to anonymize authors and issue titles after the extraction from the Codeface database was performed. @@ -29,14 +31,15 @@ import sys from os import path, walk, makedirs from os.path import abspath -from shutil import copy - -from codeface.cli import log -from codeface.configuration import Configuration -from codeface.dbmanager import DBManager +from logging import getLogger +from codeface_utils.configuration import Configuration +from codeface_utils.util import setup_logging from csv_writer import csv_writer +# create logger +setup_logging() +log = getLogger(__name__) ## # RUN POSTPROCESSING @@ -104,13 +106,13 @@ def anonymize_authors(author_data, i, author_to_anonymized_author, name_only = F # Don't anonymize the deleted user as this one might be needed for filtering (but add it to the dictionary) if orig_author == "Deleted user" and orig_email == "ghost@github.com": - if not (orig_author, orig_email) in author_to_anonymized_author: + if (orig_author, orig_email) not in author_to_anonymized_author: author_to_anonymized_author[(orig_author, orig_email)] = (orig_author, orig_email) else: # check whether (name, e-mail) pair isn't already present in the dictionary - if not (orig_author, orig_email) in author_to_anonymized_author: + if (orig_author, orig_email) not in author_to_anonymized_author: # check if just the name (without e-mail address) isn't already present in the dictionary - if not orig_author in author_to_anonymized_author: + if orig_author not in author_to_anonymized_author: # if the author has an empty name, only anonymize their e-mail address if not author[1] == "": author[1] = 
("developer" + str(i)) @@ -141,7 +143,7 @@ def anonymize_authors(author_data, i, author_to_anonymized_author, name_only = F # Check for all files in the result directory of the project whether they need to be anonymized - for filepath, dirnames, filenames in walk(data_path): + for filepath, _, filenames in walk(data_path): # (1) Anonymize authors lists if authors_list in filenames: @@ -170,7 +172,7 @@ def anonymize_authors(author_data, i, author_to_anonymized_author, name_only = F # anonymize authors author_data, i, author_to_anonymized_author = \ anonymize_authors(author_data, i, author_to_anonymized_author) - + author_data_gender, i_gender, author_to_anonymized_author_gender = \ anonymize_authors(author_data_gender, i_gender, author_to_anonymized_author_gender, name_only = True) @@ -334,7 +336,7 @@ def anonymize_authors(author_data, i, author_to_anonymized_author, name_only = F makedirs(path.dirname(output_path)) log.info("Write anonymized data to %s ...", output_path) csv_writer.write_to_csv(output_path, bot_data) - + # (8) Anonymize gender list if gender_list in filenames: f = path.join(filepath, gender_list) @@ -343,7 +345,7 @@ def anonymize_authors(author_data, i, author_to_anonymized_author, name_only = F gender_data_new = [] for author in gender_data: - if author[0] in author_to_anonymized_author_gender.keys(): + if author[0] in list(author_to_anonymized_author_gender.keys()): new_person = author_to_anonymized_author_gender[author[0]] author[0] = new_person[0] gender_data_new.append(author) @@ -395,7 +397,7 @@ def run(): # process arguments # - First make all the args absolute __resdir = abspath(args.resdir) - __codeface_conf, __project_conf = map(abspath, (args.config, args.project)) + __codeface_conf, __project_conf = list(map(abspath, (args.config, args.project))) # load configuration __conf = Configuration.load(__codeface_conf, __project_conf) diff --git a/author_postprocessing/author_postprocessing.py b/author_postprocessing/author_postprocessing.py 
index 13b1e38..2b54ef7 100644 --- a/author_postprocessing/author_postprocessing.py +++ b/author_postprocessing/author_postprocessing.py @@ -14,6 +14,8 @@ # # Copyright 2015-2017 by Claus Hunsen # Copyright 2020-2022 by Thomas Bock +# Copyright 2026 by Thomas Bock +# Copyright 2025 by Maximilian Löffler # All Rights Reserved. """ This file is able to disambiguate authors after the extraction from the Codeface database was performed. A manually @@ -42,13 +44,15 @@ from os import path, walk, makedirs from os.path import abspath from shutil import copy +from logging import getLogger -from codeface.cli import log -from codeface.configuration import Configuration -from codeface.dbmanager import DBManager - +from codeface_utils.configuration import Configuration +from codeface_utils.util import setup_logging from csv_writer import csv_writer +# create logger +setup_logging() +log = getLogger(__name__) ## # RUN POSTPROCESSING @@ -67,7 +70,7 @@ def perform_data_backup(results_path, results_path_backup): log.info("Backup folder already exists. 
No backup is to be performed.") return - for filepath, dirnames, filenames in walk(results_path): + for filepath, _, filenames in walk(results_path): for filename in filenames: if filename.endswith(".list"): current_file = path.join(filepath, filename) @@ -119,7 +122,7 @@ def is_github_noreply_author(name, email): # Check for all files in the result directory of the project whether they need to be adjusted - for filepath, dirnames, filenames in walk(data_path): + for filepath, _, filenames in walk(data_path): # (1) Remove author 'GitHub ' from authors list if authors_list in filenames: @@ -148,7 +151,7 @@ def is_github_noreply_author(name, email): if not is_github_noreply_author(email[0], email[1]): email_data_new.append(email) else: - log.warn("Remove email %s as it was sent by %s <%s>.", email[2], email[0], email[1]) + log.warning("Remove email %s as it was sent by %s <%s>.", email[2], email[0], email[1]) csv_writer.write_to_csv(f, email_data_new) @@ -198,19 +201,19 @@ def is_github_noreply_author(name, email): # ignore merge commits in the commit data, we consistently ignore them also if they are added # to a pull request. Hence, the corresponding "commit_added" event will be removed now (i.e., # not added to the new issue data any more). - log.warn("Commit %s is added in the GitHub issue data, but not part of the commit data. " + - "Remove the corresponding 'commit_added' event from the issue data...", commit_hash) + log.warning("Commit %s is added in the GitHub issue data, but not part of the commit data. " + + "Remove the corresponding 'commit_added' event from the issue data...", commit_hash) elif is_github_noreply_author(event[9], event[10]): # the event is authored by 'GitHub ', but is not a "commit_added" event, so we # neglect this event and remove it now (i.e., not add it to the new issue data any more). - log.warn("Event %s is authored by %s <%s>. 
Remove this event form the issue data...", - event[8], event[9], event[10]) + log.warning("Event %s is authored by %s <%s>. Remove this event form the issue data...", + event[8], event[9], event[10]) elif (is_github_noreply_author(event[12], event[13][1:-1]) and (event[8] == mentioned_event or event[8] == subscribed_event)): # the event references 'GitHub ', so we neglect this event and remove it now # (i.e., not add it to the new issue data any more). - log.warn("Event %s by %s <%s> references %s <%s>. Remove this event from the issue data...", - event[8], event[9], event[10], event[12], event[13]) + log.warning("Event %s by %s <%s> references %s <%s>. Remove this event from the issue data...", + event[8], event[9], event[10], event[12], event[13]) else: issue_data_new.append(event) @@ -229,7 +232,7 @@ def is_github_noreply_author(name, email): if not is_github_noreply_author(entry[0], entry[1]): bot_data_new.append(entry) else: - log.warn("Remove entry %s <%s> from bots list.", entry[0], entry[1]) + log.warning("Remove entry %s <%s> from bots list.", entry[0], entry[1]) csv_writer.write_to_csv(f, bot_data_new) @@ -285,7 +288,7 @@ def run_postprocessing(conf, resdir, backup_data): return # Check for all files in the result directory of the project whether they need to be adjusted - for filepath, dirnames, filenames in walk(data_path): + for filepath, _, filenames in walk(data_path): # (1) Adjust authors lists if authors_list in filenames: @@ -302,7 +305,7 @@ def run_postprocessing(conf, resdir, backup_data): for author in author_data: # keep author entry only if it should not be removed - if not author in author_data_to_remove: + if author not in author_data_to_remove: author_data_new.append(author) csv_writer.write_to_csv(f, author_data_new) @@ -469,7 +472,7 @@ def run(): # process arguments # - First make all the args absolute __resdir = abspath(args.resdir) - __codeface_conf, __project_conf = map(abspath, (args.config, args.project)) + __codeface_conf, 
__project_conf = list(map(abspath, (args.config, args.project))) __backup_data = args.backup # load configuration diff --git a/bot_processing/bot_processing.py b/bot_processing/bot_processing.py index 53a397e..9b18dd4 100644 --- a/bot_processing/bot_processing.py +++ b/bot_processing/bot_processing.py @@ -13,23 +13,26 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # Copyright 2021-2022 by Thomas Bock +# Copyright 2026 by Thomas Bock +# Copyright 2025 by Maximilian Löffler # All Rights Reserved. """ This file is able to extract information on bot/human users from csv files. """ import argparse -import httplib import os import sys -import urllib - -import operator -from codeface.cli import log -from codeface.configuration import Configuration +from logging import getLogger +from codeface_utils.configuration import Configuration +from codeface_utils.util import setup_logging from csv_writer import csv_writer +# create logger +setup_logging() +log = getLogger(__name__) + def run(): # get all needed paths and arguments for the method call. 
parser = argparse.ArgumentParser(prog='codeface-extraction-bots-github', description='Codeface extraction') @@ -39,7 +41,7 @@ def run(): # parse arguments args = parser.parse_args(sys.argv[1:]) - __codeface_conf, __project_conf = map(os.path.abspath, (args.config, args.project)) + __codeface_conf, __project_conf = list(map(os.path.abspath, (args.config, args.project))) # create configuration __conf = Configuration.load(__codeface_conf, __project_conf) @@ -75,7 +77,7 @@ def load_bot_data(bot_file, header = True): :return: the read bot data """ - log.devinfo("Read bot data from file '{}'...".format(bot_file)) + log.info("Read bot data from file '{}'...".format(bot_file)) # check if file exists and exit early if not if not os.path.exists(bot_file): @@ -99,7 +101,7 @@ def load_user_data(user_data_file): :return: the read user data """ - log.devinfo("Read user data from file '{}'...".format(user_data_file)) + log.info("Read user data from file '{}'...".format(user_data_file)) # check if file exists and exit early if not if not os.path.exists(user_data_file): @@ -192,12 +194,12 @@ def add_user_data(bot_data, user_data, known_bots_file): continue # get user information if available - if user[0] in user_buffer.keys(): + if user[0] in list(user_buffer.keys()): bot_reduced["user"] = user_buffer[user[0]] bot_reduced["prediction"] = user[-1] bot_data_reduced.append(bot_reduced) else: - log.warn("User '{}' in bot data does not occur in GitHub user data. Remove user...".format(user[0])) + log.warning("User '{}' in bot data does not occur in GitHub user data. 
Remove user...".format(user[0])) # check whether known GitHub bots occur in the GitHub issue data and, if so, update the bot data accordingly bot_data_reduced = check_with_known_bot_list(known_bots_file, bot_data, user_buffer, bot_data_reduced) @@ -224,7 +226,7 @@ def print_to_disk(bot_data, results_folder): user["user"]["email"], user["prediction"] ) - if not entry in lines: + if entry not in lines: lines.append(entry) # write to output file diff --git a/codeface_extraction/codeface_extraction.py b/codeface_extraction/codeface_extraction.py index 7cf24ea..88d7069 100644 --- a/codeface_extraction/codeface_extraction.py +++ b/codeface_extraction/codeface_extraction.py @@ -14,7 +14,9 @@ # # Copyright 2015-2017 by Claus Hunsen # Copyright 2016, 2018-2019 by Thomas Bock +# Copyright 2026 by Thomas Bock # Copyright 2018 by Barbara Eckl +# Copyright 2025 by Maximilian Löffler # All Rights Reserved. """ This file is able to extract developer--artifact relations from the Codeface database. @@ -22,15 +24,18 @@ import argparse import sys +from logging import getLogger from os.path import abspath -from codeface.cli import log -from codeface.configuration import Configuration -from codeface.dbmanager import DBManager - -import extractions +from . 
import extractions from csv_writer import csv_writer +from codeface_utils.dbmanager import DBManager +from codeface_utils.configuration import Configuration +from codeface_utils.util import setup_logging +# create logger +setup_logging() +log = getLogger(__name__) ## # RUN FOR ALL PROJECTS @@ -119,7 +124,7 @@ def run(): # process arguments # - First make all the args absolute __resdir = abspath(args.resdir) - __codeface_conf, __project_conf = map(abspath, (args.config, args.project)) + __codeface_conf, __project_conf = list(map(abspath, (args.config, args.project))) __extract_commit_messages = args.commit_messages __extract_impl = args.implementation __extract_on_range_level = args.range diff --git a/codeface_extraction/extractions.py b/codeface_extraction/extractions.py index 081a353..9c636dd 100644 --- a/codeface_extraction/extractions.py +++ b/codeface_extraction/extractions.py @@ -17,6 +17,7 @@ # Copyright 2019, 2021 by Thomas Bock # Copyright 2018 by Barbara Eckl # Copyright 2018 by Tina Schuh +# Copyright 2025 by Maximilian Löffler # All Rights Reserved. """ This file provides the class 'Extraction' and all of its subclasses. @@ -26,18 +27,19 @@ import os import unicodedata import re +from logging import getLogger from ftfy import fix_encoding from email.header import decode_header, make_header -from codeface.cli import log -from codeface.util import gen_range_path +from codeface_utils.util import gen_range_path +log = getLogger(__name__) + # # GET EXTRACTIONS # - def get_extractions(dbm, conf, resdir, csv_writer, extract_commit_messages, extract_impl, extract_on_range_level): # all extractions are subclasses of Extraction: # instantiate them all! 
@@ -117,7 +119,7 @@ def __init__(self, dbm, conf, res_dir, csv_writer): def is_project_level(self): """Check if this extraction is on project level (i.e., {revision} is not on the SQL statement).""" - return not ("{revision}" in self.sql) + return "{revision}" not in self.sql def is_generic_extraction(self): """Check if this extraction is generic (i.e., it can be used for several artifacts and, hence, @@ -441,7 +443,7 @@ def __init__(self, dbm, conf, resdir, csv_writer): def get_list(self): result = self._run_sql(None, None) lines = self._reduce_result(result) - return [rev for (rev, date) in lines] + return [rev for (rev, _) in lines] # @@ -723,7 +725,7 @@ def _reduce_result(self, result): def fix_characters_in_string(text): """ - Removes control characters such as \r\n \x1b \ufffd from string impl and returns a unicode + Removes control characters such as \r\n \x1b \\ufffd from string impl and returns a unicode string where all control characters have been replaced by a space. :param text: expects a unicode string :return: unicode string @@ -737,12 +739,12 @@ def fix_characters_in_string(text): new_text = fix_encoding(text) # remove unicode characters from "Specials" block - # see: https://www.compart.com/en/unicode/block/U+FFF0 - new_text = re.sub(r"\\ufff.", " ", new_text.encode("unicode-escape")) + # see: https://www.compart.com/en/unicode/block/U+FFF0 + new_text = re.sub(rb"\\ufff.", b" ", new_text.encode("unicode-escape")) # remove all kinds of control characters and emojis # see: https://www.fileformat.info/info/unicode/category/index.htm - new_text = u"".join(ch if unicodedata.category(ch)[0] != "C" else " " for ch in new_text.decode("unicode-escape")) + new_text = "".join(ch if unicodedata.category(ch)[0] != "C" else " " for ch in new_text.decode("unicode-escape")) return new_text @@ -765,12 +767,11 @@ def fix_name_encoding(name): try: # Apply correct encoding and return unicode string - return unicode(make_header(info)) + return str(make_header(info)) 
except UnicodeDecodeError: # Undo utf-8 encoding and return unicode string - return unicode(name.decode('utf-8')) + return str(name.decode('utf-8')) except LookupError: # Encoding not found, return string as is return name - return name diff --git a/codeface_utils/__init__.py b/codeface_utils/__init__.py new file mode 100644 index 0000000..9bad579 --- /dev/null +++ b/codeface_utils/__init__.py @@ -0,0 +1 @@ +# coding=utf-8 diff --git a/codeface_utils/cluster/idManager.py b/codeface_utils/cluster/idManager.py new file mode 100644 index 0000000..43a4be5 --- /dev/null +++ b/codeface_utils/cluster/idManager.py @@ -0,0 +1,305 @@ +# This file is part of codeface-extraction, which is free software: you +# can redistribute it and/or modify it under the terms of the GNU General +# Public License as published by the Free Software Foundation, version 2. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Copyright 2010, 2011 by Wolfgang Mauerer +# Copyright 2012, 2013 by Siemens AG, Wolfgang Mauerer +# Copyright 2025 by Maximilian Löffler +# All Rights Reserved. +# +# The code in this file originates from: +# https://github.com/siemens/codeface/blob/master/codeface/cluster/idManager.py +# We inherit the 'idManager' and 'dbIdManager' classes from codeface. +# The 'csvManager' class is original. 
+ +from __future__ import absolute_import +import re +from email.utils import parseaddr +from logging import getLogger +import http.client as http_client +import urllib.parse as urlparse +import json +import string +import random +import time +from abc import ABC, abstractmethod +import pandas + +from ..util import encode_as_utf8 + + +log = getLogger(__name__) + +class idManager(ABC): + + def __init__(self): + # Cache identical requests to the server + self._cache = {} + + self.fixup_emailPattern = re.compile(r'([^<]+)\s+<([^>]+)>') + self.commaNamePattern = re.compile(r'([^,\s]+),\s+(.+)') + + @abstractmethod + def _query_user_id(self, name, email): + pass + + @abstractmethod + def getPersonFromDB(self, person_id): + pass + + def getPersonID(self, addr): + """Obtain a unique ID from contributor identity credentials.""" + + (name, email) = self._decompose_addr(addr) + if (name, email) not in self._cache: + self._cache[(name, email)] = self._query_user_id(name, email) + ID = self._cache[(name, email)] + + return ID + + def _cleanName(self, name): + # Remove or replace characters in names that are known + # to cause parsing problems in later stages + name = name.replace('\"', "") + name = name.replace("\'", "") + name = name.strip() + + return name + + def _decompose_addr(self, addr): + addr = addr.replace("[", "").replace("]", "") + (name, email) = parseaddr(addr) + + # Handle cases where the name is unknown from commits that potentially + # predate the era of git, where only an e-mail address was given. + # In such a case, we set the name to the e-mail address. Otherwise, + # all authors with unknown name would be matched to one person. + if (name == "unknown" or name == "unknown (none)" or name == "none"): + name = email + + # The eMail parser cannot handle Surname, Name properly. 
+ # Provide a fixup hack for this case + if (name == "" or email.count("@") == 0): + m = re.search(self.fixup_emailPattern, addr) + if m: + name = m.group(1) + email = m.group(2) + m2 = re.search(self.commaNamePattern, name) + if m2: + # Replace "Surname, Name" by "Name Surname" + name = "{0} {1}".format(m2.group(2), m2.group(1)) + + # print "Fixup for addr {0} required -> ({1}/{2})".format(addr, name, email) + else: + # check for the following special format: email@domain.tld <> + strangePattern = re.compile(r'(.*@.*)\s+(<>)') + m3 = re.search(strangePattern, addr) + if m3: + # Replace addr by "email " + name = m3.group(1).split("@")[0] # get name before @ symbol + email = m3.group(1) + # print "Fixup for addr {0} required -> ({1}/{2})".format(addr, name, email) + else: + # In this case, no eMail address was specified. + # print("Fixup for email required, but FAILED for {0}".format(addr)) + name = addr + rand_str = "".join(random.choice(string.ascii_lowercase + string.digits) + for _ in range(10)) + email = "could.not.resolve@" + rand_str + + email = email.lower() + + name = self._cleanName(name) + email = self._cleanName(email) + + return (name, email) + + +class dbIdManager(idManager): + """Provide unique IDs for developers. + + This class provides an interface to the REST id server. Heuristics to + detect developers who operate under multiple identities are included + in the server.""" + + def __init__(self, dbm, conf): + super().__init__() + + self._idMgrServer = conf["idServiceHostname"] + self._idMgrPort = conf["idServicePort"] + self._conn = http_client.HTTPConnection(self._idMgrServer, self._idMgrPort) + + # Create a project ID + self._dbm = dbm + # TODO: Pass the analysis method to idManager via the configuration + # file. However, the method should not influence the id scheme so + # that the results are easily comparable. 
+ self._projectID = self._dbm.getProjectID(conf["project"], + conf["tagging"]) + + # Construct request headers + self.headers = {"Content-type": + "application/x-www-form-urlencoded; charset=utf-8", + "Accept": "text/plain"} + + def _query_user_id(self, name, email): + """Query the ID database for a contributor ID""" + + name = encode_as_utf8(name) + params = urlparse.urlencode({'projectID': self._projectID, + 'name': name, + 'email': email}) + + try: + self._conn.request("POST", "/post_user_id", params, self.headers) + res = self._conn.getresponse() + except: + retryCount = 0 + successful = False + while (retryCount <= 10 and not successful): + log.warning("Could not reach ID service. Try to reconnect " \ + "(attempt {}).".format(retryCount)) + self._conn.close() + self._conn = http_client.HTTPConnection(self._idMgrServer, self._idMgrPort) + time.sleep(60) + #self._conn.ping(True) + try: + self._conn.request("POST", "/post_user_id", params, self.headers) + res = self._conn.getresponse() + successful = True + except: + if retryCount < 10: + retryCount += 1 + else: + retryCount += 1 + log.exception("Could not reach ID service. Is the server running?\n") + raise + + # TODO: We should handle errors by throwing an exception instead + # of silently ignoring them + result = res.read() + jsond = json.loads(result) + try: + id = jsond["id"] + except KeyError: + raise Exception("Bad response from server: '{}'".format(jsond)) + + return (id) + + def getPersonFromDB(self, person_id): + """Query the ID database for a contributor and all corresponding data""" + + try: + self._conn.request("GET", "/getUser/{}".format(person_id), headers=self.headers) + res = self._conn.getresponse() + except: + self._conn.close() + self._conn = http_client.HTTPConnection(self._idMgrServer, self._idMgrPort) + retryCount = 0 + successful = False + while (retryCount <= 10 and not successful): + log.warning("Could not reach ID service. 
Try to reconnect " \ + "(attempt {}).".format(retryCount)) + self._conn.close() + self._conn = http_client.HTTPConnection(self._idMgrServer, self._idMgrPort) + time.sleep(60) + #self._conn.ping(True) + try: + self._conn.request("GET", "/getUser/{}".format(person_id), headers=self.headers) + res = self._conn.getresponse() + successful = True + except: + if retryCount < 10: + retryCount += 1 + else: + retryCount += 1 + log.exception("Could not reach ID service. Is the server running?\n") + raise + + result = res.read() + jsond = json.loads(result)[0] + + return (jsond) + + +class csvIdManager(idManager): + """Provide unique IDs for developers. + + This class provides an interface to CSV id files. + """ + def __init__(self, conf): + super().__init__() + + # CSV file containing the IDs + self.csv_file = conf["csvFile"] + self.csv_sep = conf["csvSeparator"] + self.df = self._verifyCsvFile() + + def _verifyCsvFile(self): + with open(self.csv_file, "r") as file: + df = pandas.read_csv(file, sep=self.csv_sep, names=['ID', 'name', 'email']) + return df + + def _addRow(self, name, email): + + # determine next ID + max_id = self.df['ID'].max() + next_id = 0 if bool(pandas.isna(max_id)) else int(max_id) + 1 + + # append new row + self.df = self.df._append({ + 'ID': next_id, + 'name': name, + 'email': email + }, ignore_index=True) + + # dump df to file + file = open(self.csv_file, "w") + self.df.to_csv(file, sep=self.csv_sep, index=False, header=False) + + return next_id + + def _query_user_id(self, name, email): + """Query the ID csv file for a contributor ID""" + + # no name is okay, but no email is not + if not email: + return -1 + + # Match by name and email. + # Disregard random string after "could.not.resolve@" in email + # to avoid creating multiple entries for the same person. 
+ if email.startswith("could.not.resolve@"): + rows = self.df[(self.df['name'] == name) & + (self.df['email'].str.startswith("could.not.resolve@"))] + else: + rows = self.df[(self.df['name'] == name) & + (self.df['email'] == email)] + + if len(rows) == 0: + name = '' if not name else name + return self._addRow(name, email) + + elif len(rows) == 1: + return int(rows['ID'].values[0]) + + else: + raise Exception("Constructed author list is in invalid format. Duplicate entries found") + + def getPersonFromDB(self, person_id): + rows = self.df[self.df['ID'] == person_id] + if len(rows) == 1: + return { + 'name': rows['name'].values[0], + 'email1': rows['email'].values[0], + 'id': person_id + } diff --git a/codeface_utils/configuration.py b/codeface_utils/configuration.py new file mode 100644 index 0000000..e4a654a --- /dev/null +++ b/codeface_utils/configuration.py @@ -0,0 +1,217 @@ +# This file is part of codeface-extraction, which is free software: you +# can redistribute it and/or modify it under the terms of the GNU General +# Public License as published by the Free Software Foundation, version 2. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Copyright 2013 by Siemens AG, Johannes Ebke +# Copyright 2025 by Maximilian Löffler +# Copyright 2026 by Thomas Bock +# All Rights Reserved. 
+# +# The code in this file originates from: +# https://github.com/siemens/codeface/blob/master/codeface/configuration.py + +''' +Configuration module for codeface + +Encapsulates a configuration as an immutable dict +''' + +from __future__ import absolute_import +from tempfile import NamedTemporaryFile +from collections.abc import Mapping +from logging import getLogger +import yaml + +from codeface_utils.linktype import LinkType + + +log = getLogger(__name__) + +class ConfigurationError(Exception): + '''Raised if any part of the configuration is malformed''' + pass + +class Configuration(Mapping): + ''' + Encapsulates the codeface configuration + ''' + + GLOBAL_KEYS = ('dbname', 'dbhost', 'dbuser', 'dbpwd', + 'idServiceHostname', 'idServicePort') + GLOBAL_OPTIONAL_KEYS = ('dbport', 'useCsv') + PROJECT_KEYS = ('project', 'repo', 'tagging', 'revisions', 'rcs') + OPTIONAL_KEYS = ('description', 'ml', 'mailinglists', 'sleepTime', + 'proxyHost', 'proxyPort', 'bugsProjectName', + 'productAsProject', 'issueTrackerType', + 'issueTrackerURL', 'issueTrackerProject', + 'issueTrackerUser', 'issueTrackerPassword', + 'understand', 'sloccount', 'windowSize', 'numWindows', + 'qualityType', 'communicationType', 'artifactType', 'dependencyType', + 'csvFile', 'csvSeparator') + ALL_KEYS = set(GLOBAL_KEYS + GLOBAL_OPTIONAL_KEYS + PROJECT_KEYS + + OPTIONAL_KEYS) + + def __init__(self): + ''' + Initialize an empty configuration object with the default values + ''' + self._conf = { + 'idServiceHostname' : '127.0.0.1', + 'idServicePort' : 8080 + } + + self._conf_file_loc = None + + @classmethod + def load(cls, global_conffile, local_conffile=None): + ''' + Load configuration from global/local files + ''' + c = Configuration() + log.info("Loading global configuration file '{}'". + format(global_conffile)) + cls._global_conf = c._load(global_conffile) + c._conf.update(c._global_conf) + if local_conffile: + log.info("Loading project configuration file '{}'". 
+ format(local_conffile)) + cls._project_conf = c._load(local_conffile) + c._conf.update(c._project_conf) + else: + log.info("Not loading project configuration file!") + c._initialize() + c._check_sanity() + return c + + def _load(self, filename): + '''Helper function that checks loading errors and logs them''' + try: + return yaml.load(open(filename, 'r'), Loader=yaml.SafeLoader) + except IOError: + log.exception("Could not open configuration file '{}'". + format(filename)) + raise + except yaml.YAMLError: + log.exception("Could not parse configuration file '{}'". + format(filename)) + raise + + def _initialize(self): + '''Infer missing values in the configuration''' + if "rcs" not in self: + self._conf["rcs"] = [None for _ in range(len(self["revisions"]))] + + if "mailinglists" not in self: + self._conf["mailinglists"] = [] + if "ml" in self: + self._conf["mailinglists"].append({"name": self["ml"]}) + for ml in self._conf["mailinglists"]: + ml.setdefault("type", "dev") + ml.setdefault("source", "gmane") + + if "dbport" not in self: + self._conf["dbport"] = 3306 + else: + self._conf["dbport"] = int(self._conf["dbport"]) + + if "useCsv" not in self: + self._conf["useCsv"] = False + + def _check_sanity(self): + ''' + Check that the configuration makes sense. + :raise ConfigurationError + ''' + + # Some elementary sanity checks + for key in self.GLOBAL_KEYS: + if self._project_conf and key in self._project_conf: + log.critical("The key '{}' may not be overridden in the " + "project configuration file".format(key)) + raise ConfigurationError('Invalid configuration key.') + + for key in self.GLOBAL_KEYS + self.PROJECT_KEYS: + if key not in self: + log.critical("Required key '{}' missing in configuration!" 
+ ''.format(key)) + raise ConfigurationError('Missing configuration key.') + + if self['tagging'] not in LinkType.get_all_link_types(): + log.critical('Unsupported tagging mechanism specified!') + raise ConfigurationError('Unsupported tagging mechanism.') + + if len(self["revisions"]) < 2: + log.info("No revision range specified in configuration, using auto-generated windows") + + if len(self["revisions"]) != len(self["rcs"]): + log.critical("Malformed configuration: revision and rcs list " + "lengths differ! Found {0} revisions and {1} release " + "candidates.".format(len(self["revisions"]), len(self["rcs"]))) + raise ConfigurationError('Malformed configuration.') + + if self["useCsv"]: + if "csvFile" not in self: + log.critical("Malformed configuration: useCsv is true, but " + "csvFile is not specified.") + raise ConfigurationError('Malformed configuration.') + if "csvSeparator" not in self: + self["csvSeparator"] = "," + + unknown_keys = [k for k in self if k not in self.ALL_KEYS] + for key in unknown_keys: + log.warning("Unknown key '{}' in configuration.".format(key)) + + def write(self): + conf_file = NamedTemporaryFile(mode='w', prefix=self._conf['project'], + delete=False) + yaml.dump(self._conf, conf_file) + self._conf_file_loc = conf_file.name + conf_file.close() + + def get_conf_file_loc(self): + return self._conf_file_loc + + # Function for the Configuration object to function as a dict + def __getitem__(self, key): + return self._conf[key] + + def __setitem__(self, key, value): + self._conf[key] = value + + def __len__(self): + return len(self._conf) + + def __iter__(self): + return iter(self._conf) + + def __keys__(self): + return list(self._conf.keys()) + + def __str__(self): + ''' + Return a pretty string for display and logging + ''' + r = [] + r.append("--- # global codeface configuration") + for key in self.GLOBAL_KEYS: + if key in self: + r.append("{}: {}".format(key, repr(self[key]))) + r.append("# codeface project configuration") + for key in 
self.PROJECT_KEYS + self.OPTIONAL_KEYS: + if key in self: + r.append("{}: {}".format(key, repr(self[key]))) + unknown = [k for k in self if k not in self.ALL_KEYS] + if unknown: + r.append("# Unknown keys") + for key in unknown: + r.append("{}: {}".format(key, repr(self[key]))) + return "\n".join(r) diff --git a/codeface_utils/dbmanager.py b/codeface_utils/dbmanager.py new file mode 100644 index 0000000..aecc172 --- /dev/null +++ b/codeface_utils/dbmanager.py @@ -0,0 +1,481 @@ +#! /usr/bin/env python +# This file is part of Codeface. Codeface is free software: you can +# redistribute it and/or modify it under the terms of the GNU General Public +# License as published by the Free Software Foundation, version 2. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Copyright 2013 by Siemens AG, Wolfgang Mauerer +# Copyright 2025 by Maximilian Löffler +# All Rights Reserved. 
+ +# Thin sql database wrapper + +from __future__ import absolute_import +from __future__ import print_function +import MySQLdb as mdb +import time +from datetime import datetime, timezone +from logging import getLogger +from contextlib import contextmanager + + +# create logger +log = getLogger(__name__) + +@contextmanager +def _log_db_error(action, args=None): + try: + yield + except mdb.Error as e: + if args: + try: + action = action % args + except: + pass + log.critical('MySQL error {e[0]} during "{action}": {e[1]}' + ''.format(e=e.args, action=action)) + raise + + +class DBManager: + """This class provides an interface to the codeface sql database.""" + + def __init__(self, conf): + + self.conf = conf + self.__openConnection(conf) + + # max_packet_size = 1024 * 1024 * 512 + # self.doExec("SET GLOBAL max_allowed_packet=%s", (max_packet_size,)) + + def __del__(self): + if self.con is not None: + self.con.close() + + def __openConnection(self, conf): + try: + self.con = None + self.con = mdb.Connection(host=conf["dbhost"], + port=conf["dbport"], + user=conf["dbuser"], + passwd=conf["dbpwd"], + db=conf["dbname"], + charset="utf8", + use_unicode=True) + self.cur = self.con.cursor() + log.debug( + "Establishing MySQL connection to " + "{c[dbuser]}@{c[dbhost]}:{c[dbport]}, DB '{c[dbname]}'" + .format(c=conf)) + except mdb.Error as e: + log.critical( + "Failed to establish MySQL connection to " + "{c[dbuser]}@{c[dbhost]}:{c[dbport]}, DB '{c[dbname]}'" + ": {e[1]} ({e[0]})" + "".format(c=conf, e=e.args)) + raise + + + def doExec(self, stmt, args=None): + with _log_db_error(stmt, args): + retryCount = 0 + while retryCount < 10: + try: + if isinstance(args, list): + res = self.cur.executemany(stmt, args) + else: + res = self.cur.execute(stmt, args) + return res + except mdb.OperationalError as dbe: + retryCount += 1 + log.info("DBE args: " + str(dbe.args)) + if dbe.args[0] == 1213: # Deadlock! retry... 
+ log.warning("Recoverable deadlock in MySQL - retrying " \ + "(attempt {}).".format(retryCount)) + elif dbe.args[0] == 2003: # Can't connect to MySQL server + log.warning("Can't connect to MySQL server - retrying " \ + "(attempt {}).".format(retryCount)) + time.sleep(60) + log.warning("Try opening new connection") + self.con.close() + log.warning("Connection successfully closed") + self.__openConnection(self.conf) + log.warning("Opening new connection successful") + elif dbe.args[0] == 2006: # Server gone away... + log.warning("MySQL Server gone away, trying to reconnect " \ + "(attempt {}).".format(retryCount)) + time.sleep(60) + log.warning("Try opening new connection") + self.con.close() + log.warning("Connection successfully closed") + self.__openConnection(self.conf) + log.warning("Opening new connection successful") + elif dbe.args[0] == 2013 or dbe.args[0] == 1053: # Lost connection to MySQL server during query | Server shutdown in progress + log.warning("Lost connection to MySQL server during query, " \ + "trying to reconnect (attempt {}).".format(retryCount)) + time.sleep(60) + log.warning("Try opening new connection") + self.con.close() + log.warning("Connection successfully closed") + self.__openConnection(self.conf) + log.warning("Opening new connection successful") + elif dbe.args[0] == 1153: # Got a packet bigger than 'max_allowed_packet' bytes + log.warning("Sent a too big packet ({lnos} lines), retrying with smaller packets.".format( + lnos=len(args))) + ## split package into smaller packets of size 'chunk_size' + chunk_size = 100 + args_list = [args[i:i + chunk_size] for i in range(0, len(args), chunk_size)] + ## retrying + time.sleep(60) + self.con.ping(True) + for chunk in args_list: + self.doExec(stmt, chunk) + else: + self.con.close() + raise + + # Give up after too many retry attempts and propagate the + # problem to the caller. 
Either it's fixed with a different + # query, or the analysis fails + log.error("DB access failed after ten attempts, giving up") + self.con.close() + raise + + def doFetchAll(self): + with _log_db_error("fetchall"): + return self.cur.fetchall() + + def doCommit(self): + with _log_db_error("commit"): + return self.con.commit() + + def doExecCommit(self, stmt, args=None): + self.doExec(stmt, args) + self.doCommit() + + # NOTE: We don't provide any synchronisation since by assumption, + # a single project is never analysed from two threads. + def getProjectID(self, name, analysisMethod): + """ + Return the project ID of the given name/analysisMethod combination. + If the project does not exist yet in the database, it is created. + """ + self.doExec("SELECT id FROM project WHERE name=%s " + "AND analysisMethod=%s", (name, analysisMethod)) + if self.cur.rowcount == 0: + # Project is not contained in the database + log.info("Creating new project {}/{}". + format(name, analysisMethod)) + self.doExecCommit("INSERT INTO project (name, analysisMethod) " + + "VALUES (%s, %s);", (name, analysisMethod)) + self.doExec("SELECT id FROM project WHERE name=%s;", (name,)) + elif self.cur.rowcount > 1: + raise Exception("Duplicate projects {}/{} in database!". + format(name, analysisMethod)) + pid = self.doFetchAll()[0][0] + log.info("Using project {}/{} with ID {}". 
+ format(name, analysisMethod, pid)) + return pid + + def get_project(self, pid): + self.doExec("SELECT name, analysisMethod FROM project" + " WHERE id=%s", pid) + if self.cur.rowcount == 0: + raise Exception("Project id {} not found!".format(pid)) + return self.doFetchAll()[0] + + def get_edgelist(self, cid): + self.doExec("SELECT fromId, toId, weight FROM edgelist \ + WHERE clusterId={}".format(cid)) + if self.cur.rowcount == 0: + raise Exception("Cluster id {} not found!".format(cid)) + return self.doFetchAll() + + def get_file_dev(self, project_id, range_id): + self.doExec("SELECT * FROM (SELECT id, commitHash, commitDate, author, description " \ + "FROM commit WHERE projectId={} AND releaseRangeId={}) AS Commits " \ + "INNER JOIN (SELECT file, commitId, SUM(size) AS fileSize " \ + "FROM commit_dependency GROUP BY commitId, file) AS commitFileLOC " \ + "ON Commits.id=commitFileLOC.commitId ORDER BY " \ + "commitFileLOC.file, commitFileLOC.commitId".format(project_id, range_id)) + + if self.cur.rowcount == 0: + raise Exception("Could not obtain file-dev information for project {} "\ + "(release range {}!".format(project_id, range_id)) + return self.doFetchAll() + + def get_release_ranges(self, project_id): + self.doExec("SELECT id FROM release_range \ + WHERE projectId={}".format(project_id)) + if self.cur.rowcount == 0: + raise Exception("No release ranges found for project {}!" 
+ .format(project_id)) + return [range_entry[0] for range_entry in self.doFetchAll()] + + def get_cluster_id(self, pid, release_range_id=None): + if release_range_id: + self.doExec("SELECT id FROM cluster WHERE clusterNumber=-1 \ + AND projectId={} AND releaseRangeId={}" + .format(pid, release_range_id)) + else: + self.doExec("SELECT id FROM cluster WHERE clusterNumber=-1 \ + AND projectId={}".format(pid)) + if self.cur.rowcount == 0: + raise Exception("Cluster from project {} not found!".format(pid)) + return self.doFetchAll()[0][0] + + def get_project_persons(self, pid): + self.doExec("SELECT id, name FROM person \ + WHERE projectId={}".format(pid)) + if self.cur.rowcount == 0: + raise Exception("Persons from project {} not found!".format(pid)) + return (self.doFetchAll()) + + def getTagID(self, projectID, tag, type): + """Determine the ID of a tag, given its textual form and the type""" + self.doExec("SELECT id FROM release_timeline WHERE projectId=%s " + + "AND tag=%s AND type=%s", (projectID, tag, type)) + if self.cur.rowcount != 1: + raise Exception("Tag '{}' of type {} is {} times in the DB!". + format(tag, type, self.cur.rowcount)) + return self.doFetchAll()[0][0] + + def getCommitId(self, projectId, commitHash, releaseRangeID=None): + stmt = "SELECT id FROM commit WHERE commitHash=%s AND projectId=%s" + args = (commitHash, projectId) + + if (releaseRangeID): + stmt += " AND releaseRangeId=%s" + args += (releaseRangeID, ) + + self.doExec(stmt, args) + if self.cur.rowcount == 0: + raise Exception("Commit {0} from project {1} not found!". 
+ format(commitHash, projectId)) + return self.doFetchAll()[0][0] + + def getRevisionID(self, projectID, tag): + return self.getTagID(projectID, tag, "release") + + def getRCID(self, projectID, tag): + return self.getTagID(projectID, tag, "rc") + + def getReleaseRangeID(self, projectID, revisionIDs): + """Given a pair of release IDs, determine the release range ID""" + self.doExec("SELECT id FROM release_range WHERE projectId=%s " + + "AND releaseStartId=%s AND releaseEndId=%s", + (projectID, revisionIDs[0], revisionIDs[1])) + if self.cur.rowcount != 1: + raise Exception("Release range from '{r[0]}' to '{r[1]}' is {c} " + "times in the DB!". + format(r=revisionIDs, c=self.cur.rowcount)) + return self.doFetchAll()[0][0] + + def getProjectTimeRange(self, pid): + """Given a project ID, determine the start and end date of available VCS data. + Returns a tuple with start end end date in the form YYYY-MM-DD""" + self.doExec("SELECT MIN(date_start) FROM revisions_view " + "WHERE projectId={}".format(pid)) + if self.cur.rowcount == 0: + raise Exception("No start date for pid {} found!".format(pid)) + date_start = self.doFetchAll()[0][0].strftime("%Y-%m-%d") + + self.doExec("SELECT MAX(date_end) FROM revisions_view " + "WHERE projectId={}".format(pid)) + if self.cur.rowcount == 0: + raise Exception("No end date for pid {} found!".format(pid)) + date_end = self.doFetchAll()[0][0].strftime("%Y-%m-%d") + + return (date_start, date_end) + + def get_commit_cdate(self, pid, hash): + """Given a project ID and a commit hash, obtain the commit date + in format YYYY-MM-DD""" + self.doExec("SELECT commitDate FROM commit " + "WHERE projectId={} and commitHash='{}'".format(pid, hash)) + if self.cur.rowcount == 0: + raise Exception("No date found for commit {} (pid {}) found!".format(hash, pid)) + date = self.doFetchAll()[0][0].strftime("%Y-%m-%d") + + return (date) + + def get_release_range(self, project_id, range_id): + self.doExec( + "SELECT st.tag, nd.tag, rc.tag FROM release_range " 
+ "LEFT JOIN release_timeline AS st ON st.id=releaseStartId " + "LEFT JOIN release_timeline AS nd ON nd.id=releaseEndId " + "LEFT JOIN release_timeline AS rc ON rc.id=releaseRCStartId " + "WHERE release_range.projectId=%s AND release_range.id=%s", + (project_id, range_id)) + ranges = self.doFetchAll() + if self.cur.rowcount == 0: + raise Exception("Range id {} not found!".format(project_id)) + return ranges[0] + + def get_num_commits_in_range(self, range_id): + self.doExec("SELECT COUNT(*) FROM commit WHERE releaseRangeId={}".format(range_id)) + if self.cur.rowcount == 0: + raise Exception("Range id {} not found in get_num_commits_in_range!".format(range_id)) + return self.doFetchAll()[0][0] + + def update_release_timeline(self, project, tagging, revs, rcs, + recreate_project=False): + ''' + For a project, update the release timeline table with the given + revisions. If existing releases/rcs from the timeline are not in + order, the conservative approach is taken and the whole project is + recreated to avoid inconsistencies. + + Returns true if the project had to be recreated. + ''' + assert len(revs) >= 2 + assert len(revs) == len(rcs) + rcs = [rc if rc else rev for rc, rev in zip(rcs, revs)] + pid = self.getProjectID(project, tagging) + + if not recreate_project: + # First check if the release timeline is sane and in order + self.doExec("SELECT tag FROM release_timeline WHERE projectId=%s " + "AND type='release' ORDER BY id", (pid,)) + tags = [tag for (tag,) in self.doFetchAll()] + if len(set(tags)) != len(tags): + log.error("Database corrupted: Duplicate release entries in " + "release_timeline! 
Recreating project.") + recreate_project = True + if len(tags) == 0: + recreate_project = True + + # Check that the tags are in the same order + if not recreate_project: + for i, tag in enumerate(tags): + if i >= len(revs): + log.warning("List of revisions to analyse was shortened.") + break + if revs[i] != tag: + log.error("Release number {} changed tag from {} to " + "{}. Recreating project.". + format(i, tag, revs[i])) + recreate_project = True + break + + # Check that the RC tags are in order + if not recreate_project: + self.doExec("SELECT tag FROM release_timeline WHERE " + "projectId=%s AND type='rc' ORDER BY id", (pid,)) + rctags = [tag for (tag,) in self.doFetchAll()] + if len(set(rctags)) != len(rctags): + log.error("Database corrupted: Duplicate RC entries in " + "release_timeline! Recreating project.") + recreate_project = True + + # Check for changes in release candidates + # Note that the first RC is unused, since it refers to the end + # of a previous period + if not recreate_project: + for i, tag in enumerate(rctags): + if i + 1 >= len(rcs): + log.warning("List of release candidates to analyse " + "was shortened.") + break + if rcs[i + 1] != tag: + log.error("Release candidate number {} changed tag " + "from {} to {}. Recreating project.". 
+ format(i, tag, rcs[i + 1])) + recreate_project = True + break + + # Go through the release ranges and check if they have changed + if not recreate_project: + self.doExec( + "SELECT st.tag, nd.tag, rc.tag FROM release_range " + "LEFT JOIN release_timeline AS st ON st.id=releaseStartId " + "LEFT JOIN release_timeline AS nd ON nd.id=releaseEndId " + "LEFT JOIN release_timeline AS rc ON rc.id=releaseRCStartId " + "WHERE release_range.projectId=%s ORDER BY release_range.id", + (pid,)) + ranges = self.doFetchAll() + if len(set(ranges)) != len(tags) - 1: + log.error("Database corrupted: Number of release ranges" + " does not match number of release tags!") + recreate_project = True + + for i, (start, end, rc) in enumerate(self.doFetchAll()): + if i + 1 >= len(revs) or recreate_project: + # List of revisions to analyse was shortened + break + if (start, end) != (revs[i], revs[i + 1]): + log.error("Release range {} changed from {} to {}." + " Recreating project.". + format(i, (start, end), (revs[i], revs[i + 1]))) + recreate_project = True + break + if rc != rcs[i + 1]: + log.error("Release candidate {} changed from {} to {}." + " Recreating project.". + format(i, rc, rcs[i + 1])) + recreate_project = True + break + + # Recreate project if necessary + if recreate_project: + # This should ripple through the database and delete + # all referencing entries for project + log.warning("Deleting and re-creating project {}/{}.". 
+ format(project, tagging)) + self.doExecCommit("DELETE FROM `project` WHERE id=%s", (pid,)) + pid = self.getProjectID(project, tagging) + tags = [] + rctags = [] + + # at this point we have verified that the first len(tags) + # entries are identical + new_ranges_to_process = [] + if len(revs) > len(tags): + n_new = len(revs) - len(tags) + log.info("Adding {} new releases...".format(n_new)) + previous_rev = None + if len(tags) > 0: + previous_rev = tags[-1] + for rev, rc in zip(revs, rcs)[len(tags):]: + self.doExecCommit("INSERT INTO release_timeline " + "(type, tag, projectId) " + "VALUES (%s, %s, %s)", + ("release", rev, pid)) + + if previous_rev is not None and rc: + self.doExecCommit("INSERT INTO release_timeline " + "(type, tag, projectId) " + "VALUES (%s, %s, %s)", + ("rc", rc, pid)) + + if previous_rev is not None: + startID = self.getRevisionID(pid, previous_rev) + endID = self.getRevisionID(pid, rev) + if rc: + rcID = self.getRCID(pid, rc) + else: + rcID = "NULL" + self.doExecCommit("INSERT INTO release_range " + "(releaseStartId, releaseEndId, " + "projectId, releaseRCStartId) " + "VALUES (%s, %s, %s, %s)", + (startID, endID, pid, rcID)) + new_ranges_to_process.append(self.getReleaseRangeID(pid, + (startID, endID))) + previous_rev = rev + # now we are in a well-defined state. + # Return the ids of the release ranges we have to process + return new_ranges_to_process + + +def tstamp_to_sql(tstamp): + """Convert a Unix timestamp into an SQL compatible DateTime string""" + return datetime.fromtimestamp(tstamp, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S") diff --git a/codeface_utils/linktype.py b/codeface_utils/linktype.py new file mode 100644 index 0000000..617d11f --- /dev/null +++ b/codeface_utils/linktype.py @@ -0,0 +1,42 @@ +# This file is part of codeface-extraction, which is free software: you +# can redistribute it and/or modify it under the terms of the GNU General +# Public License as published by the Free Software Foundation, version 2. 
+# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Copyright 2013 by Siemens AG, Wolfgang Mauerer +# Copyright 2014 by Matthias Dittrich +# All Rights Reserved. +# +# The code in this file originates from: +# https://github.com/siemens/codeface/blob/master/codeface/linktype.py + + +#enum-like class to distinguish between the various +#methods used to link individuals +class LinkType: + tag = "tag" + proximity = "proximity" + committer2author = "committer2author" + file = "file" + feature = "feature" + feature_file = "feature_file" + + _all_link_types = \ + (tag, proximity, committer2author, file, feature, feature_file) + + @staticmethod + def get_all_link_types(): + return LinkType._all_link_types + + @staticmethod + def get_tag_types(): + return ["Signed-off-by", "Acked-by", "CC", "Reviewed-by", + "Reported-by", "Tested-by", "Patch"] diff --git a/codeface_utils/util.py b/codeface_utils/util.py new file mode 100644 index 0000000..59402d8 --- /dev/null +++ b/codeface_utils/util.py @@ -0,0 +1,111 @@ +# This file is part of codeface-extraction, which is free software: you +# can redistribute it and/or modify it under the terms of the GNU General +# Public License as published by the Free Software Foundation, version 2. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Copyright 2013 by Siemens AG, Wolfgang Mauerer +# Copyright 2025 by Maximilian Löffler +# Copyright 2026 by Thomas Bock +# All Rights Reserved. +# +# The code in this file originates from: +# https://github.com/siemens/codeface/blob/master/codeface/util.py + +from __future__ import absolute_import +import logging +import os +import os.path +import re +import sys +import traceback +import unicodedata +from threading import enumerate as threading_enumerate +from ftfy import fix_encoding + +def setup_logging(level=logging.INFO): + logging.basicConfig( + level=level, + format='%(asctime)s [%(name)s] %(levelname)s: %(message)s' + ) + +log = logging.getLogger(__name__) + +# Function to dump the stacks of all threads +def get_stack_dump(): + id2name = dict([(th.ident, th.name) for th in threading_enumerate()]) + code = ["Stack dump:"] + for threadId, stack in sys._current_frames().items(): + code.append("") + code.append("# Thread: %s(%d)" % (id2name.get(threadId,""), threadId)) + for filename, lineno, name, line in traceback.extract_stack(stack): + code.append('File: "%s", line %d, in %s' % (filename, lineno, name)) + if line: + code.append(" %s" % (line.strip())) + return code + +def gen_range_path(base_path, i, start_rev, end_rev): + if (len(start_rev) == 40): + # Same logic as above, but construct a file system path + start_rev = start_rev[0:6] + end_rev = end_rev[0:6] + return(os.path.join(base_path, "{0}--{1}-{2}". + format(str(i).zfill(3), start_rev, end_rev))) + +def encode_as_utf8(string): + """ + Encode the given string properly in UTF-8, + independent from its internal representation (str or unicode). + + This function removes any control characters and four-byte-encoded unicode characters and replaces them + with " ". 
(Four-byte-encoded unicode characters do not work with 'utf8' encoding of MySQL.) + + :param string: any string + :return: the UTF-8 encoded string of type str + """ + + # Normalize to str first + if isinstance(string, bytes): + try: + text = string.decode("utf-8") + except UnicodeDecodeError: + text = string.decode("utf-8", errors="replace") + elif isinstance(string, str): + text = string + else: + # not string-like, return as-is + return string + + # convert to real unicode-utf8 encoded string, fix_text ensures proper encoding + new_string = fix_encoding(text) + + # remove unicode characters from "Specials" block + # see: https://www.compart.com/en/unicode/block/U+FFF0 + new_string = re.sub(r"[\ufff0-\uffff]", " ", new_string) + + # remove all kinds of control characters and emojis + # see: https://www.fileformat.info/info/unicode/category/index.htm + new_string = u"".join(ch if unicodedata.category(ch)[0] != "C" else " " for ch in new_string) + + new_string = new_string.encode("utf-8") + + # replace any 4-byte characters with a single space (previously: four_byte_replacement) + try: + # UCS-4 build + four_byte_regex = re.compile(u"[\U00010000-\U0010ffff]") + except re.error: + # UCS-2 build + four_byte_regex = re.compile(u"[\uD800-\uDBFF][\uDC00-\uDFFF]") + + four_byte_replacement = r" " # r":4bytereplacement:" + new_string = four_byte_regex.sub(four_byte_replacement, new_string.decode("utf-8")).encode("utf-8") + + return new_string.decode("utf-8") + diff --git a/csv_writer/csv_writer.py b/csv_writer/csv_writer.py index 2804081..ca453be 100644 --- a/csv_writer/csv_writer.py +++ b/csv_writer/csv_writer.py @@ -15,6 +15,7 @@ # Copyright 2017 by Claus Hunsen # Copyright 2018 by Anselm Fehnker # Copyright 2020-2021 by Thomas Bock +# Copyright 2025 by Maximilian Löffler # All Rights Reserved. 
""" This file provides the needed functions for standardized CSV writing @@ -23,19 +24,6 @@ import csv -def __encode(line): - """Encode the given line (a tuple of columns) properly in UTF-8.""" - - lineres = () # re-encode column if it is unicode - for column in line: - if type(column) is unicode: - lineres += (column.encode("utf-8"),) - else: - lineres += (column,) - - return lineres - - def write_to_csv(file_path, lines, append=False): """ Write the given lines to the file with the given file path. @@ -45,14 +33,13 @@ def write_to_csv(file_path, lines, append=False): :param append: Flag if lines shall be appended to file or overwrite file """ - open_mode = "a+b" if append else "wb" + open_mode = "a" if append else "w" - with open(file_path, open_mode) as csv_file: + with open(file_path, mode=open_mode, encoding="utf-8") as csv_file: wr = csv.writer(csv_file, delimiter=';', lineterminator='\n', quoting=csv.QUOTE_NONNUMERIC) # encode in proper UTF-8 before writing to file for line in lines: - line_encoded = __encode(line) - wr.writerow(line_encoded) + wr.writerow(line) def read_from_csv(file_path, delimiter=";"): """ diff --git a/issue_processing/issue_processing.py b/issue_processing/issue_processing.py index a901e19..3db14d5 100644 --- a/issue_processing/issue_processing.py +++ b/issue_processing/issue_processing.py @@ -18,28 +18,31 @@ # Copyright 2018-2019 by Anselm Fehnker # Copyright 2019 by Thomas Bock # Copyright 2020-2021 by Thomas Bock +# Copyright 2026 by Thomas Bock +# Copyright 2025 by Maximilian Löffler # All Rights Reserved. """ This file is able to extract Github issue data from json files. 
""" import argparse -import httplib import json import os import sys -import urllib from datetime import datetime, timedelta +from logging import getLogger -import operator -from codeface.cli import log -from codeface.cluster.idManager import idManager -from codeface.configuration import Configuration -from codeface.dbmanager import DBManager +from codeface_utils.cluster.idManager import dbIdManager, csvIdManager +from codeface_utils.configuration import Configuration +from codeface_utils.dbmanager import DBManager from dateutil import parser as dateparser from csv_writer import csv_writer +# create logger +setup_logging() +log = getLogger(__name__) + # known types from JIRA and GitHub default labels known_types = {"bug", "improvement", "enhancement", "new feature", "task", "test", "wish"} @@ -61,7 +64,7 @@ def run(): # parse arguments args = parser.parse_args(sys.argv[1:]) - __codeface_conf, __project_conf = map(os.path.abspath, (args.config, args.project)) + __codeface_conf, __project_conf = list(map(os.path.abspath, (args.config, args.project))) # create configuration __conf = Configuration.load(__codeface_conf, __project_conf) @@ -95,7 +98,7 @@ def load(source_folder): """ srcfile = os.path.join(source_folder, "issues.json") - log.devinfo("Loading Github issues from file '{}'...".format(srcfile)) + log.info("Loading Github issues from file '{}'...".format(srcfile)) # check if file exists and exit early if not if not os.path.exists(srcfile): @@ -191,7 +194,7 @@ def lookup_user(user_dict, user): user["email"] is None or user["email"] == ""): # lookup user only if username is not None and not empty - if not user["username"] is None and not user["username"] == "": + if user["username"] is not None and not user["username"] == "": user = user_dict[user["username"]] return user @@ -210,8 +213,8 @@ def update_user_dict(user_dict, user): if user is None: user = create_deleted_user() - if not user["username"] in user_dict.keys(): - if not user["username"] is None and not 
user["username"] == "": + if user["username"] not in list(user_dict.keys()): + if user["username"] is not None and not user["username"] == "": user_dict[user["username"]] = user else: user_in_dict = user_dict[user["username"]] @@ -232,7 +235,7 @@ def reformat_issues(issue_data): :return: the re-arranged issue data """ - log.devinfo("Re-arranging Github issues...") + log.info("Re-arranging Github issues...") # re-process all issues for issue in issue_data: @@ -340,7 +343,7 @@ def merge_issue_events(issue_data): # as we cannot update the referenced issue during iterating over all issues, we need to save the # referenced_by event for the referenced issue temporarily - if rel_issue["number"] in issue_data_to_update.keys(): + if rel_issue["number"] in list(issue_data_to_update.keys()): issue_data_to_update[rel_issue["number"]]["eventsList"].append(referenced_issue_event) else: ref = dict() @@ -422,7 +425,7 @@ def merge_issue_events(issue_data): # add dismissal comments to the list of comments for event in issue["eventsList"]: - if (event["event"] == "review_dismissed" and not event["dismissalMessage"] is None + if (event["event"] == "review_dismissed" and event["dismissalMessage"] is not None and not event["dismissalMessage"] == ""): dismissalComment = dict() dismissalComment["event"] = "commented" @@ -500,7 +503,7 @@ def merge_issue_events(issue_data): issue["eventsList"] = sorted(issue["eventsList"], key=lambda k: k["created_at"]) # updates all the issues by the temporarily stored referenced_by events - for key, value in issue_data_to_update.iteritems(): + for _, value in issue_data_to_update.items(): for issue in issue_data: if issue["number"] == value["number"]: issue["eventsList"] = issue["eventsList"] + value["eventsList"] @@ -535,7 +538,7 @@ def reformat_events(issue_data): users = update_user_dict(users, event["user"]) # 3) add or update users which are ref_target of the current event - if not event["ref_target"] is None and not event["ref_target"] == "": + if 
event["ref_target"] is not None and not event["ref_target"] == "": users = update_user_dict(users, event["ref_target"]) # as the user dictionary is created, start re-formating the event information of all issues @@ -636,7 +639,7 @@ def reformat_events(issue_data): event["event_info_1"] = issue["state_new"] event["event_info_2"] = issue["resolution"] - elif event["event"] == "referenced" and not event["commit"] is None: + elif event["event"] == "referenced" and event["commit"] is not None: # remove "referenced" events originating from commits # as they are handled as referenced commit events_to_remove.append(event) @@ -670,10 +673,13 @@ def insert_user_data(issues, conf, resdir): user_id_buffer = dict() # create buffer for usernames (key: username) username_id_buffer = dict() - # open database connection - dbm = DBManager(conf) - # open ID-service connection - idservice = idManager(dbm, conf) + + # connect to ID service + if conf["useCsv"]: + idservice = csvIdManager(conf) + else: + dbm = DBManager(conf) + idservice = dbIdManager(dbm, conf) def get_user_string(name, email): if not email or email is None: @@ -683,26 +689,24 @@ def get_user_string(name, email): return "{name} <{email}>".format(name=name, email=email) def get_id_and_update_user(user, buffer_db_ids=user_id_buffer, buffer_usernames=username_id_buffer): - username = unicode(user["username"]).encode("utf-8") - # fix encoding for name and e-mail address - if user["name"] is not None: - name = unicode(user["name"]).encode("utf-8") - else: - name = username - mail = unicode(user["email"]).encode("utf-8") + # ensure string representation for name and e-mail address + username = str(user["username"]) + name = str(user["name"]) if "name" in user else username + mail = str(user["email"]) + # construct string for ID service and send query user_string = get_user_string(name, mail) # check buffer to reduce amount of DB queries if user_string in buffer_db_ids: - log.devinfo("Returning person id for user '{}' from 
buffer.".format(user_string)) + log.info("Returning person id for user '{}' from buffer.".format(user_string)) if username is not None: buffer_usernames[username] = buffer_db_ids[user_string] return buffer_db_ids[user_string] # get person information from ID service - log.devinfo("Passing user '{}' to ID service.".format(user_string)) + log.info("Passing user '{}' to ID service.".format(user_string)) idx = idservice.getPersonID(user_string) # add user information to buffer @@ -719,16 +723,17 @@ def get_user_from_id(idx, buffer_db=user_buffer): # check whether user information is in buffer to reduce amount of DB queries if idx in buffer_db: - log.devinfo("Returning user '{}' from buffer.".format(idx)) + log.info("Returning user '{}' from buffer.".format(idx)) return buffer_db[idx] # get person information from ID service - log.devinfo("Passing user id '{}' to ID service.".format(idx)) + log.info("Passing user id '{}' to ID service.".format(idx)) person = idservice.getPersonFromDB(idx) - user = dict() - user["email"] = person["email1"] # column "email1" - user["name"] = person["name"] # column "name" - user["id"] = person["id"] # column "id" + user = { + "name": person["name"], + "email": person["email1"], + "id": person["id"] + } # add user information to buffer buffer_db[idx] = user diff --git a/issue_processing/jira_issue_processing.py b/issue_processing/jira_issue_processing.py index d9748ae..4220b96 100644 --- a/issue_processing/jira_issue_processing.py +++ b/issue_processing/jira_issue_processing.py @@ -17,7 +17,8 @@ # Copyright 2018 by Barbara Eckl # Copyright 2018-2019 by Anselm Fehnker # Copyright 2020-2021 by Thomas Bock -# Copyright 2023 by Maximilian Löffler +# Copyright 2026 by Thomas Bock +# Copyright 2023, 2025 by Maximilian Löffler # All Rights Reserved. """ This file is able to extract Jira issue data from xml files. 
@@ -26,27 +27,29 @@ import argparse import os import sys -import time import csv import json +from logging import getLogger from xml.dom.minidom import parse -from datetime import datetime from dateutil import parser as dateparser -from codeface.cli import log -from codeface.cluster.idManager import idManager -from codeface.configuration import Configuration -from codeface.dbmanager import DBManager +from codeface_utils.cluster.idManager import dbIdManager, csvIdManager +from codeface_utils.configuration import Configuration +from codeface_utils.dbmanager import DBManager from csv_writer import csv_writer from jira import JIRA from jira.exceptions import JIRAError from time import sleep +import importlib -reload(sys) -sys.setdefaultencoding("utf-8") +importlib.reload(sys) + +# create logger +setup_logging() +log = getLogger(__name__) # global counter for JIRA requests to make sure to not exceed the request limit jira_request_counter = 0 @@ -65,7 +68,7 @@ def run(): # parse arguments args = parser.parse_args(sys.argv[1:]) - __codeface_conf, __project_conf = map(os.path.abspath, (args.config, args.project)) + __codeface_conf, __project_conf = list(map(os.path.abspath, (args.config, args.project))) # create configuration __conf = Configuration.load(__codeface_conf, __project_conf) @@ -114,9 +117,9 @@ def run(): processed_issues.extend(issues) # 4) insert referenced_by events into issue histories - for issue_id in referenced_bys.keys(): + for issue_id in list(referenced_bys.keys()): # obtain list of issues which have the current issue id - referenced_issue = list(filter(lambda issue: issue["externalId"] == issue_id, processed_issues)) + referenced_issue = list([issue for issue in processed_issues if issue["externalId"] == issue_id]) if len(referenced_issue) > 0: if len(referenced_issue) > 1: log.warning("Ambiguous issue id " + issue_id + " found in the issue list.") @@ -172,7 +175,7 @@ def load_xml(source_folder, xml_file): """ srcfile = os.path.join(source_folder, 
xml_file) - log.devinfo("Loading issues from file '{}'...".format(srcfile)) + log.info("Loading issues from file '{}'...".format(srcfile)) try: # parse the xml-file @@ -235,21 +238,21 @@ def merge_user_with_user_from_csv(user, persons): """ new_user = dict() - name_utf8 = unicode(user["name"]).encode("utf-8") - username_utf8 = unicode(user["username"].lower()).encode("utf-8") + name_utf8 = str(user["name"]).encode("utf-8") + username_utf8 = str(user["username"].lower()).encode("utf-8") - if username_utf8 in persons["by_username"].keys(): + if username_utf8 in list(persons["by_username"].keys()): new_user["username"] = username_utf8 - new_user["name"] = unicode(persons["by_username"].get(username_utf8)[0]).encode("utf-8") - new_user["email"] = unicode(persons["by_username"].get(username_utf8)[1]).encode("utf-8") - elif name_utf8 in persons["by_name"].keys(): + new_user["name"] = str(persons["by_username"].get(username_utf8)[0]).encode("utf-8") + new_user["email"] = str(persons["by_username"].get(username_utf8)[1]).encode("utf-8") + elif name_utf8 in list(persons["by_name"].keys()): new_user["username"] = username_utf8 - new_user["name"] = unicode(persons["by_name"].get(name_utf8)[0]).encode("utf-8") - new_user["email"] = unicode(persons["by_name"].get(name_utf8)[1]).encode("utf-8") + new_user["name"] = str(persons["by_name"].get(name_utf8)[0]).encode("utf-8") + new_user["email"] = str(persons["by_name"].get(name_utf8)[1]).encode("utf-8") else: new_user["username"] = username_utf8 new_user["name"] = name_utf8 - new_user["email"] = unicode(user["email"]).encode("utf-8") + new_user["email"] = str(user["email"]).encode("utf-8") log.warning("User not in csv-file: " + str(user)) log.info("current User: " + str(user) + ", new user: " + str(new_user)) @@ -290,7 +293,7 @@ def parse_xml(issue_data, persons, skip_history, referenced_bys): resolved = issue_x.getElementsByTagName("resolved") issue["resolveDate"] = "" - if (len(resolved) > 0) and (not resolved[0] is None): + if 
(len(resolved) > 0) and (resolved[0] is not None): resolveDate = resolved[0].firstChild.data issue["resolveDate"] = format_time(resolveDate) @@ -372,7 +375,7 @@ def parse_xml(issue_data, persons, skip_history, referenced_bys): text = comment_x.firstChild if text is None: - log.warn("Empty comment in issue " + issue["id"]) + log.warning("Empty comment in issue " + issue["id"]) comment["text"] = "" else: comment["text"] = text.data @@ -440,7 +443,7 @@ def load_issues_via_api(issues, persons, url, referenced_bys): api_issue = jira_project.issue(issue["externalId"], expand="changelog") changelog = api_issue.changelog except JIRAError: - log.warn("JIRA Error: Changelog cannot be extracted for issue " + issue["externalId"] + ". History omitted!") + log.warning("JIRA Error: Changelog cannot be extracted for issue " + issue["externalId"] + ". History omitted!") changelog = None histories = list() @@ -478,7 +481,7 @@ def load_issues_via_api(issues, persons, url, referenced_bys): if hasattr(change, "author"): user = create_user(change.author.displayName, change.author.name, "") else: - log.warn("No author for history: " + str(change.id) + " created at " + str(change.created)) + log.warning("No author for history: " + str(change.id) + " created at " + str(change.created)) user = create_user("","","") history["author"] = merge_user_with_user_from_csv(user, persons) history["date"] = format_time(change.created) @@ -498,7 +501,7 @@ def load_issues_via_api(issues, persons, url, referenced_bys): if hasattr(change, "author"): user = create_user(change.author.displayName, change.author.name, "") else: - log.warn("No author for history: " + str(change.id) + " created at " + str(change.created)) + log.warning("No author for history: " + str(change.id) + " created at " + str(change.created)) user = create_user("","","") history["author"] = merge_user_with_user_from_csv(user, persons) history["date"] = format_time(change.created) @@ -590,10 +593,13 @@ def insert_user_data(issues, conf): 
user_buffer = dict() # create buffer for user ids (key: user string) user_id_buffer = dict() - # open database connection - dbm = DBManager(conf) - # open ID-service connection - idservice = idManager(dbm, conf) + + # connect to ID service + if conf["useCsv"]: + idservice = csvIdManager(conf) + else: + dbm = DBManager(conf) + idservice = dbIdManager(dbm, conf) def get_user_string(name, email): if not email or email is None: @@ -603,22 +609,21 @@ def get_user_string(name, email): return "{name} <{email}>".format(name=name, email=email) def get_id_and_update_user(user, buffer_db_ids=user_id_buffer): - # fix encoding for name and e-mail address - if user["name"] is not None and user["name"] != "": - name = unicode(user["name"]).encode("utf-8") - else: - name = unicode(user["username"]).encode("utf-8") - mail = unicode(user["email"]).encode("utf-8") # empty + + # ensure string representation for name and e-mail address + name = str(user["name"]) if "name" in user else str(user["username"]) + mail = str(user["email"]) # may be empty + # construct string for ID service and send query user_string = get_user_string(name, mail) # check buffer to reduce amount of DB queries if user_string in buffer_db_ids: - log.devinfo("Returning person id for user '{}' from buffer.".format(user_string)) + log.info("Returning person id for user '{}' from buffer.".format(user_string)) return buffer_db_ids[user_string] # get person information from ID service - log.devinfo("Passing user '{}' to ID service.".format(user_string)) + log.info("Passing user '{}' to ID service.".format(user_string)) idx = idservice.getPersonID(user_string) # add user information to buffer @@ -631,16 +636,17 @@ def get_user_from_id(idx, buffer_db=user_buffer): # check whether user information is in buffer to reduce amount of DB queries if idx in buffer_db: - log.devinfo("Returning user '{}' from buffer.".format(idx)) + log.info("Returning user '{}' from buffer.".format(idx)) return buffer_db[idx] # get person 
information from ID service - log.devinfo("Passing user id '{}' to ID service.".format(idx)) + log.info("Passing user id '{}' to ID service.".format(idx)) person = idservice.getPersonFromDB(idx) - user = dict() - user["email"] = person["email1"] # column "email1" - user["name"] = person["name"] # column "name" - user["id"] = person["id"] # column "id" + user = { + "name": person["name"], + "email": person["email1"], + "id": person["id"] + } # add user information to buffer buffer_db[idx] = user @@ -1000,8 +1006,8 @@ def find_first_existing(source_folder, filenames): :return: the first existing file name, None otherwise """ - filenames = map(lambda fi: os.path.join(source_folder, fi), filenames) - existing = map(lambda fi: os.path.exists(fi), filenames) + filenames = [os.path.join(source_folder, fi) for fi in filenames] + existing = [os.path.exists(fi) for fi in filenames] first = next((i for (i, x) in enumerate(existing) if x), None) if first is not None: @@ -1020,17 +1026,17 @@ def find_first_existing(source_folder, filenames): log.error("Person files '{}' do not exist! 
Exiting early...".format(person_files)) sys.exit(-1) - log.devinfo("Loading person csv from file '{}'...".format(srcfile)) + log.info("Loading person csv from file '{}'...".format(srcfile)) with open(srcfile, "r") as f: person_data = csv.DictReader(f, delimiter=",", skipinitialspace=True) persons_by_username = {} persons_by_name = {} for row in person_data: - if not row["AuthorID"] in persons_by_username.keys(): - author_id_utf8 = unicode(row["AuthorID"]).encode("utf-8") + if row["AuthorID"] not in list(persons_by_username.keys()): + author_id_utf8 = str(row["AuthorID"]).encode("utf-8") persons_by_username[author_id_utf8] = (row["AuthorName"], row["userEmail"]) - if not row["AuthorName"] in persons_by_name.keys(): - author_name_utf8 = unicode(row["AuthorName"]).encode("utf-8") + if row["AuthorName"] not in list(persons_by_name.keys()): + author_name_utf8 = str(row["AuthorName"]).encode("utf-8") persons_by_name[author_name_utf8] = (row["AuthorName"], row["userEmail"]) persons = dict() diff --git a/mbox_parsing/mbox_parsing.py b/mbox_parsing/mbox_parsing.py index fd9fd59..1ad12a5 100644 --- a/mbox_parsing/mbox_parsing.py +++ b/mbox_parsing/mbox_parsing.py @@ -15,6 +15,8 @@ # Copyright 2017 by Raphael Nömmer # Copyright 2017-2019 by Claus Hunsen # Copyright 2018-2019 by Thomas Bock +# Copyright 2026 by Thomas Bock +# Copyright 2025 by Maximilian Löffler # All Rights Reserved. """ This file is able to extract artifact occurrences in e-mail within mbox files. 
@@ -28,17 +30,20 @@ import shutil import sys from os.path import abspath +from logging import getLogger -from codeface.cli import log -from codeface.configuration import Configuration from joblib import Parallel, delayed from whoosh import index # import create_in, open_dir, exists_in from whoosh.analysis import StandardAnalyzer from whoosh.fields import Schema, TEXT, ID from whoosh.qparser import QueryParser +from codeface_utils.configuration import Configuration from csv_writer import csv_writer +# create logger +setup_logging() +log = getLogger(__name__) def __get_index(mbox, mbox_path, results_folder, schema, reindex): """Initialize the search index (and create it, if needed @@ -56,25 +61,25 @@ def __get_index(mbox, mbox_path, results_folder, schema, reindex): index_path = os.path.join(results_folder, "mbox-index", os.path.basename(mbox_path)) # 1) if reindexing, remove the index folder if os.path.exists(index_path) and reindex: - log.devinfo("Removing index from path '{}'...".format(index_path)) + log.info("Removing index from path '{}'...".format(index_path)) shutil.rmtree(index_path) # 2) Check if we need to create the index for Whoosh full-text search - log.devinfo("Checking for index in results folder...") + log.info("Checking for index in results folder...") if (not os.path.exists(index_path)) or (not index.exists_in(index_path)): # 2.1) create index - log.devinfo("Creating index for text search in results folder.") + log.info("Creating index for text search in results folder.") os.makedirs(index_path) # create path index.create_in(index_path, schema) # initialize as index path ix = index.open_dir(index_path) # open as index path writer = ix.writer() # add all messages to index for message in mbox: - writer.add_document(messageID=unicode(message['message-id']), content=__mbox_getbody(message)) + writer.add_document(messageID=str(message['message-id']), content=__mbox_getbody(message)) writer.commit() - log.devinfo("Index created, parsing will begin now.") 
+ log.info("Index created, parsing will begin now.") else: # 2.2) load index - log.devinfo("Index has already been created, parsing will begin right away.") + log.info("Index has already been created, parsing will begin right away.") ix = index.open_dir(index_path) return ix @@ -131,12 +136,12 @@ def __mbox_getbody(message): body = message.get_payload(decode=True) if body is None: - log.devinfo(message.get_content_type()) - log.devinfo( + log.info(message.get_content_type()) + log.info( "An image or some other content has been found that cannot be indexed. Message is given an empty body.") body = ' ' - return unicode(body, errors="replace") + return str(body, errors="replace") def __parse_execute(artifact, schema, my_index, include_filepath): @@ -149,7 +154,7 @@ def __parse_execute(artifact, schema, my_index, include_filepath): :return: a match list of tuples (file name, artifact, message ID) """ - log.devinfo("Searching for artifact ({}, {})...".format(artifact[0], artifact[1])) + log.info("Searching for artifact ({}, {})...".format(artifact[0], artifact[1])) result = [] @@ -247,7 +252,7 @@ def run(): args = parser.parse_args(sys.argv[1:]) __resdir = abspath(args.resdir) __maildir = abspath(args.maildir) - __codeface_conf, __project_conf = map(abspath, (args.config, args.project)) + __codeface_conf, __project_conf = list(map(abspath, (args.config, args.project))) # initialize configuration __conf = Configuration.load(__codeface_conf, __project_conf)