diff --git a/localai/Makefile b/localai/Makefile
new file mode 100644
index 0000000..04a1eb3
--- /dev/null
+++ b/localai/Makefile
@@ -0,0 +1,39 @@
+VENV_BIN = python3 -m venv
+VENV_DIR ?= .venv
+VENV_ACTIVATE = $(VENV_DIR)/bin/activate
+VENV_RUN = . $(VENV_ACTIVATE)
+
+venv: $(VENV_ACTIVATE)
+
+$(VENV_ACTIVATE): setup.py setup.cfg
+	test -d $(VENV_DIR) || $(VENV_BIN) $(VENV_DIR)
+	$(VENV_RUN); pip install --upgrade pip setuptools plux
+	$(VENV_RUN); pip install --upgrade black isort pyproject-flake8 flake8-black flake8-isort
+	$(VENV_RUN); pip install -e .
+	touch $(VENV_ACTIVATE)
+
+clean:
+	rm -rf $(VENV_DIR)
+	rm -rf build/
+	rm -rf .eggs/
+	rm -rf *.egg-info/
+
+install: venv
+	$(VENV_RUN); python setup.py develop
+
+lint: ## Run code linter to check code style
+	($(VENV_RUN); python -m pflake8 --show-source)
+
+format: ## Run black and isort code formatter
+	$(VENV_RUN); python -m isort .; python -m black .
+
+dist: venv
+	$(VENV_RUN); python setup.py sdist bdist_wheel
+
+publish: clean-dist venv dist
+	$(VENV_RUN); pip install --upgrade twine; twine upload dist/*
+
+clean-dist: clean
+	rm -rf dist/
+
+.PHONY: venv clean clean-dist dist install lint format publish
diff --git a/localai/README.md b/localai/README.md
new file mode 100644
index 0000000..44c2b0f
--- /dev/null
+++ b/localai/README.md
@@ -0,0 +1,34 @@
+LocalAI Extension
+===============================
+
+Run LocalAI directly in LocalStack.
+
+## Install local development version
+
+To install the extension into LocalStack in developer mode, you will need Python 3.10 and a virtual environment in the extension project.
+
+In the newly generated project, simply run:
+
+```bash
+make install
+```
+
+Then, to enable the extension for LocalStack, run:
+
+```bash
+localstack extensions dev enable .
+```
+
+You can then start LocalStack with `EXTENSION_DEV_MODE=1` to load all enabled extensions:
+
+```bash
+EXTENSION_DEV_MODE=1 localstack start
+```
+
+## Install from GitHub repository
+
+To distribute your extension, simply upload it to your GitHub account. Your extension can then be installed via:
+
+```bash
+localstack extensions install "git+https://github.com/localstack/localstack-localai-extension/#egg=localstack-localai-extension"
+```
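Once the extension is running, LocalAI exposes an OpenAI-compatible HTTP API through the LocalStack gateway. A minimal smoke test, as a sketch: it assumes the default edge port 4566, the `requests` package, and the `gpt-3.5-turbo` alias that the extension pre-loads (see `extension.py` below).

```python
import requests

# hit LocalAI's OpenAI-compatible chat completions endpoint through the
# LocalStack gateway; the "localai." host prefix is routed by the extension
response = requests.post(
    "http://localai.localhost.localstack.cloud:4566/v1/chat/completions",
    json={
        "model": "gpt-3.5-turbo",  # alias configured via PRELOAD_MODELS
        "messages": [{"role": "user", "content": "Hello!"}],
    },
    timeout=120,  # the first request may be slow while the model loads
)
print(response.json())
```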
diff --git a/localai/localai/__init__.py b/localai/localai/__init__.py
new file mode 100644
index 0000000..9558235
--- /dev/null
+++ b/localai/localai/__init__.py
@@ -0,0 +1 @@
+name = "localai"
diff --git a/localai/localai/extension.py b/localai/localai/extension.py
new file mode 100644
index 0000000..82caae9
--- /dev/null
+++ b/localai/localai/extension.py
@@ -0,0 +1,113 @@
+import logging
+import os.path
+import threading
+import time
+from typing import Optional
+
+from localstack import config, constants
+from localstack.extensions.api import Extension, aws, http
+from localstack.utils.container_utils.container_client import (
+    ContainerConfiguration,
+    VolumeBind,
+    VolumeMappings,
+)
+from localstack.utils.docker_utils import get_default_volume_dir_mount
+from localstack.utils.strings import short_uid
+
+from localai.server import ContainerServer
+
+LOG = logging.getLogger(__name__)
+
+
+class LocalAIExtension(Extension):
+    name = "localstack-localai-extension"
+
+    server: Optional[ContainerServer]
+    proxy: Optional[http.ProxyHandler]
+
+    def __init__(self):
+        self.server = None
+        self.proxy = None
+
+    def on_extension_load(self):
+        # TODO: logging should be configured automatically for extensions
+        if config.DEBUG:
+            level = logging.DEBUG
+        else:
+            level = logging.INFO
+        logging.getLogger("localai").setLevel(level=level)
+
+    def on_platform_start(self):
+        volumes = VolumeMappings()
+        # FIXME
+        if localstack_volume := get_default_volume_dir_mount():
+            models_source = os.path.join(localstack_volume.source, "cache", "localai", "models")
+            volumes.append(VolumeBind(models_source, "/build/models"))
+        else:
+            LOG.warning("no volume mounted, will not be able to store models")
+
+        server = ContainerServer(
+            8080,
+            ContainerConfiguration(
+                image_name="quay.io/go-skynet/local-ai:latest",
+                name=f"localstack-localai-{short_uid()}",
+                volumes=volumes,
+                env_vars={
+                    # FIXME: is this a good model to pre-load?
+                    #  should we name the extension after the pre-loaded model instead?
+                    "PRELOAD_MODELS": '[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo"}]',
+                },
+            ),
+        )
+        self.server = server
+        # FIXME: start can take *very* long, since it may download the localai image (which is
+        #  several GB), and then download the pre-trained model, which is another 2GB.
+        LOG.info("starting up %s as %s", server.config.image_name, server.config.name)
+        server.start()
+
+        def _update_proxy_job():
+            # wait until the container becomes available, then point the proxy at its IP
+            i = 1
+
+            while True:
+                if self.proxy:
+                    if self.server.get_network_ip():
+                        LOG.info(
+                            "serving LocalAI API on http://localai.%s:%s",
+                            constants.LOCALHOST_HOSTNAME,
+                            config.get_edge_port_http(),
+                        )
+                        self.proxy.proxy.forward_base_url = self.server.url
+                        break
+
+                time.sleep(i)
+                i = i * 2
+
+        threading.Thread(target=_update_proxy_job, daemon=True).start()
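The `_update_proxy_job` above retries with unbounded exponential backoff and loops forever if the container never reports an IP. A bounded variant could reuse `poll_condition` from `localstack.utils.sync` (which `server.py` below already imports); this is a sketch of an alternative, not part of the diff, and `wait_for_network_ip` is a hypothetical helper name:

```python
from localstack.utils.sync import poll_condition


def wait_for_network_ip(server, timeout: float = 600) -> bool:
    """Poll until the container reports a network IP; give up after `timeout` seconds."""

    def _has_ip() -> bool:
        try:
            return bool(server.get_network_ip())
        except Exception:
            # the container may not have been created yet
            return False

    return poll_condition(_has_ip, timeout=timeout, interval=2)
```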
+    def on_platform_shutdown(self):
+        if self.server:
+            self.server.shutdown()
+            self.server.client.remove_container(self.server.config.name)
+
+    def update_gateway_routes(self, router: http.Router[http.RouteHandler]):
+        LOG.info("setting up proxy to %s", self.server.url)
+        self.proxy = http.ProxyHandler(forward_base_url=self.server.url)
+
+        # hostname aliases
+        router.add(
+            "/",
+            host="localai.<host>",
+            endpoint=self.proxy,
+        )
+        router.add(
+            "/<path:path>",
+            host="localai.<host>",
+            endpoint=self.proxy,
+        )
+
+    def update_request_handlers(self, handlers: aws.CompositeHandler):
+        pass
+
+    def update_response_handlers(self, handlers: aws.CompositeResponseHandler):
+        pass
diff --git a/localai/localai/server.py b/localai/localai/server.py
new file mode 100644
index 0000000..c770f9c
--- /dev/null
+++ b/localai/localai/server.py
@@ -0,0 +1,57 @@
+import logging
+from typing import Optional
+
+from localstack.utils.container_utils.container_client import (
+    ContainerClient,
+    ContainerConfiguration,
+)
+from localstack.utils.docker_utils import DOCKER_CLIENT
+from localstack.utils.serving import Server
+from localstack.utils.sync import poll_condition
+
+LOG = logging.getLogger(__name__)
+
+
+class ContainerServer(Server):
+    client: ContainerClient
+    config: ContainerConfiguration
+
+    container_id: Optional[str]
+
+    def __init__(
+        self,
+        port: int,
+        config: ContainerConfiguration,
+        host: str = "localhost",
+        client: Optional[ContainerClient] = None,
+    ) -> None:
+        super().__init__(port, host)
+        self.config = config
+        self.client = client if client else DOCKER_CLIENT
+        self.container_id = None
+
+    def is_up(self) -> bool:
+        if not self.is_container_running():
+            return False
+        return super().is_up()
+
+    def is_container_running(self) -> bool:
+        if not self.config.name:
+            return False
+        return self.client.is_container_running(self.config.name)
+
+    def wait_is_container_running(self, timeout=None) -> bool:
+        return poll_condition(self.is_container_running, timeout)
+
+    def do_run(self):
+        if self.client.is_container_running(self.config.name):
+            raise ValueError(f"Container named {self.config.name} already running")
+
+        self.container_id = self.client.create_container_from_config(self.config)
+        self.client.start_container(self.container_id)
+        # re-configure the host now that the network IP is known
+        self._host = self.get_network_ip()
+
+    def get_network_ip(self) -> str:
+        inspect = self.client.inspect_container(self.container_id)
+        return inspect["NetworkSettings"]["IPAddress"]
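`ContainerServer` combines LocalStack's generic `Server` lifecycle with the Docker container client, so it is not tied to the LocalAI image. A hypothetical usage sketch (the image, container name, and port here are illustrative, not part of the extension):

```python
from localstack.utils.container_utils.container_client import ContainerConfiguration

from localai.server import ContainerServer

# run an nginx container and wait until it is running, then tear it down again
server = ContainerServer(
    80,
    ContainerConfiguration(image_name="nginx:alpine", name="demo-nginx"),
)
server.start()  # runs do_run() in a background thread
if server.wait_is_container_running(timeout=60):
    print("container serving at", server.url)  # e.g. http://172.17.0.2:80
server.shutdown()
```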
diff --git a/localai/pyproject.toml b/localai/pyproject.toml
new file mode 100644
index 0000000..8b034d6
--- /dev/null
+++ b/localai/pyproject.toml
@@ -0,0 +1,19 @@
+# LocalStack project configuration
+[build-system]
+requires = ['setuptools', 'wheel', 'plux>=1.3.1']
+build-backend = "setuptools.build_meta"
+
+[tool.black]
+line_length = 100
+include = '(localai/.*\.py$)'
+
+[tool.isort]
+profile = 'black'
+line_length = 100
+
+# call using pflake8
+[tool.flake8]
+max-line-length = 110
+ignore = 'E203,E266,E501,W503,F403'
+select = 'B,C,E,F,I,W,T4,B9'
+exclude = '.venv*,venv*,dist,*.egg-info,.git'
diff --git a/localai/setup.cfg b/localai/setup.cfg
new file mode 100644
index 0000000..d0375cf
--- /dev/null
+++ b/localai/setup.cfg
@@ -0,0 +1,19 @@
+[metadata]
+name = localstack-localai-extension
+version = 0.1.0
+url = https://github.com/localstack/localstack-localai-extension
+author = LocalStack
+author_email = info@localstack.cloud
+description = Run LocalAI directly in LocalStack
+long_description = file: README.md
+long_description_content_type = text/markdown; charset=UTF-8
+
+[options]
+zip_safe = False
+packages = find:
+install_requires =
+    localstack>=2.2
+
+[options.entry_points]
+localstack.extensions =
+    localstack-localai-extension = localai.extension:LocalAIExtension
diff --git a/localai/setup.py b/localai/setup.py
new file mode 100644
index 0000000..c823345
--- /dev/null
+++ b/localai/setup.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+from setuptools import setup
+
+setup()
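LocalStack discovers the extension through the `localstack.extensions` entry point declared in `setup.cfg` (plux indexes these plugins at build time). After `make install`, one quick way to confirm the package is discoverable — a sketch assuming Python 3.10+, where `entry_points` accepts the `group` keyword:

```python
from importlib.metadata import entry_points

# list everything registered under the group that LocalStack scans for extensions
for ep in entry_points(group="localstack.extensions"):
    print(f"{ep.name} -> {ep.value}")
# expected to include:
#   localstack-localai-extension -> localai.extension:LocalAIExtension
```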