InftyAI · InftyAI-Agent · May 1, 2025 · Apr 30, 2025
diff --git a/docs/examples/README.md → examples/README.md b/docs/examples/README.md → examples/README.md
diff --git a/docs/examples/envoy-ai-gateway/basic.yaml → examples/envoy-ai-gateway/basic.yaml b/docs/examples/envoy-ai-gateway/basic.yaml → examples/envoy-ai-gateway/basic.yaml
diff --git a/docs/examples/hostpath/playground.yaml → examples/hostpath/playground.yaml b/docs/examples/hostpath/playground.yaml → examples/hostpath/playground.yaml
diff --git a/docs/examples/hpa/README.md → examples/hpa/README.md b/docs/examples/hpa/README.md → examples/hpa/README.md
diff --git a/docs/examples/hpa/playground.yaml → examples/hpa/playground.yaml b/docs/examples/hpa/playground.yaml → examples/hpa/playground.yaml
diff --git a/docs/examples/huggingface/playground.yaml → examples/huggingface/playground.yaml b/docs/examples/huggingface/playground.yaml → examples/huggingface/playground.yaml
diff --git a/docs/examples/llamacpp/README.md → examples/llamacpp/README.md b/docs/examples/llamacpp/README.md → examples/llamacpp/README.md
diff --git a/docs/examples/llamacpp/playground.yaml → examples/llamacpp/playground.yaml b/docs/examples/llamacpp/playground.yaml → examples/llamacpp/playground.yaml
diff --git a/docs/examples/modelscope/playground.yaml → examples/modelscope/playground.yaml b/docs/examples/modelscope/playground.yaml → examples/modelscope/playground.yaml
diff --git a/docs/examples/multi-nodes/service.yaml → examples/multi-nodes/service.yaml b/docs/examples/multi-nodes/service.yaml → examples/multi-nodes/service.yaml
diff --git a/docs/examples/objstore-oss/playground.yaml → examples/objstore-oss/playground.yaml b/docs/examples/objstore-oss/playground.yaml → examples/objstore-oss/playground.yaml
diff --git a/docs/examples/ollama/playground.yaml → examples/ollama/playground.yaml b/docs/examples/ollama/playground.yaml → examples/ollama/playground.yaml
diff --git a/docs/examples/sglang/playground.yaml → examples/sglang/playground.yaml b/docs/examples/sglang/playground.yaml → examples/sglang/playground.yaml
diff --git a/...ulative-decoding/llamacpp/playground.yaml → ...ulative-decoding/llamacpp/playground.yaml b/...ulative-decoding/llamacpp/playground.yaml → ...ulative-decoding/llamacpp/playground.yaml
diff --git a/...speculative-decoding/vllm/playground.yaml → ...speculative-decoding/vllm/playground.yaml b/...speculative-decoding/vllm/playground.yaml → ...speculative-decoding/vllm/playground.yaml
diff --git a/docs/examples/tgi/playground.yaml → examples/tgi/playground.yaml b/docs/examples/tgi/playground.yaml → examples/tgi/playground.yaml
diff --git a/docs/proposals/NNNN-template/README.md → proposals/NNNN-template/README.md b/docs/proposals/NNNN-template/README.md → proposals/NNNN-template/README.md
diff --git a/docs/proposals/NNNN-template/proposal.yaml → proposals/NNNN-template/proposal.yaml b/docs/proposals/NNNN-template/proposal.yaml → proposals/NNNN-template/proposal.yaml
diff --git a/site/.gitignore b/site/.gitignore
@@ -0,0 +1,5 @@
+/public
+resources/
+node_modules/
+package-lock.json
+.hugo_build.lock
diff --git a/docs/assets/icon.svg → site/assets/icons/logo.svg b/docs/assets/icon.svg → site/assets/icons/logo.svg
diff --git a/site/assets/scss/_styles_project.scss b/site/assets/scss/_styles_project.scss
@@ -0,0 +1,9 @@
+a {
+  text-decoration: none;
+}
+
+.td-box--white {
+  a {
+    color: $primary !important;
+  }
+}
diff --git a/site/assets/scss/_variables_project.scss b/site/assets/scss/_variables_project.scss
@@ -0,0 +1,9 @@
+/*
+Add styles or override variables from the theme here.
+*/
+
+// Theme colors
+$primary: #EE4C1E;
+$secondary: #FFFFFF;
+
+$link-color: #EE4C1E;
diff --git a/site/content/en/_index.md b/site/content/en/_index.md
@@ -0,0 +1,59 @@
+---
+title: llmaz
+---
+
+{{< blocks/cover color="primary" image_anchor="top" height="max" >}}
+<p><img class="w-50 h-auto mb-4 llmaz-logo" src="/images/logo.png" /></p>
+<a class="btn btn-lg btn-secondary me-3 mb-4" href="/docs/">
+  Learn More <i class="fas fa-arrow-alt-circle-right ms-2"></i>
+</a>
+<a class="btn btn-lg btn-secondary me-3 mb-4" href="https://github.com/InftyAI/llmaz">
+  GitHub <i class="fab fa-github ms-2 "></i>
+</a>
+<p class="lead mt-5 -text-white">Easy, advanced inference platform for large language models on Kubernetes</p>
+{{< blocks/link-down color="white" >}}
+
+{{< /blocks/cover >}}
+
+
+{{% blocks/section color="white" type="row" %}}
+
+<p class="h1 text-center mb-4">Key Features</p>
+
+{{% blocks/feature icon="fas fa-user-shield" title="Ease of Use" %}}
+People can quickly deploy an LLM service with minimal configuration.
+{{% /blocks/feature %}}
+
+{{% blocks/feature icon="fas fa-cogs" title="Broad Backends Support" %}}
+llmaz supports a wide range of advanced inference backends for different scenarios, like <a href="https://github.com/vllm-project/vllm">vLLM</a>, <a href="https://github.com/huggingface/text-generation-inference">Text-Generation-Inference</a>, <a href="https://github.com/sgl-project/sglang">SGLang</a>, <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a>. Find the full list of supported backends <a href="/docs/integrations/support-backends/">here</a>.
+{{% /blocks/feature %}}
+
+{{% blocks/feature icon="fas fa-exchange-alt" title="Accelerator Fungibility" %}}
+llmaz supports serving the same LLM with various accelerators to optimize cost and performance.
+{{% /blocks/feature %}}
+
+{{% blocks/feature icon="fas fa-warehouse" title="Various Model Providers" %}}
+llmaz supports a wide range of model providers, such as <a href="https://huggingface.co/" rel="nofollow">HuggingFace</a>, <a href="https://www.modelscope.cn" rel="nofollow">ModelScope</a>, ObjectStores. llmaz will automatically handle the model loading, requiring no effort from users.
+{{% /blocks/feature %}}
+
+{{% blocks/feature icon="fas fa-network-wired" title="Multi-Host Support" %}}
+llmaz supports both single-host and multi-host scenarios with <a href="https://github.com/kubernetes-sigs/lws">LWS</a> from day 0.
+{{% /blocks/feature %}}
+
+{{% blocks/feature icon="fas fa-door-open" title="AI Gateway Support" %}}
+Offering capabilities like token-based rate limiting, model routing with the integration of <a href="https://aigateway.envoyproxy.io/" rel="nofollow">Envoy AI Gateway</a>.
+{{% /blocks/feature %}}
+
+{{% blocks/feature icon="fas fa-comments" title="Built-in ChatUI" %}}
+Out-of-the-box chatbot support with the integration of <a href="https://github.com/open-webui/open-webui">Open WebUI</a>, offering capabilities like function calling, RAG, web search and more; see the configuration <a href="/docs/integrations/open-webui/">here</a>.
+{{% /blocks/feature %}}
+
+{{% blocks/feature icon="fas fa-expand-arrows-alt" title="Scaling Efficiency" %}}
+llmaz supports horizontal scaling with <a href="https://github.com/InftyAI/llmaz/blob/main/examples/hpa/README.md">HPA</a> by default and will integrate with autoscaling components like <a href="https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler">Cluster-Autoscaler</a> or <a href="https://github.com/kubernetes-sigs/karpenter">Karpenter</a> for smart scaling across different clouds.
+{{% /blocks/feature %}}
+
+{{% blocks/feature icon="fas fa-box-open" title="Efficient Model Distribution (WIP)" %}}
+Out-of-the-box model cache system support with <a href="https://github.com/InftyAI/Manta">Manta</a>, still under development right now with architecture reframing.
+{{% /blocks/feature %}}
+
+{{% /blocks/section %}}
diff --git a/site/content/en/docs/_index.md b/site/content/en/docs/_index.md
@@ -0,0 +1,5 @@
+---
+title: Documentation
+linkTitle: Documentation
+menu: {main: {weight: 20}}
+---
diff --git a/docs/develop.md → site/content/en/docs/develop.md b/docs/develop.md → site/content/en/docs/develop.md
@@ -1,6 +1,9 @@
-# Develop Guidance
-
-A develop guidance for people who want to learn more about this project.
+---
+title: Develop Guidance
+weight: 3
+description: >
+  This section contains development guidance for people who want to learn more about this project.
+---
 
 ## Project Structure
 
@@ -22,4 +25,4 @@ See the [API Reference](./reference/core.v1alpha1.md) for more details.
 
 ### Inference APIs
 
-See the [API Reference](./reference/inference.v1alpha1.md) for more details.
+See the [API Reference](./reference/inference.v1alpha1.md) for more details.
diff --git a/docs/installation.md → site/content/en/docs/installation.md b/docs/installation.md → site/content/en/docs/installation.md
@@ -1,4 +1,10 @@
-# Installation Guide
+---
+title: Installation
+weight: 1
+description: >
+    This section introduces the installation guidance for llmaz.
+---
+
 
 ## Prerequisites
 

diff --git a/site/content/en/docs/integrations/_index.md b/site/content/en/docs/integrations/_index.md
@@ -0,0 +1,6 @@
+---
+title: Integrations
+weight: 2
+description: >
+  This section contains the llmaz integration information.
+---
diff --git a/docs/envoy-ai-gateway.md → .../en/docs/integrations/envoy-ai-gateway.md b/docs/envoy-ai-gateway.md → .../en/docs/integrations/envoy-ai-gateway.md
@@ -1,4 +1,7 @@
-# Envoy AI Gateway
+---
+title: Envoy AI Gateway
+weight: 1
+---
 
 [Envoy AI Gateway](https://aigateway.envoyproxy.io/) is an open source project for using Envoy Gateway
 to handle request traffic from application clients to Generative AI services.

diff --git a/docs/open-webui.md → ...ontent/en/docs/integrations/open-webui.md b/docs/open-webui.md → ...ontent/en/docs/integrations/open-webui.md
@@ -1,4 +1,7 @@
-# Open-WebUI
+---
+title: Open WebUI
+weight: 2
+---
 
 [Open WebUI](https://github.com/open-webui/open-webui) is a user-friendly AI interface with OpenAI-compatible APIs, serving as the default chatbot for llmaz.
 

diff --git a/docs/prometheus-operator/README.md → .../docs/integrations/prometheus-operator.md b/docs/prometheus-operator/README.md → .../docs/integrations/prometheus-operator.md
@@ -1,10 +1,13 @@
-# Install Prometheus Operator Guide
+---
+title: Prometheus Operator
+weight: 3
+---
 
 Currently, llmaz has already integrated metrics. This document provides deployment steps explaining how to install and configure Prometheus Operator in a Kubernetes cluster.
 
 ### Install the prometheus operator
 
-Please follow the [documentation](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/getting-started/installation.md) to install 
+Please follow the [documentation](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/getting-started/installation.md) to install the Prometheus Operator:
 
 ```bash
 # Installing the prometheus operator
@@ -14,9 +17,9 @@ prometheus-operator-55b5c96cf8-jl2nx   1/1     Running   0          12s
 ```
 Ensure that the Prometheus Operator Pod is running successfully.
 
-### Install the ServiceMonitor CR for llmaz 
+### Install the ServiceMonitor CR for llmaz
 
-To enable monitoring for the llmaz system, you need to install the ServiceMonitor custom resource (CR). 
+To enable monitoring for the llmaz system, you need to install the ServiceMonitor custom resource (CR).
 You can either modify the Helm chart prometheus according to the [documentation](./../../chart/values.global.yaml) or use `make install-prometheus` in Makefile.
 
 - Using Helm Chart: to modify the values.global.yaml
@@ -73,4 +76,4 @@ Forwarding from 0.0.0.0:9090 -> 9090
 If using kind, we can use port-forward, `kubectl port-forward services/prometheus-operated  39090:9090 --address 0.0.0.0 -n llmaz-system`
 This allows us to access prometheus using a browser: `http://localhost:9090/query`
 
-![prometheus](prometheus.png?raw=true)
+![prometheus](/images/prometheus.png?raw=true)
diff --git a/docs/support-backends.md → .../en/docs/integrations/support-backends.md b/docs/support-backends.md → .../en/docs/integrations/support-backends.md
@@ -1,4 +1,7 @@
-# All Kinds of Supported Inference Backends
+---
+title: Supported Inference Backends
+weight: 4
+---
 
 If you want to integrate more backends into llmaz, please refer to this [PR](https://github.com/InftyAI/llmaz/pull/182). It's always welcomed.
 

diff --git a/site/content/en/docs/reference/_index.md b/site/content/en/docs/reference/_index.md
@@ -0,0 +1,9 @@
+---
+title: Reference
+weight: 4
+description: >
+  This section contains the llmaz reference information.
+menu:
+  main:
+    weight: 30
+---
diff --git a/docs/reference/core.v1alpha1.md → ...ontent/en/docs/reference/core.v1alpha1.md b/docs/reference/core.v1alpha1.md → ...ontent/en/docs/reference/core.v1alpha1.md
diff --git a/docs/reference/inference.v1alpha1.md → ...t/en/docs/reference/inference.v1alpha1.md b/docs/reference/inference.v1alpha1.md → ...t/en/docs/reference/inference.v1alpha1.md
diff --git a/site/content/en/search.md b/site/content/en/search.md
@@ -0,0 +1,4 @@
+---
+title: Search Results
+layout: search
+---
diff --git a/site/go.mod b/site/go.mod
@@ -0,0 +1,5 @@
+module github.com/InftyAI/llmaz/site
+
+go 1.23
+
+require github.com/google/docsy v0.11.0 // indirect
diff --git a/site/go.sum b/site/go.sum
@@ -0,0 +1,4 @@
+github.com/FortAwesome/Font-Awesome v0.0.0-20240716171331-37eff7fa00de/go.mod h1:IUgezN/MFpCDIlFezw3L8j83oeiIuYoj28Miwr/KUYo=
+github.com/google/docsy v0.11.0 h1:QnV40cc28QwS++kP9qINtrIv4hlASruhC/K3FqkHAmM=
+github.com/google/docsy v0.11.0/go.mod h1:hGGW0OjNuG5ZbH5JRtALY3yvN8ybbEP/v2iaK4bwOUI=
+github.com/twbs/bootstrap v5.3.3+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0=
diff --git a/site/hugo.toml b/site/hugo.toml
@@ -0,0 +1,169 @@
+baseURL = "https://llmaz.inftyai.com/"
+title = "llmaz"
+
+# Language settings
+contentDir = "content/en"
+defaultContentLanguage = "en"
+defaultContentLanguageInSubdir = false
+# Useful when translating.
+enableMissingTranslationPlaceholders = true
+
+enableRobotsTXT = true
+
+# Will give values to .Lastmod etc.
+enableGitInfo = true
+
+# Taxonomies are disabled; comment out the line below to enable them in Docsy.
+disableKinds = ["taxonomy"]
+
+# Highlighting config
+pygmentsCodeFences = true
+pygmentsUseClasses = false
+# Use the new Chroma Go highlighter in Hugo.
+pygmentsUseClassic = false
+#pygmentsOptions = "linenos=table"
+# See https://help.farbox.com/pygments.html
+pygmentsStyle = "tango"
+
+# Configure how URLs look like per section.
+[permalinks]
+blog = "/:section/:year/:month/:day/:slug/"
+
+# Image processing configuration.
+[imaging]
+resampleFilter = "CatmullRom"
+quality = 75
+anchor = "Smart"
+
+# [services]
+# [services.googleAnalytics]
+# id = ""
+
+# Language configuration
+
+[languages]
+[languages.en]
+languageName ="English"
+title = "llmaz"
+[languages.en.params]
+description = "Easy, advanced inference platform for large language models on Kubernetes."
+
+[markup]
+  [markup.goldmark]
+    [markup.goldmark.parser.attribute]
+      block = true
+    [markup.goldmark.renderer]
+      unsafe = true
+  [markup.highlight]
+    # See a complete list of available styles at https://xyproto.github.io/splash/docs/all.html
+    style = "tango"
+    # Uncomment if you want your chosen highlight style used for code blocks without a specified language
+    # guessSyntax = "true"
+
+# Comment out if you don't want the "print entire section" link enabled.
+[outputs]
+section = ["HTML", "print", "RSS"]
+
+# Everything below this are Site Params
+
+[params]
+# First one is picked as the Twitter card image if not set on page.
+# images = ["images/project-illustration.png"]
+
+# Menu title if your navbar has a versions selector to access old versions of your site.
+# This menu appears only if you have at least one [params.versions] set.
+version_menu = "Releases"
+
+# Flag used in the "version-banner" partial to decide whether to display a
+# banner on every page indicating that this is an archived version of the docs.
+# Set this flag to "true" if you want to display the banner.
+archived_version = false
+
+# The version number for the version of the docs represented in this doc set.
+# Used in the "version-banner" partial to display a version number for the
+# current doc set.
+version = "v0.1.3"
+
+# A link to latest version of the docs. Used in the "version-banner" partial to
+# point people to the main doc site.
+url_latest_version = "https://example.com"
+
+# Repository configuration (URLs for in-page links to opening issues and suggesting changes)
+github_repo = "https://github.com/InftyAI/llmaz"
+# An optional link to a related project repo. For example, the sibling repository where your product code lives.
+github_project_repo = "https://github.com/InftyAI/llmaz"
+
+# Specify a value here if your content directory is not in your repo's root directory
+github_subdir = "site"
+
+# Uncomment this if your GitHub repo does not have "main" as the default branch,
+# or specify a new value if you want to reference another branch in your GitHub links
+github_branch= "main"
+
+# Google Custom Search Engine ID. Remove or comment out to disable search.
+# gcs_engine_id = "d72aa9b2712488cc3"
+
+# Enable Lunr.js offline search
+offlineSearch = true
+
+# Enable syntax highlighting and copy buttons on code blocks with Prism
+prism_syntax_highlighting = false
+
+copyright = "The InftyAI Team"
+
+# User interface configuration
+[params.ui]
+#  Set to true to disable breadcrumb navigation.
+breadcrumb_disable = false
+# Set to false if you don't want to display a logo (/assets/icons/logo.svg) in the top navbar
+navbar_logo = true
+# Set to true if you don't want the top navbar to be translucent when over a `block/cover`, like on the homepage.
+navbar_translucent_over_cover_disable = false
+# Enable to show the side bar menu in its compact state.
+sidebar_menu_compact = false
+# Set to true to hide the sidebar search box (the top nav search box will still be displayed if search is enabled)
+sidebar_search_disable = true
+
+# Adds a H2 section titled "Feedback" to the bottom of each doc. The responses are sent to Google Analytics as events.
+# This feature depends on [services.googleAnalytics] and will be disabled if "services.googleAnalytics.id" is not set.
+# If you want this feature, but occasionally need to remove the "Feedback" section from a single page,
+# add "hide_feedback: true" to the page's front matter.
+[params.ui.feedback]
+enable = true
+# The responses that the user sees after clicking "yes" (the page was helpful) or "no" (the page was not helpful).
+yes = 'Glad to hear it! Please <a href="https://github.com/InftyAI/llmaz/issues/new">tell us how we can improve</a>.'
+no = 'Sorry to hear that. Please <a href="https://github.com/InftyAI/llmaz/issues/new">tell us how we can improve</a>.'
+
+# Adds a reading time to the top of each doc.
+# If you want this feature, but occasionally need to remove the Reading time from a single page,
+# add "hide_readingtime: true" to the page's front matter
+[params.ui.readingtime]
+enable = true
+
+[params.links]
+# End user relevant links. These will show up on left side of footer and in the community page if you have one.
+[[params.links.user]]
+  name ="Twitter"
+  url = "https://x.com/InftyAI"
+  icon = "fab fa-x-twitter"
+  desc = "Follow us on Twitter to get the latest news!"
+# Developer relevant links. These will show up on right side of footer and in the community page if you have one.
+[[params.links.developer]]
+  name = "GitHub"
+  url = "https://github.com/InftyAI/llmaz"
+  icon = "fab fa-github"
+  desc = "GitHub repository"
+[[params.links.developer]]
+  name = "Slack"
+  url = "https://inftyai.slack.com/"
+  icon = "fab fa-slack"
+  desc = "Chat with other developers"
+
+# hugo module configuration
+[module]
+  [module.hugoVersion]
+    extended = true
+    min = "0.110.0"
+  [[module.imports]]
+    path = "github.com/google/docsy"
+    disable = false
diff --git a/site/layouts/404.html b/site/layouts/404.html
@@ -0,0 +1,6 @@
+{{ define "main" -}}
+<div class="td-content">
+  <h1>Not found</h1>
+  <p>Oops! This page doesn't exist. Try going back to the <a href="{{ "" | relURL }}">home page</a>.</p>
+</div>
+{{- end }}