feat: deploy and use buildkit to build dynamo images by julienmancuso · Pull Request #450 · ai-dynamo/dynamo · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

feat: deploy and use buildkit to build dynamo images #450

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Apr 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deploy/dynamo/helm/dynamo-platform-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ dynamo-operator:
secure: true
bentoRepositoryName: yatai-bentos

bentoImageBuildEngine: kaniko
bentoImageBuildEngine: buildkit
addNamespacePrefixToImageName: false

estargz:
Expand Down
36 changes: 36 additions & 0 deletions deploy/dynamo/helm/platform/components/operator/buildkitd.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
debug = true
[history]
maxAge = 345600
maxEntries = 1000
[worker.oci]
enabled = true
gc = true
gckeepstorage = "1000GB"

[[worker.oci.gcpolicy]]
keepBytes = "200GB"
keepDuration = "168h" # 7 days
filters = [ "type==source.local", "type==exec.cachemount", "type==source.git.checkout"]
[[worker.oci.gcpolicy]]
all = false
keepDuration = "336h" # 14 days
keepBytes = "300GB"
[[worker.oci.gcpolicy]]
all = true
keepBytes = "500GB"
[registry."docker.io"]
mirrors = []
132 changes: 132 additions & 0 deletions deploy/dynamo/helm/platform/components/operator/templates/buildkit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{{- if .Values.dynamo.bentoImageBuildEngine | eq "buildkit" }}
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
labels:
app.kubernetes.io/name: {{ include "dynamo-operator.fullname" . }}-buildkitd
name: {{ include "dynamo-operator.fullname" . }}-buildkitd
spec:
serviceName: {{ include "dynamo-operator.fullname" . }}-buildkitd
podManagementPolicy: Parallel
updateStrategy:
type: RollingUpdate
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: {{ include "dynamo-operator.fullname" . }}-buildkitd
template:
metadata:
labels:
app.kubernetes.io/name: {{ include "dynamo-operator.fullname" . }}-buildkitd
annotations:
container.apparmor.security.beta.kubernetes.io/buildkitd: unconfined
# see buildkit/docs/rootless.md for caveats of rootless mode
spec:
containers:
- name: buildkitd
image: moby/buildkit:v0.20.0-rootless
args:
- --oci-worker-no-process-sandbox
- --addr
- unix:///run/user/1000/buildkit/buildkitd.sock
- --addr
- tcp://0.0.0.0:1234
resources:
requests:
cpu: 3
memory: 8Gi
limits:
cpu: 8
memory: 30Gi
readinessProbe:
exec:
command:
- buildctl
- debug
- workers
initialDelaySeconds: 5
periodSeconds: 30
livenessProbe:
exec:
command:
- buildctl
- debug
- workers
initialDelaySeconds: 5
periodSeconds: 30
securityContext:
seccompProfile:
type: Unconfined
# To change UID/GID, you need to rebuild the image
runAsUser: 1000
runAsGroup: 1000
volumeMounts:
- mountPath: /home/user/.local/share/buildkit
name: cache
- mountPath: /home/user/.config/buildkit
name: config
readOnly: true
- mountPath: /dev/shm
name: dshm
securityContext:
fsGroup: 1000
volumes:
- name: config
configMap:
name: {{ include "dynamo-operator.fullname" . }}-buildkitd
items:
- key: buildkitd.toml
path: buildkitd.toml
- name: dshm
emptyDir:
medium: Memory
volumeClaimTemplates:
- metadata:
name: cache
spec:
accessModes:
- ReadWriteOnce
storageClassName: local-path
resources:
requests:
storage: 1000Gi
---
apiVersion: v1
kind: Service
metadata:
name: {{ include "dynamo-operator.fullname" . }}-buildkitd
labels:
app.kubernetes.io/name: {{ include "dynamo-operator.fullname" . }}-buildkitd
spec:
ports:
- name: http
port: 1234
targetPort: 1234
protocol: TCP
clusterIP: None
selector:
app.kubernetes.io/name: {{ include "dynamo-operator.fullname" . }}-buildkitd
---
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "dynamo-operator.fullname" . }}-buildkitd
data:
buildkitd.toml: |
{{- .Files.Get "buildkitd.toml" | nindent 4 }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ stringData:
INTERNAL_IMAGES_KANIKO: {{ .Values.dynamo.internalImages.kaniko | quote }}
INTERNAL_IMAGES_BUILDKIT: {{ .Values.dynamo.internalImages.buildkit | quote }}
INTERNAL_IMAGES_BUILDKIT_ROOTLESS: {{ .Values.dynamo.internalImages.buildkitRootless | quote }}
BUILDKIT_URL: tcp://{{ include "dynamo-operator.fullname" . }}-buildkitd:1234

BENTO_IMAGE_BUILD_ENGINE: {{ .Values.dynamo.bentoImageBuildEngine | quote }}

Expand Down
4 changes: 2 additions & 2 deletions deploy/dynamo/helm/platform/components/operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ dynamo:
internalImages:
bentoDownloader: quay.io/bentoml/bento-downloader:0.0.5
kaniko: quay.io/bentoml/kaniko:debug
buildkit: quay.io/bentoml/buildkit:master
buildkit: moby/buildkit:latest
buildkitRootless: quay.io/bentoml/buildkit:master-rootless
metricsTransformer: quay.io/bentoml/yatai-bento-metrics-transformer:0.0.4
debugger: quay.io/bentoml/bento-debugger:0.0.8
Expand All @@ -110,7 +110,7 @@ dynamo:
secure: true
bentoRepositoryName: yatai-bentos

bentoImageBuildEngine: kaniko # options: kaniko, buildkit, buildkit-rootless
bentoImageBuildEngine: buildkit # options: kaniko, buildkit, buildkit-rootless
addNamespacePrefixToImageName: false

estargz:
Expand Down
6 changes: 0 additions & 6 deletions deploy/dynamo/operator/internal/controller/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
package controller

import (
"strings"

"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/v1alpha1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -78,7 +76,3 @@ func getPvcName(crd metav1.Object, defaultName *string) string {
}
return crd.GetName()
}

func generateDynamoNimRequestName(tag string) string {
return strings.Split(tag, ":")[0]
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package controller

import (
"context"
"strings"

"dario.cat/mergo"
"emperror.dev/errors"
Expand Down Expand Up @@ -133,7 +134,7 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
// reconcile the dynamoNimRequest
dynamoNimRequest := &nvidiacomv1alpha1.DynamoNimRequest{
ObjectMeta: metav1.ObjectMeta{
Name: generateDynamoNimRequestName(dynamoDeployment.Spec.DynamoNim),
Name: strings.ReplaceAll(dynamoDeployment.Spec.DynamoNim, ":", "--"),
Namespace: dynamoDeployment.Namespace,
},
Spec: nvidiacomv1alpha1.DynamoNimRequestSpec{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1784,7 +1784,7 @@ monitoring.options.insecure=true`

args := make([]string, 0)

args = append(args, "uv", "run", "dynamo", "start")
args = append(args, "cd", "src", "&&", "uv", "run", "dynamo", "start")

// todo : remove this line when https://github.com/ai-dynamo/dynamo/issues/345 is fixed
enableDependsOption := false
Expand Down Expand Up @@ -1815,7 +1815,7 @@ monitoring.options.insecure=true`

if opt.dynamoNimDeployment.Spec.ServiceName != "" {
args = append(args, []string{"--service-name", opt.dynamoNimDeployment.Spec.ServiceName}...)
args = append(args, "src."+opt.dynamoNimDeployment.Spec.DynamoTag)
args = append(args, opt.dynamoNimDeployment.Spec.DynamoTag)
}

yataiResources := opt.dynamoNimDeployment.Spec.Resources
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2530,8 +2530,17 @@ echo "Done"
Value: strings.Join(buildkitdFlags, " "),
})
}
command = []string{"buildctl-daemonless.sh"}
buildkitURL := os.Getenv("BUILDKIT_URL")
if buildkitURL == "" {
err = errors.New("BUILDKIT_URL is not set")
return
}
command = []string{
"buildctl",
}
args = []string{
"--addr",
buildkitURL,
"build",
"--frontend",
"dockerfile.v0",
Expand All @@ -2543,11 +2552,10 @@ echo "Done"
output,
}
cacheRepo := os.Getenv("BUILDKIT_CACHE_REPO")
if cacheRepo == "" {
cacheRepo = opt.ImageInfo.DockerRegistry.BentosRepositoryURIInCluster
if cacheRepo != "" {
args = append(args, "--export-cache", fmt.Sprintf("type=registry,ref=%s:buildcache,mode=max,compression=zstd,ignore-error=true", cacheRepo))
args = append(args, "--import-cache", fmt.Sprintf("type=registry,ref=%s:buildcache", cacheRepo))
}
args = append(args, "--export-cache", fmt.Sprintf("type=registry,ref=%s:buildcache,mode=max,compression=zstd,ignore-error=true", cacheRepo))
args = append(args, "--import-cache", fmt.Sprintf("type=registry,ref=%s:buildcache", cacheRepo))
}

var builderContainerSecurityContext *corev1.SecurityContext
Expand Down
2 changes: 1 addition & 1 deletion deploy/dynamo/operator/internal/nim/nim.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ func GenerateDynamoNIMDeployments(parentDynamoDeployment *v1alpha1.DynamoDeploym
deployment.Name = fmt.Sprintf("%s-%s", parentDynamoDeployment.Name, strings.ToLower(service.Name))
deployment.Namespace = parentDynamoDeployment.Namespace
deployment.Spec.DynamoTag = config.DynamoTag
deployment.Spec.DynamoNim = strings.Split(parentDynamoDeployment.Spec.DynamoNim, ":")[0]
deployment.Spec.DynamoNim = strings.ReplaceAll(parentDynamoDeployment.Spec.DynamoNim, ":", "--")
deployment.Spec.ServiceName = service.Name
if service.Config.Dynamo != nil && service.Config.Dynamo.Enabled {
dynamoServices[service.Name] = fmt.Sprintf("%s/%s", service.Config.Dynamo.Name, service.Config.Dynamo.Namespace)
Expand Down
8 changes: 4 additions & 4 deletions deploy/dynamo/operator/internal/nim/nim_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
Namespace: "default",
},
Spec: v1alpha1.DynamoNimDeploymentSpec{
DynamoNim: "dynamonim",
DynamoNim: "dynamonim--ac4e234",
DynamoTag: "dynamonim:MyService1",
ServiceName: "service1",
Resources: &compounaiCommon.Resources{
Expand Down Expand Up @@ -127,7 +127,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
Namespace: "default",
},
Spec: v1alpha1.DynamoNimDeploymentSpec{
DynamoNim: "dynamonim",
DynamoNim: "dynamonim--ac4e234",
DynamoTag: "dynamonim:MyService1",
ServiceName: "service2",
},
Expand Down Expand Up @@ -188,7 +188,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
Namespace: "default",
},
Spec: v1alpha1.DynamoNimDeploymentSpec{
DynamoNim: "dynamonim",
DynamoNim: "dynamonim--ac4e234",
DynamoTag: "dynamonim:MyService2",
ServiceName: "service1",
Resources: &compounaiCommon.Resources{
Expand Down Expand Up @@ -227,7 +227,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
Namespace: "default",
},
Spec: v1alpha1.DynamoNimDeploymentSpec{
DynamoNim: "dynamonim",
DynamoNim: "dynamonim--ac4e234",
DynamoTag: "dynamonim:MyService2",
ServiceName: "service2",
},
Expand Down
Loading