Anyone Know How I Can Make ClearML Server Serve File URLs With an External Domain, and Not the Internal Kubernetes Cluster Hostnames? Running Both the Server and Agents in K8s On-Prem, the URL Given Is Not Reachable Because It Is Presented as the Internal URL

Anyone know how I can make the ClearML server serve file URLs with an external domain, and not the internal Kubernetes cluster hostnames?
Running both the server and agents in k8s on-prem, the URL given is not reachable, because it is presented as the URL given in

agentk8sglue:
  fileServerUrlReference: ""

Instead of the fileserver's ingress.

Help?

  
  
Posted 3 months ago

Answers 10


root@master:/home/bialek# kubectl -n clearml describe po clearml-webserver-847d7c947b-hfk57   
Name:             clearml-webserver-847d7c947b-hfk57
Namespace:        clearml
Priority:         0
Service Account:  clearml-webserver
Node:             clearml-server/secretip
Start Time:       Sun, 04 May 2025 08:42:17 +0300
Labels:           app.kubernetes.io/instance=clearml-webserver
                  app.kubernetes.io/name=clearml
                  pod-template-hash=847d7c947b
Annotations:      cni.projectcalico.org/containerID: ebe61ed8108db66e290732f0039ccd346ef3c47fc4a71c0e9b3c70588c9f4f18
                  cni.projectcalico.org/podIP: secretip
                  cni.projectcalico.org/podIPs: secretip
Status:           Running
IP:               secretip
IPs:
  IP:           secretip
Controlled By:  ReplicaSet/clearml-webserver-847d7c947b
Init Containers:
  init-webserver:
    Container ID:
    Image:         docker.io/allegroai/clearml:2.0.0-613
    Image ID:      docker.io/allegroai/clearml@sha256:713ae38f7dafc9b2be703d89f16017102f5660a0c97cef65c793847f742924c8
    Port:          <none>
    Host Port:     <none>
    Command:
      /bin/sh
      -c
      set -x; while [ $(curl -sw '%{http_code}' "" -o /dev/null) -ne 200 ] ; do
        echo "waiting for apiserver" ;
        sleep 5 ;
      done
      
    State:          Terminated
      Reason:       Completed
      Exit Code:    0
      Started:      Sun, 04 May 2025 08:42:27 +0300
      Finished:     Sun, 04 May 2025 08:44:27 +0300
    Ready:          True
    Restart Count:  0
    Limits:
      cpu:     10m
      memory:  64Mi
    Requests:
      cpu:        10m
      memory:     64Mi
    Environment:  <none>
    Mounts:
      /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-wmf25 (ro)
Containers:
  clearml-webserver:
    Container ID:
    Image:         docker.io/allegroai/clearml:2.0.0-613
    Image ID:      docker.io/allegroai/clearml@sha256:713ae38f7dafc9b2be703d89f16017102f5660a0c97cef65c793847f742924c8
    Port:          80/TCP
    Host Port:     0/TCP
    Args:
      webserver
    State:          Running
      Started:      Sun, 04 May 2025 08:44:28 +0300
    Ready:          True
    Restart Count:  0
    Limits:
      cpu:     2
      memory:  1Gi
    Requests:
      cpu:      100m
      memory:   256Mi
    Liveness:   exec [curl -X OPTIONS ] delay=0s timeout=1s period=10s #success=1 #failure=3
    Readiness:  exec [curl -X OPTIONS ] delay=0s timeout=1s period=10s #success=1 #failure=3
    Environment:
      NGINX_APISERVER_ADDRESS:
      NGINX_FILESERVER_ADDRESS:
    Mounts:
      /mnt/external_files/configs from webserver-config (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-wmf25 (ro)
Conditions:
  Type                        Status
  PodReadyToStartContainers   True 
  Initialized                 True 
  Ready                       True 
  ContainersReady             True 
  PodScheduled                True 
Volumes:
  webserver-config:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      clearml-webserver-configmap
    Optional:  false
  kube-api-access-wmf25:
    Type:                    Projected (a volume that contains injected data from multiple sources)
    TokenExpirationSeconds:  3607
    ConfigMapName:           kube-root-ca.crt
    ConfigMapOptional:       <nil>
    DownwardAPI:             true
QoS Class:                   Burstable
Node-Selectors:              kubernetes.io/hostname=clearml-server
Tolerations:                 node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
                             node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
                             server4clearml=true:NoSchedule
Events:                      <none>
root@master:/home/bialek# 
  
  
Posted 3 months ago

So, when the UI shows a debug image, it gets the URL for that image, which was created at runtime by the running SDK (by the Agent, in this case), using the fileserver URL provided to the agent.
You will need to pass the external reference:

agentk8sglue:
  fileServerUrlReference: ""

and work around the self-signed cert. You could try mounting your custom certificates into the Agent using volumes and volumeMounts, storing the certificate in a ConfigMap or similar; see the sketch below.
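
A minimal sketch of what that could look like in the agent chart's values. The basePodTemplate keys, the ConfigMap name clearml-ca-cert, and the mount path are assumptions for illustration (check helm show values for your clearml-agent chart version before applying):

agentk8sglue:
  # External fileserver URL, i.e. your ingress host:
  fileServerUrlReference: "https://clearml-file.bialek.dev"
  # Assumed chart keys for mounting the CA certificate into task pods:
  basePodTemplate:
    volumes:
      - name: ca-cert
        configMap:
          name: clearml-ca-cert        # ConfigMap holding the CA cert (hypothetical name)
    volumeMounts:
      - name: ca-cert
        mountPath: /usr/local/share/ca-certificates/clearml-ca.crt
        subPath: ca.crt                # key inside the ConfigMap (assumed)
        readOnly: true

The ConfigMap itself could be created from your certificate file with something like:

kubectl -n clearml create configmap clearml-ca-cert --from-file=ca.crt=./my-ca.crt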

  
  
Posted 3 months ago

@<1726047624538099712:profile|WorriedSwan6> could you please run a kubectl describe pod of the clearml webserver Pod and dump the output here?

  
  
Posted 3 months ago

lol I ended up just buying the domain. It is much easier to pay the $10 :))

  
  
Posted 3 months ago

That works as well LOL :))

  
  
Posted 3 months ago

Hi Amir, could you please share the values override that you used to install the ClearML server Helm chart?

  
  
Posted 3 months ago

sure, here:

clearml:
  defaultCompany: "bialek"
  cookieDomain: "bialek.dev"

nameOverride: "clearml"
fullnameOverride: "clearml"

apiserver:
  existingAdditionalConfigsSecret: "eso-clearml-users"  
  additionalConfigs:
    clearml.conf: |
      agent {
        file_server_url:
      }
  service:
    type: ClusterIP
  ingress:
    enabled: true
    ingressClassName: "bialek-on-prem"
    hostName: "clearml-api.bialek.dev"
    tlsSecretName: "tls-clearml-apiserver"
    annotations: 
      cert-manager.io/cluster-issuer: bialek-dev-issuer
    path: "/"
  nodeSelector:
    kubernetes.io/hostname: clearml-server
  tolerations: 
  - key: "server4clearml"
    operator: "Equal"
    value: "true"
    effect: "NoSchedule"

fileserver:
  service:
    type: ClusterIP
  ingress:
    enabled: true
    ingressClassName: "bialek-on-prem"
    hostName: "clearml-file.bialek.dev"
    tlsSecretName: "tls-clearml-fileserver"
    annotations: 
      cert-manager.io/cluster-issuer: bialek-dev-issuer
    path: "/"
  storage:
    enabled: true
    data:
      size: 100Gi
  nodeSelector:
    kubernetes.io/hostname: clearml-server
  tolerations: 
  - key: "server4clearml"
    operator: "Equal"
    value: "true"
    effect: "NoSchedule"

webserver:
  service:
    type: ClusterIP
  ingress:
    enabled: true
    ingressClassName: "bialek-on-prem"
    hostName: "clearml.bialek.dev"
    tlsSecretName: "tls-clearml-webserver"
    annotations: 
      cert-manager.io/cluster-issuer: bialek-dev-issuer
    path: "/"
  nodeSelector:
    kubernetes.io/hostname: clearml-server
  tolerations: 
  - key: "server4clearml"
    operator: "Equal"
    value: "true"
    effect: "NoSchedule"
  extraEnvVars:
    - name: WEBSERVER__fileBaseUrl
      value: "
"
    - name: WEBSERVER__useFilesProxy
      value: "true"



redis:
  architecture: replication
  master:
    nodeSelector:
      kubernetes.io/hostname: clearml-server
    tolerations:
    - key: "server4clearml"
      operator: "Equal"
      value: "true"
      effect: "NoSchedule"
    persistence:
      enabled: true
      accessModes:
        - ReadWriteOnce
      size: 5Gi
      ## If undefined (the default) or set to null, no storageClassName spec is set, choosing the default provisioner
      storageClass: null
  replica:
    replicaCount: 2
    nodeSelector:
      kubernetes.io/hostname: clearml-server
    tolerations:
    - key: "server4clearml"
      operator: "Equal"
      value: "true"
      effect: "NoSchedule"

mongodb:
  updateStrategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 50%
      maxUnavailable: 50%
  podSecurityContext:
    enabled: true
    fsGroup: 1001
  resources: {}
  enabled: true
  nodeSelector:
    kubernetes.io/hostname: clearml-server
  tolerations: 
  - key: "server4clearml"
    operator: "Equal"
    value: "true"
    effect: "NoSchedule"
  architecture: replicaset
  replicaCount: 1
  arbiter:
    enabled: false
  pdb:
    create: true
  podAntiAffinityPreset: soft
  
elasticsearch:
  replicas: 1
  minimumMasterNodes: 1
  antiAffinityTopologyKey: ""
  antiAffinity: ""
  nodeAffinity: {}
  nodeSelector: 
    kubernetes.io/hostname: clearml-server
  tolerations: 
  - key: "server4clearml"
    operator: "Equal"
    value: "true"
    effect: "NoSchedule"
  volumeClaimTemplate:
    accessModes: ["ReadWriteOnce"]
    resources:
      requests:
        storage: 30Gi

  extraVolumes:
    - name: nfs
      nfs:
        path: /path/to/nfs
        server: <some_private_ip>

  extraVolumeMounts:
    - name: nfs
      mountPath: /mnt/backups
      readOnly: false

  esConfig:
    elasticsearch.yml: |
      xpack.security.enabled: false
      path.repo: ["/mnt/backups"]
  
  
Posted 3 months ago

@<1726047624538099712:profile|WorriedSwan6> - When deploying the ClearML Agent, could you try passing the external fileserver URL in the configuration you previously mentioned? Like this:

agentk8sglue:
  fileServerUrlReference: ""
  
  
Posted 3 months ago

Hey @<1729671499981262848:profile|CooperativeKitten94> , but this is an internal domain, which causes an issue with the SSL when trying to upload data to the server:

2025-05-07 18:36:22,421 - clearml.storage - ERROR - Exception encountered while uploading HTTPSConnectionPool(host='clearml-file.bialek.dev', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')))

..
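
As a sketch of a workaround, assuming you can edit clearml.conf on the side that does the uploading (SDK or agent), ClearML lets you disable certificate verification, which is a last resort rather than a real fix:

api {
    # Skip TLS verification for the self-signed certificate
    # (not recommended outside testing):
    verify_certificate: false
}

A safer alternative is to keep verification on and make your CA trusted, e.g. by exporting REQUESTS_CA_BUNDLE=/path/to/your-ca.pem (honored by the underlying Python requests library) in the environment of the uploading process, or by mounting the CA into the system trust store as sketched above.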

  
  
Posted 3 months ago

Oh, I see, because you are using a self-signed certificate, correct?

  
  
Posted 3 months ago