Hi Guys, I Managed To Set Up A Kubernetes Cluster And Install Trains Into It. While Testing My Set-Up I Run The Test_Reporting.Py Example

Unanswered

` sdk {
# TRAINS - default SDK configuration

storage {
    cache {
        # Defaults to system temp folder / cache
        default_base_dir: "~/.trains/cache"
    }

    direct_access: [
        # Objects matching are considered to be available for direct access, i.e. they will not be downloaded
        # or cached, and any download request will return a direct reference.
        # Objects are specified in glob format, available for url and content_type.
        { url: "file://*" }  # file-urls are always directly referenced
    ]
}

metrics {
    # History size for debug files per metric/variant. For each metric/variant combination with an attached file
    # (e.g. debug image event), file names for the uploaded files will be recycled in such a way that no more than
    # X files are stored in the upload destination for each metric/variant combination.
    file_history_size: 100

    # Max history size for matplotlib imshow files per plot title.
    # File names for the uploaded images will be recycled in such a way that no more than
    # X images are stored in the upload destination for each matplotlib plot title.
    matplotlib_untitled_history_size: 100

    # Limit the number of digits after the dot in plot reporting (reducing plot report size)
    # plot_max_num_digits: 5

    # Settings for generated debug images
    images {
        format: JPEG
        quality: 87
        subsampling: 0
    }

    # Support plot-per-graph fully matching Tensorboard behavior (i.e. if this is set to true, each series should have its own graph)
    tensorboard_single_series_per_graph: false
}

network {
    metrics {
        # Number of threads allocated to uploading files (typically debug images) when transmitting metrics for
        # a specific iteration
        file_upload_threads: 4

        # Warn about upload starvation if no uploads were made in specified period while file-bearing events keep
        # being sent for upload
        file_upload_starvation_warning_sec: 120
    }

    iteration {
        # Max number of retries when getting frames if the server returned an error (http code 500)
        max_retries_on_server_error: 5
        # Backoff factory for consecutive retry attempts.
        # SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) between retries.
        retry_backoff_factor_sec: 10
    }
}
aws {
    s3 {
        # S3 credentials, used for read/write access by various SDK elements

        # default, used for any bucket not specified below
        key: ""
        secret: ""
        region: ""

        credentials: [
            # specifies key/secret credentials to use when handling s3 urls (read or write)
            # {
            #     bucket: "my-bucket-name"
            #     key: "my-access-key"
            #     secret: "my-secret-key"
            # },
            # {
            #     # This will apply to all buckets in this host (unless key/value is specifically provided for a given bucket)
            #     host: "my-minio-host:9000"
            #     key: "12345678"
            #     secret: "12345678"
            #     multipart: false
            #     secure: false
            # }
        ]
    }
    boto3 {
        pool_connections: 512
        max_multipart_concurrency: 16
    }
}
google.storage {
    # # Default project and credentials file
    # # Will be used when no bucket configuration is found
    # project: "trains"
    # credentials_json: "/path/to/credentials.json"

    # # Specific credentials per bucket and sub directory
    # credentials = [
    #     {
    #         bucket: "my-bucket"
    #         subdir: "path/in/bucket" # Not required
    #         project: "trains"
    #         credentials_json: "/path/to/credentials.json"
    #     },
    # ]
}
azure.storage {
    # containers: [
    #     {
    #         account_name: "trains"
    #         account_key: "secret"
    #         # container_name:
    #     }
    # ]
}

log {
    # debugging feature: set this to true to make null log propagate messages to root logger (so they appear in stdout)
    null_log_propagate: false
    task_log_buffer_capacity: 66

    # disable urllib info and lower levels
    disable_urllib3_info: true
}

development {
    # Development-mode options

    # dev task reuse window
    task_reuse_time_window_in_hours: 72.0

    # Run VCS repository detection asynchronously
    vcs_repo_detect_async: true

    # Store uncommitted git/hg source code diff in experiment manifest when training in development mode
    # This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section
    store_uncommitted_code_diff: true

    # Support stopping an experiment in case it was externally stopped, status was changed or task was reset
    support_stopping: true

    # Default Task output_uri. if output_uri is not provided to Task.init, default_output_uri will be used instead.
    default_output_uri: ""

    # Default auto generated requirements optimize for smaller requirements
    # If True, analyze the entire repository regardless of the entry point.
    # If False, first analyze the entry point script, if it does not contain other to local files,
    # do not analyze the entire repository.
    force_analyze_entire_repo: false

    # If set to true, *trains* update message will not be printed to the console
    # this value can be overwritten with os environment variable TRAINS_SUPPRESS_UPDATE_MESSAGE=1
    suppress_update_message: false

    # If this flag is true (default is false), instead of analyzing the code with Pigar, analyze with `pip freeze`
    detect_with_pip_freeze: false

    # Log specific environment variables. OS environments are enlisted in the "Environment" section
    # of the Hyper-Parameters.
    # multiple selected variables are supported including the suffix '*'.
    # For example: "AWS_*" will log any OS environment variable starting with 'AWS_'.
    # This value can be overwritten with os environment variable TRAINS_LOG_ENVIRONMENT="[AWS_*, CUDA_VERSION]"
    # Example: log_os_environments: ["AWS_*", "CUDA_VERSION"]
    log_os_environments: []

    # Development mode worker
    worker {
        # Status report period in seconds
        report_period_sec: 2

        # ping to the server - check connectivity
        ping_period_sec: 30

        # Log all stdout & stderr
        log_stdout: true

        # compatibility feature, report memory usage for the entire machine
        # default (false), report only on the running process and its sub-processes
        report_global_mem_used: false
    }
}

} `

  				
Posted 
	4 years ago

					More
				  		
  Report
		
					WickedGoat98
				
					0
					 × 1

267 Views

0 Answers

4 years ago

2 years ago