Describe Training Job
sagemaker_describe_training_job (R Documentation)
Returns information about a training job
Description
Returns information about a training job.
Some of the attributes below only appear if the training job successfully starts. If the training job fails, `TrainingJobStatus` is `Failed` and, depending on the `FailureReason`, attributes like `TrainingStartTime`, `TrainingTimeInSeconds`, `TrainingEndTime`, and `BillableTimeInSeconds` may not be present in the response.
Usage
sagemaker_describe_training_job(TrainingJobName)
Arguments
TrainingJobName
[required] The name of the training job.
Value
A list with the following syntax:
list(
TrainingJobName = "string",
TrainingJobArn = "string",
TuningJobArn = "string",
LabelingJobArn = "string",
AutoMLJobArn = "string",
ModelArtifacts = list(
S3ModelArtifacts = "string"
),
TrainingJobStatus = "InProgress"|"Completed"|"Failed"|"Stopping"|"Stopped",
SecondaryStatus = "Starting"|"LaunchingMLInstances"|"PreparingTrainingStack"|"Downloading"|"DownloadingTrainingImage"|"Training"|"Uploading"|"Stopping"|"Stopped"|"MaxRuntimeExceeded"|"Completed"|"Failed"|"Interrupted"|"MaxWaitTimeExceeded"|"Updating"|"Restarting"|"Pending",
FailureReason = "string",
HyperParameters = list(
"string"
),
AlgorithmSpecification = list(
TrainingImage = "string",
AlgorithmName = "string",
TrainingInputMode = "Pipe"|"File"|"FastFile",
MetricDefinitions = list(
list(
Name = "string",
Regex = "string"
)
),
EnableSageMakerMetricsTimeSeries = TRUE|FALSE,
ContainerEntrypoint = list(
"string"
),
ContainerArguments = list(
"string"
),
TrainingImageConfig = list(
TrainingRepositoryAccessMode = "Platform"|"Vpc",
TrainingRepositoryAuthConfig = list(
TrainingRepositoryCredentialsProviderArn = "string"
)
)
),
RoleArn = "string",
InputDataConfig = list(
list(
ChannelName = "string",
DataSource = list(
S3DataSource = list(
S3DataType = "ManifestFile"|"S3Prefix"|"AugmentedManifestFile",
S3Uri = "string",
S3DataDistributionType = "FullyReplicated"|"ShardedByS3Key",
AttributeNames = list(
"string"
),
InstanceGroupNames = list(
"string"
)
),
FileSystemDataSource = list(
FileSystemId = "string",
FileSystemAccessMode = "rw"|"ro",
FileSystemType = "EFS"|"FSxLustre",
DirectoryPath = "string"
)
),
ContentType = "string",
CompressionType = "None"|"Gzip",
RecordWrapperType = "None"|"RecordIO",
InputMode = "Pipe"|"File"|"FastFile",
ShuffleConfig = list(
Seed = 123
)
)
),
OutputDataConfig = list(
KmsKeyId = "string",
S3OutputPath = "string",
CompressionType = "GZIP"|"NONE"
),
ResourceConfig = list(
InstanceType = "ml.m4.xlarge"|"ml.m4.2xlarge"|"ml.m4.4xlarge"|"ml.m4.10xlarge"|"ml.m4.16xlarge"|"ml.g4dn.xlarge"|"ml.g4dn.2xlarge"|"ml.g4dn.4xlarge"|"ml.g4dn.8xlarge"|"ml.g4dn.12xlarge"|"ml.g4dn.16xlarge"|"ml.m5.large"|"ml.m5.xlarge"|"ml.m5.2xlarge"|"ml.m5.4xlarge"|"ml.m5.12xlarge"|"ml.m5.24xlarge"|"ml.c4.xlarge"|"ml.c4.2xlarge"|"ml.c4.4xlarge"|"ml.c4.8xlarge"|"ml.p2.xlarge"|"ml.p2.8xlarge"|"ml.p2.16xlarge"|"ml.p3.2xlarge"|"ml.p3.8xlarge"|"ml.p3.16xlarge"|"ml.p3dn.24xlarge"|"ml.p4d.24xlarge"|"ml.p4de.24xlarge"|"ml.p5.48xlarge"|"ml.c5.xlarge"|"ml.c5.2xlarge"|"ml.c5.4xlarge"|"ml.c5.9xlarge"|"ml.c5.18xlarge"|"ml.c5n.xlarge"|"ml.c5n.2xlarge"|"ml.c5n.4xlarge"|"ml.c5n.9xlarge"|"ml.c5n.18xlarge"|"ml.g5.xlarge"|"ml.g5.2xlarge"|"ml.g5.4xlarge"|"ml.g5.8xlarge"|"ml.g5.16xlarge"|"ml.g5.12xlarge"|"ml.g5.24xlarge"|"ml.g5.48xlarge"|"ml.trn1.2xlarge"|"ml.trn1.32xlarge"|"ml.trn1n.32xlarge"|"ml.m6i.large"|"ml.m6i.xlarge"|"ml.m6i.2xlarge"|"ml.m6i.4xlarge"|"ml.m6i.8xlarge"|"ml.m6i.12xlarge"|"ml.m6i.16xlarge"|"ml.m6i.24xlarge"|"ml.m6i.32xlarge"|"ml.c6i.xlarge"|"ml.c6i.2xlarge"|"ml.c6i.8xlarge"|"ml.c6i.4xlarge"|"ml.c6i.12xlarge"|"ml.c6i.16xlarge"|"ml.c6i.24xlarge"|"ml.c6i.32xlarge"|"ml.r5d.large"|"ml.r5d.xlarge"|"ml.r5d.2xlarge"|"ml.r5d.4xlarge"|"ml.r5d.8xlarge"|"ml.r5d.12xlarge"|"ml.r5d.16xlarge"|"ml.r5d.24xlarge"|"ml.t3.medium"|"ml.t3.large"|"ml.t3.xlarge"|"ml.t3.2xlarge"|"ml.r5.large"|"ml.r5.xlarge"|"ml.r5.2xlarge"|"ml.r5.4xlarge"|"ml.r5.8xlarge"|"ml.r5.12xlarge"|"ml.r5.16xlarge"|"ml.r5.24xlarge",
InstanceCount = 123,
VolumeSizeInGB = 123,
VolumeKmsKeyId = "string",
KeepAlivePeriodInSeconds = 123,
InstanceGroups = list(
list(
InstanceType = "ml.m4.xlarge"|"ml.m4.2xlarge"|"ml.m4.4xlarge"|"ml.m4.10xlarge"|"ml.m4.16xlarge"|"ml.g4dn.xlarge"|"ml.g4dn.2xlarge"|"ml.g4dn.4xlarge"|"ml.g4dn.8xlarge"|"ml.g4dn.12xlarge"|"ml.g4dn.16xlarge"|"ml.m5.large"|"ml.m5.xlarge"|"ml.m5.2xlarge"|"ml.m5.4xlarge"|"ml.m5.12xlarge"|"ml.m5.24xlarge"|"ml.c4.xlarge"|"ml.c4.2xlarge"|"ml.c4.4xlarge"|"ml.c4.8xlarge"|"ml.p2.xlarge"|"ml.p2.8xlarge"|"ml.p2.16xlarge"|"ml.p3.2xlarge"|"ml.p3.8xlarge"|"ml.p3.16xlarge"|"ml.p3dn.24xlarge"|"ml.p4d.24xlarge"|"ml.p4de.24xlarge"|"ml.p5.48xlarge"|"ml.c5.xlarge"|"ml.c5.2xlarge"|"ml.c5.4xlarge"|"ml.c5.9xlarge"|"ml.c5.18xlarge"|"ml.c5n.xlarge"|"ml.c5n.2xlarge"|"ml.c5n.4xlarge"|"ml.c5n.9xlarge"|"ml.c5n.18xlarge"|"ml.g5.xlarge"|"ml.g5.2xlarge"|"ml.g5.4xlarge"|"ml.g5.8xlarge"|"ml.g5.16xlarge"|"ml.g5.12xlarge"|"ml.g5.24xlarge"|"ml.g5.48xlarge"|"ml.trn1.2xlarge"|"ml.trn1.32xlarge"|"ml.trn1n.32xlarge"|"ml.m6i.large"|"ml.m6i.xlarge"|"ml.m6i.2xlarge"|"ml.m6i.4xlarge"|"ml.m6i.8xlarge"|"ml.m6i.12xlarge"|"ml.m6i.16xlarge"|"ml.m6i.24xlarge"|"ml.m6i.32xlarge"|"ml.c6i.xlarge"|"ml.c6i.2xlarge"|"ml.c6i.8xlarge"|"ml.c6i.4xlarge"|"ml.c6i.12xlarge"|"ml.c6i.16xlarge"|"ml.c6i.24xlarge"|"ml.c6i.32xlarge"|"ml.r5d.large"|"ml.r5d.xlarge"|"ml.r5d.2xlarge"|"ml.r5d.4xlarge"|"ml.r5d.8xlarge"|"ml.r5d.12xlarge"|"ml.r5d.16xlarge"|"ml.r5d.24xlarge"|"ml.t3.medium"|"ml.t3.large"|"ml.t3.xlarge"|"ml.t3.2xlarge"|"ml.r5.large"|"ml.r5.xlarge"|"ml.r5.2xlarge"|"ml.r5.4xlarge"|"ml.r5.8xlarge"|"ml.r5.12xlarge"|"ml.r5.16xlarge"|"ml.r5.24xlarge",
InstanceCount = 123,
InstanceGroupName = "string"
)
)
),
WarmPoolStatus = list(
Status = "Available"|"Terminated"|"Reused"|"InUse",
ResourceRetainedBillableTimeInSeconds = 123,
ReusedByJob = "string"
),
VpcConfig = list(
SecurityGroupIds = list(
"string"
),
Subnets = list(
"string"
)
),
StoppingCondition = list(
MaxRuntimeInSeconds = 123,
MaxWaitTimeInSeconds = 123,
MaxPendingTimeInSeconds = 123
),
CreationTime = as.POSIXct(
"2015-01-01"
),
TrainingStartTime = as.POSIXct(
"2015-01-01"
),
TrainingEndTime = as.POSIXct(
"2015-01-01"
),
LastModifiedTime = as.POSIXct(
"2015-01-01"
),
SecondaryStatusTransitions = list(
list(
Status = "Starting"|"LaunchingMLInstances"|"PreparingTrainingStack"|"Downloading"|"DownloadingTrainingImage"|"Training"|"Uploading"|"Stopping"|"Stopped"|"MaxRuntimeExceeded"|"Completed"|"Failed"|"Interrupted"|"MaxWaitTimeExceeded"|"Updating"|"Restarting"|"Pending",
StartTime = as.POSIXct(
"2015-01-01"
),
EndTime = as.POSIXct(
"2015-01-01"
),
StatusMessage = "string"
)
),
FinalMetricDataList = list(
list(
MetricName = "string",
Value = 123.0,
Timestamp = as.POSIXct(
"2015-01-01"
)
)
),
EnableNetworkIsolation = TRUE|FALSE,
EnableInterContainerTrafficEncryption = TRUE|FALSE,
EnableManagedSpotTraining = TRUE|FALSE,
CheckpointConfig = list(
S3Uri = "string",
LocalPath = "string"
),
TrainingTimeInSeconds = 123,
BillableTimeInSeconds = 123,
DebugHookConfig = list(
LocalPath = "string",
S3OutputPath = "string",
HookParameters = list(
"string"
),
CollectionConfigurations = list(
list(
CollectionName = "string",
CollectionParameters = list(
"string"
)
)
)
),
ExperimentConfig = list(
ExperimentName = "string",
TrialName = "string",
TrialComponentDisplayName = "string",
RunName = "string"
),
DebugRuleConfigurations = list(
list(
RuleConfigurationName = "string",
LocalPath = "string",
S3OutputPath = "string",
RuleEvaluatorImage = "string",
InstanceType = "ml.t3.medium"|"ml.t3.large"|"ml.t3.xlarge"|"ml.t3.2xlarge"|"ml.m4.xlarge"|"ml.m4.2xlarge"|"ml.m4.4xlarge"|"ml.m4.10xlarge"|"ml.m4.16xlarge"|"ml.c4.xlarge"|"ml.c4.2xlarge"|"ml.c4.4xlarge"|"ml.c4.8xlarge"|"ml.p2.xlarge"|"ml.p2.8xlarge"|"ml.p2.16xlarge"|"ml.p3.2xlarge"|"ml.p3.8xlarge"|"ml.p3.16xlarge"|"ml.c5.xlarge"|"ml.c5.2xlarge"|"ml.c5.4xlarge"|"ml.c5.9xlarge"|"ml.c5.18xlarge"|"ml.m5.large"|"ml.m5.xlarge"|"ml.m5.2xlarge"|"ml.m5.4xlarge"|"ml.m5.12xlarge"|"ml.m5.24xlarge"|"ml.r5.large"|"ml.r5.xlarge"|"ml.r5.2xlarge"|"ml.r5.4xlarge"|"ml.r5.8xlarge"|"ml.r5.12xlarge"|"ml.r5.16xlarge"|"ml.r5.24xlarge"|"ml.g4dn.xlarge"|"ml.g4dn.2xlarge"|"ml.g4dn.4xlarge"|"ml.g4dn.8xlarge"|"ml.g4dn.12xlarge"|"ml.g4dn.16xlarge"|"ml.g5.xlarge"|"ml.g5.2xlarge"|"ml.g5.4xlarge"|"ml.g5.8xlarge"|"ml.g5.16xlarge"|"ml.g5.12xlarge"|"ml.g5.24xlarge"|"ml.g5.48xlarge"|"ml.r5d.large"|"ml.r5d.xlarge"|"ml.r5d.2xlarge"|"ml.r5d.4xlarge"|"ml.r5d.8xlarge"|"ml.r5d.12xlarge"|"ml.r5d.16xlarge"|"ml.r5d.24xlarge",
VolumeSizeInGB = 123,
RuleParameters = list(
"string"
)
)
),
TensorBoardOutputConfig = list(
LocalPath = "string",
S3OutputPath = "string"
),
DebugRuleEvaluationStatuses = list(
list(
RuleConfigurationName = "string",
RuleEvaluationJobArn = "string",
RuleEvaluationStatus = "InProgress"|"NoIssuesFound"|"IssuesFound"|"Error"|"Stopping"|"Stopped",
StatusDetails = "string",
LastModifiedTime = as.POSIXct(
"2015-01-01"
)
)
),
ProfilerConfig = list(
S3OutputPath = "string",
ProfilingIntervalInMilliseconds = 123,
ProfilingParameters = list(
"string"
),
DisableProfiler = TRUE|FALSE
),
ProfilerRuleConfigurations = list(
list(
RuleConfigurationName = "string",
LocalPath = "string",
S3OutputPath = "string",
RuleEvaluatorImage = "string",
InstanceType = "ml.t3.medium"|"ml.t3.large"|"ml.t3.xlarge"|"ml.t3.2xlarge"|"ml.m4.xlarge"|"ml.m4.2xlarge"|"ml.m4.4xlarge"|"ml.m4.10xlarge"|"ml.m4.16xlarge"|"ml.c4.xlarge"|"ml.c4.2xlarge"|"ml.c4.4xlarge"|"ml.c4.8xlarge"|"ml.p2.xlarge"|"ml.p2.8xlarge"|"ml.p2.16xlarge"|"ml.p3.2xlarge"|"ml.p3.8xlarge"|"ml.p3.16xlarge"|"ml.c5.xlarge"|"ml.c5.2xlarge"|"ml.c5.4xlarge"|"ml.c5.9xlarge"|"ml.c5.18xlarge"|"ml.m5.large"|"ml.m5.xlarge"|"ml.m5.2xlarge"|"ml.m5.4xlarge"|"ml.m5.12xlarge"|"ml.m5.24xlarge"|"ml.r5.large"|"ml.r5.xlarge"|"ml.r5.2xlarge"|"ml.r5.4xlarge"|"ml.r5.8xlarge"|"ml.r5.12xlarge"|"ml.r5.16xlarge"|"ml.r5.24xlarge"|"ml.g4dn.xlarge"|"ml.g4dn.2xlarge"|"ml.g4dn.4xlarge"|"ml.g4dn.8xlarge"|"ml.g4dn.12xlarge"|"ml.g4dn.16xlarge"|"ml.g5.xlarge"|"ml.g5.2xlarge"|"ml.g5.4xlarge"|"ml.g5.8xlarge"|"ml.g5.16xlarge"|"ml.g5.12xlarge"|"ml.g5.24xlarge"|"ml.g5.48xlarge"|"ml.r5d.large"|"ml.r5d.xlarge"|"ml.r5d.2xlarge"|"ml.r5d.4xlarge"|"ml.r5d.8xlarge"|"ml.r5d.12xlarge"|"ml.r5d.16xlarge"|"ml.r5d.24xlarge",
VolumeSizeInGB = 123,
RuleParameters = list(
"string"
)
)
),
ProfilerRuleEvaluationStatuses = list(
list(
RuleConfigurationName = "string",
RuleEvaluationJobArn = "string",
RuleEvaluationStatus = "InProgress"|"NoIssuesFound"|"IssuesFound"|"Error"|"Stopping"|"Stopped",
StatusDetails = "string",
LastModifiedTime = as.POSIXct(
"2015-01-01"
)
)
),
ProfilingStatus = "Enabled"|"Disabled",
Environment = list(
"string"
),
RetryStrategy = list(
MaximumRetryAttempts = 123
),
RemoteDebugConfig = list(
EnableRemoteDebug = TRUE|FALSE
),
InfraCheckConfig = list(
EnableInfraCheck = TRUE|FALSE
)
)