Create Processing Job
sagemaker_create_processing_job | R Documentation |
Creates a processing job¶
Description¶
Creates a processing job.
Usage¶
sagemaker_create_processing_job(ProcessingInputs,
ProcessingOutputConfig, ProcessingJobName, ProcessingResources,
StoppingCondition, AppSpecification, Environment, NetworkConfig,
RoleArn, Tags, ExperimentConfig)
Arguments¶
ProcessingInputs
An array of inputs configuring the data to download into the processing container.
ProcessingOutputConfig
Output configuration for the processing job.
ProcessingJobName
[required] The name of the processing job. The name must be unique within an Amazon Web Services Region in the Amazon Web Services account.
ProcessingResources
[required] Identifies the resources, ML compute instances, and ML storage volumes to deploy for a processing job. For distributed processing, you specify more than one instance.
StoppingCondition
The time limit for how long the processing job is allowed to run.
AppSpecification
[required] Configures the processing job to run a specified Docker container image.
Environment
The environment variables to set in the Docker container. Up to 100 key-value entries in the map are supported.
NetworkConfig
Networking options for a processing job, such as whether to allow inbound and outbound network calls to and from processing containers, and the VPC subnets and security groups to use for VPC-enabled processing jobs.
RoleArn
[required] The Amazon Resource Name (ARN) of an IAM role that Amazon SageMaker can assume to perform tasks on your behalf.
Tags
(Optional) An array of key-value pairs. For more information, see Using Cost Allocation Tags in the Amazon Web Services Billing and Cost Management User Guide.
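ExperimentConfig
Associates the processing job with a SageMaker experiment, trial, trial component display name, and run (see the ExperimentConfig fields in the request syntax below).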
Value¶
A list with the following syntax:
list(
ProcessingJobArn = "string"
)
Request syntax¶
svc$create_processing_job(
ProcessingInputs = list(
list(
InputName = "string",
AppManaged = TRUE|FALSE,
S3Input = list(
S3Uri = "string",
LocalPath = "string",
S3DataType = "ManifestFile"|"S3Prefix",
S3InputMode = "Pipe"|"File",
S3DataDistributionType = "FullyReplicated"|"ShardedByS3Key",
S3CompressionType = "None"|"Gzip"
),
DatasetDefinition = list(
AthenaDatasetDefinition = list(
Catalog = "string",
Database = "string",
QueryString = "string",
WorkGroup = "string",
OutputS3Uri = "string",
KmsKeyId = "string",
OutputFormat = "PARQUET"|"ORC"|"AVRO"|"JSON"|"TEXTFILE",
OutputCompression = "GZIP"|"SNAPPY"|"ZLIB"
),
RedshiftDatasetDefinition = list(
ClusterId = "string",
Database = "string",
DbUser = "string",
QueryString = "string",
ClusterRoleArn = "string",
OutputS3Uri = "string",
KmsKeyId = "string",
OutputFormat = "PARQUET"|"CSV",
OutputCompression = "None"|"GZIP"|"BZIP2"|"ZSTD"|"SNAPPY"
),
LocalPath = "string",
DataDistributionType = "FullyReplicated"|"ShardedByS3Key",
InputMode = "Pipe"|"File"
)
)
),
ProcessingOutputConfig = list(
Outputs = list(
list(
OutputName = "string",
S3Output = list(
S3Uri = "string",
LocalPath = "string",
S3UploadMode = "Continuous"|"EndOfJob"
),
FeatureStoreOutput = list(
FeatureGroupName = "string"
),
AppManaged = TRUE|FALSE
)
),
KmsKeyId = "string"
),
ProcessingJobName = "string",
ProcessingResources = list(
ClusterConfig = list(
InstanceCount = 123,
InstanceType = "ml.t3.medium"|"ml.t3.large"|"ml.t3.xlarge"|"ml.t3.2xlarge"|"ml.m4.xlarge"|"ml.m4.2xlarge"|"ml.m4.4xlarge"|"ml.m4.10xlarge"|"ml.m4.16xlarge"|"ml.c4.xlarge"|"ml.c4.2xlarge"|"ml.c4.4xlarge"|"ml.c4.8xlarge"|"ml.p2.xlarge"|"ml.p2.8xlarge"|"ml.p2.16xlarge"|"ml.p3.2xlarge"|"ml.p3.8xlarge"|"ml.p3.16xlarge"|"ml.c5.xlarge"|"ml.c5.2xlarge"|"ml.c5.4xlarge"|"ml.c5.9xlarge"|"ml.c5.18xlarge"|"ml.m5.large"|"ml.m5.xlarge"|"ml.m5.2xlarge"|"ml.m5.4xlarge"|"ml.m5.12xlarge"|"ml.m5.24xlarge"|"ml.r5.large"|"ml.r5.xlarge"|"ml.r5.2xlarge"|"ml.r5.4xlarge"|"ml.r5.8xlarge"|"ml.r5.12xlarge"|"ml.r5.16xlarge"|"ml.r5.24xlarge"|"ml.g4dn.xlarge"|"ml.g4dn.2xlarge"|"ml.g4dn.4xlarge"|"ml.g4dn.8xlarge"|"ml.g4dn.12xlarge"|"ml.g4dn.16xlarge"|"ml.g5.xlarge"|"ml.g5.2xlarge"|"ml.g5.4xlarge"|"ml.g5.8xlarge"|"ml.g5.16xlarge"|"ml.g5.12xlarge"|"ml.g5.24xlarge"|"ml.g5.48xlarge"|"ml.r5d.large"|"ml.r5d.xlarge"|"ml.r5d.2xlarge"|"ml.r5d.4xlarge"|"ml.r5d.8xlarge"|"ml.r5d.12xlarge"|"ml.r5d.16xlarge"|"ml.r5d.24xlarge",
VolumeSizeInGB = 123,
VolumeKmsKeyId = "string"
)
),
StoppingCondition = list(
MaxRuntimeInSeconds = 123
),
AppSpecification = list(
ImageUri = "string",
ContainerEntrypoint = list(
"string"
),
ContainerArguments = list(
"string"
)
),
Environment = list(
"string"
),
NetworkConfig = list(
EnableInterContainerTrafficEncryption = TRUE|FALSE,
EnableNetworkIsolation = TRUE|FALSE,
VpcConfig = list(
SecurityGroupIds = list(
"string"
),
Subnets = list(
"string"
)
)
),
RoleArn = "string",
Tags = list(
list(
Key = "string",
Value = "string"
)
),
ExperimentConfig = list(
ExperimentName = "string",
TrialName = "string",
TrialComponentDisplayName = "string",
RunName = "string"
)
)