Create and manage Amazon SageMaker jobs with Step Functions
Learn how to use Step Functions to create and manage jobs on SageMaker. This page lists the supported SageMaker API actions and provides example
Task
states to create SageMaker transform, training, labeling, and processing jobs.
Step Functions can control certain Amazon services directly from Using Amazon States Language to define Step Functions workflows (ASL). To learn more, see Integrating other services and Passing parameters to a service API in Step Functions.
How the Optimized SageMaker integration is different than the SageMaker Amazon SDK integration
-
The Run a Job (.sync) integration pattern is supported.
-
There are no optimizations for the Request Response integration pattern.
-
The Wait for a Callback with the Task Token integration pattern is not supported.
Supported SageMaker APIs and syntax:
-
-
Supported parameters:
-
-
Supported parameters:
-
Note
This API action supports the
.sync
integration pattern. -
Note
This API action supports the
.sync
integration pattern. -
-
Supported parameters:
-
Note
This API action supports the
.sync
integration pattern. -
Note
This API action supports the
.sync
integration pattern. -
Note
This API action supports the
.sync
integration pattern.Note
Amazon Step Functions will not automatically create a policy for
CreateTransformJob
. You must attach an inline policy to the created role. For more information, see this example IAM policy: CreateTrainingJob. -
-
Supported parameters:
SageMaker Transform Job Example
The following includes a Task
state that creates an Amazon SageMaker transform
job, specifying the Amazon S3 location for DataSource
and
TransformOutput
.
{
"SageMaker CreateTransformJob": {
"Type": "Task",
"Resource": "arn:aws:states:::sagemaker:createTransformJob.sync",
"Parameters": {
"ModelName": "SageMakerCreateTransformJobModel-9iFBKsYti9vr",
"TransformInput": {
"CompressionType": "None",
"ContentType": "text/csv",
"DataSource": {
"S3DataSource": {
"S3DataType": "S3Prefix",
"S3Uri": "s3://my-s3bucket-example-1/TransformJobDataInput.txt"
}
}
},
"TransformOutput": {
"S3OutputPath": "s3://my-s3bucket-example-1/TransformJobOutputPath"
},
"TransformResources": {
"InstanceCount": 1,
"InstanceType": "ml.m4.xlarge"
},
"TransformJobName": "sfn-binary-classification-prediction"
},
"Next": "ValidateOutput"
},
SageMaker Training Job Example
The following includes a Task
state that creates an Amazon SageMaker training
job.
{
"SageMaker CreateTrainingJob":{
"Type":"Task",
"Resource":"arn:aws:states:::sagemaker:createTrainingJob.sync",
"Parameters":{
"TrainingJobName":"search-model",
"ResourceConfig":{
"InstanceCount":4,
"InstanceType":"ml.c4.8xlarge",
"VolumeSizeInGB":20
},
"HyperParameters":{
"mode":"batch_skipgram",
"epochs":"5",
"min_count":"5",
"sampling_threshold":"0.0001",
"learning_rate":"0.025",
"window_size":"5",
"vector_dim":"300",
"negative_samples":"5",
"batch_size":"11"
},
"AlgorithmSpecification":{
"TrainingImage":"...",
"TrainingInputMode":"File"
},
"OutputDataConfig":{
"S3OutputPath":"s3://bucket-name/doc-search/model"
},
"StoppingCondition":{
"MaxRuntimeInSeconds":100000
},
"RoleArn":"arn:aws:iam::123456789012:role/docsearch-stepfunction-iam-role",
"InputDataConfig":[
{
"ChannelName":"train",
"DataSource":{
"S3DataSource":{
"S3DataType":"S3Prefix",
"S3Uri":"s3://bucket-name/doc-search/interim-data/training-data/",
"S3DataDistributionType":"FullyReplicated"
}
}
}
]
},
"Retry":[
{
"ErrorEquals":[
"SageMaker.AmazonSageMakerException"
],
"IntervalSeconds":1,
"MaxAttempts":100,
"BackoffRate":1.1
},
{
"ErrorEquals":[
"SageMaker.ResourceLimitExceededException"
],
"IntervalSeconds":60,
"MaxAttempts":5000,
"BackoffRate":1
},
{
"ErrorEquals":[
"States.Timeout"
],
"IntervalSeconds":1,
"MaxAttempts":5,
"BackoffRate":1
}
],
"Catch":[
{
"ErrorEquals":[
"States.ALL"
],
"ResultPath":"$.cause",
"Next":"Sagemaker Training Job Error"
}
],
"Next":"Delete Interim Data Job"
}
}
SageMaker Labeling Job Example
The following includes a Task
state that creates an Amazon SageMaker labeling
job.
{
"StartAt": "SageMaker CreateLabelingJob",
"TimeoutSeconds": 3600,
"States": {
"SageMaker CreateLabelingJob": {
"Type": "Task",
"Resource": "arn:aws:states:::sagemaker:createLabelingJob.sync",
"Parameters": {
"HumanTaskConfig": {
"AnnotationConsolidationConfig": {
"AnnotationConsolidationLambdaArn": "arn:aws:lambda:us-west-2:123456789012:function:ACS-TextMultiClass"
},
"NumberOfHumanWorkersPerDataObject": 1,
"PreHumanTaskLambdaArn": "arn:aws:lambda:us-west-2:123456789012:function:PRE-TextMultiClass",
"TaskDescription": "Classify the following text",
"TaskKeywords": [
"tc",
"Labeling"
],
"TaskTimeLimitInSeconds": 300,
"TaskTitle": "Classify short bits of text",
"UiConfig": {
"UiTemplateS3Uri": "s3://s3bucket-example/TextClassification.template"
},
"WorkteamArn": "arn:aws:sagemaker:us-west-2:123456789012:workteam/private-crowd/ExampleTesting"
},
"InputConfig": {
"DataAttributes": {
"ContentClassifiers": [
"FreeOfPersonallyIdentifiableInformation",
"FreeOfAdultContent"
]
},
"DataSource": {
"S3DataSource": {
"ManifestS3Uri": "s3://s3bucket-example/manifest.json"
}
}
},
"LabelAttributeName": "Categories",
"LabelCategoryConfigS3Uri": "s3://s3bucket-example/labelcategories.json",
"LabelingJobName": "example-job-name",
"OutputConfig": {
"S3OutputPath": "s3://s3bucket-example/output"
},
"RoleArn": "arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole",
"StoppingConditions": {
"MaxHumanLabeledObjectCount": 10000,
"MaxPercentageOfInputDatasetLabeled": 100
}
},
"Next": "ValidateOutput"
},
"ValidateOutput": {
"Type": "Choice",
"Choices": [
{
"Not": {
"Variable": "$.LabelingJobArn",
"StringEquals": ""
},
"Next": "Succeed"
}
],
"Default": "Fail"
},
"Succeed": {
"Type": "Succeed"
},
"Fail": {
"Type": "Fail",
"Error": "InvalidOutput",
"Cause": "Output is not what was expected. This could be due to a service outage or a misconfigured service integration."
}
}
}
SageMaker Processing Job Example
The following includes a Task
state that creates an Amazon SageMaker processing
job.
{
"StartAt": "SageMaker CreateProcessingJob Sync",
"TimeoutSeconds": 3600,
"States": {
"SageMaker CreateProcessingJob Sync": {
"Type": "Task",
"Resource": "arn:aws:states:::sagemaker:createProcessingJob.sync",
"Parameters": {
"AppSpecification": {
"ImageUri": "737474898029.dkr.ecr.sa-east-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3"
},
"ProcessingResources": {
"ClusterConfig": {
"InstanceCount": 1,
"InstanceType": "ml.t3.medium",
"VolumeSizeInGB": 10
}
},
"RoleArn": "arn:aws:iam::123456789012:role/SM-003-CreateProcessingJobAPIExecutionRole",
"ProcessingJobName.$": "$.id"
},
"Next": "ValidateOutput"
},
"ValidateOutput": {
"Type": "Choice",
"Choices": [
{
"Not": {
"Variable": "$.ProcessingJobArn",
"StringEquals": ""
},
"Next": "Succeed"
}
],
"Default": "Fail"
},
"Succeed": {
"Type": "Succeed"
},
"Fail": {
"Type": "Fail",
"Error": "InvalidConnectorOutput",
"Cause": "Connector output is not what was expected. This could be due to a service outage or a misconfigured connector."
}
}
}
For information about how to configure IAM permissions when using Step Functions with other Amazon services, see How Step Functions generates IAM policies for integrated services.