from sagemaker.core import image_uris
from sagemaker.core import model_uris, script_uris
# Model id, version spec and scope shared by every lookup below.
train_model_id = "lightgbm-classification-model"
train_model_version = "*"
train_scope = "training"

# Instance type used both to pick the image and to run the job.
training_instance_type = "ml.m5.xlarge"

# Retrieve the docker image URI for this model on the chosen instance type.
train_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    model_id=train_model_id,
    model_version=train_model_version,
    image_scope=train_scope,
    instance_type=training_instance_type,
)

# Retrieve the URI of the training script bundle.
train_source_uri = script_uris.retrieve(
    model_id=train_model_id,
    model_version=train_model_version,
    script_scope=train_scope,
)

# Retrieve the URI of the model artifact for the same id / version / scope.
train_model_uri = model_uris.retrieve(
    model_id=train_model_id,
    model_version=train_model_version,
    model_scope=train_scope,
)
# Sample training data is available in this bucket
training_data_bucket = f"jumpstart-cache-prod-{aws_region}"
training_data_prefix = "training-datasets/tabular_multiclass/"

# The prefix above already ends with "/". Strip it before appending the
# sub-directories so the URIs do not contain "//": S3 keys are literal strings,
# so ".../tabular_multiclass//train" is a different prefix from
# ".../tabular_multiclass/train".
training_data_root = f"s3://{training_data_bucket}/{training_data_prefix.rstrip('/')}"
training_dataset_s3_path = f"{training_data_root}/train"
validation_dataset_s3_path = f"{training_data_root}/validation"

# Training output is written under the session's default bucket.
output_bucket = sess.default_bucket()
output_prefix = "jumpstart-example-tabular-training"
s3_output_location = f"s3://{output_bucket}/{output_prefix}/output"
from sagemaker import hyperparameters
# Retrieve the default hyperparameters for training the model.
# Note: this rebinds the name `hyperparameters` from the imported module to the
# object returned by retrieve_default(), so the module is no longer reachable
# under that name after this statement.
hyperparameters = hyperparameters.retrieve_default(
    model_id=train_model_id,
    model_version=train_model_version,
)

# [Optional] Override default hyperparameters with custom values
hyperparameters["num_boost_round"] = "500"

# Show the final set of hyperparameters that will be sent to the job.
print(hyperparameters)
from sagemaker.train import ModelTrainer
from sagemaker.train.configs import InputData
from sagemaker.train.configs import SourceCode, Compute, StoppingCondition, OutputDataConfig
from sagemaker.utils import name_from_base
# Job name derived from the model id.
# NOTE(review): training_job_name is not passed to ModelTrainer(...) or
# .train(...) below - confirm whether it is meant to be wired in.
training_job_name = name_from_base(f"built-in-algo-{train_model_id}-training")

# Create SageMaker ModelTrainer instance
tabular_model_trainer = ModelTrainer(
    role=aws_role,
    training_image=train_image_uri,
    source_code=SourceCode(
        source_dir=train_source_uri,
        entry_script="transfer_learning.py",
    ),
    # for distributed training, specify an instance_count greater than 1
    compute=Compute(instance_type=training_instance_type, instance_count=1),
    # 360000 seconds = 100 hours
    stopping_condition=StoppingCondition(max_runtime_in_seconds=360000),
    hyperparameters=hyperparameters,
    output_data_config=OutputDataConfig(s3_output_path=s3_output_location),
)

# Channel name -> S3 location, in the order the channels are submitted.
# In V3, pre-trained model artifacts are passed via input_data_config, hence
# the extra "model" channel.
# NOTE(review): the Estimator variant later in this file names the first
# channel "train" rather than "training" - confirm which name the training
# script expects.
channel_sources = {
    "training": training_dataset_s3_path,
    "validation": validation_dataset_s3_path,
    "model": train_model_uri,
}
input_channels = [
    InputData(channel_name=channel, data_source=s3_uri)
    for channel, s3_uri in channel_sources.items()
]

# Launch a SageMaker Training job by passing the S3 path of the training data
tabular_model_trainer.train(input_data_config=input_channels)
from sagemaker import image_uris, model_uris, script_uris
# Model id, version spec and scope used for all three artifact lookups.
train_model_id = "lightgbm-classification-model"
train_model_version = "*"
train_scope = "training"

# Instance type the image is selected for and the job runs on.
training_instance_type = "ml.m5.xlarge"

# Retrieve the docker image
train_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    model_id=train_model_id,
    model_version=train_model_version,
    image_scope=train_scope,
    instance_type=training_instance_type,
)

# Retrieve the training script
train_source_uri = script_uris.retrieve(
    model_id=train_model_id,
    model_version=train_model_version,
    script_scope=train_scope,
)

# Retrieve the model artifact URI; it is handed to the Estimator as model_uri.
train_model_uri = model_uris.retrieve(
    model_id=train_model_id,
    model_version=train_model_version,
    model_scope=train_scope,
)
# Sample training data is available in this bucket
training_data_bucket = f"jumpstart-cache-prod-{aws_region}"
training_data_prefix = "training-datasets/tabular_multiclass/"

# The prefix above already ends with "/". Strip it before appending the
# sub-directories so the URIs do not contain "//": S3 keys are literal strings,
# so ".../tabular_multiclass//train" is a different prefix from
# ".../tabular_multiclass/train".
training_data_root = f"s3://{training_data_bucket}/{training_data_prefix.rstrip('/')}"
training_dataset_s3_path = f"{training_data_root}/train"
validation_dataset_s3_path = f"{training_data_root}/validation"

# Training output is written under the session's default bucket.
output_bucket = sess.default_bucket()
output_prefix = "jumpstart-example-tabular-training"
s3_output_location = f"s3://{output_bucket}/{output_prefix}/output"
from sagemaker import hyperparameters
# Retrieve the default hyperparameters for training the model.
# Note: the assignment below replaces the imported `hyperparameters` module
# with the object returned by retrieve_default() under the same name.
hyperparameters = hyperparameters.retrieve_default(
    model_id=train_model_id,
    model_version=train_model_version,
)

# [Optional] Override default hyperparameters with custom values
hyperparameters["num_boost_round"] = "500"

# Display the hyperparameters that will be passed to the Estimator.
print(hyperparameters)
from sagemaker.estimator import Estimator
from sagemaker.utils import name_from_base
# Job name derived from the model id; passed to fit() below.
training_job_name = name_from_base(f"built-in-algo-{train_model_id}-training")

# Create SageMaker Estimator instance
tabular_estimator = Estimator(
    role=aws_role,
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    model_uri=train_model_uri,
    entry_point="transfer_learning.py",
    # for distributed training, specify an instance_count greater than 1
    instance_count=1,
    instance_type=training_instance_type,
    # 360000 seconds = 100 hours
    max_run=360000,
    hyperparameters=hyperparameters,
    output_path=s3_output_location,
)

# Channel name -> S3 location of the data for that channel.
training_inputs = {
    "train": training_dataset_s3_path,
    "validation": validation_dataset_s3_path,
}

# Launch a SageMaker Training job by passing the S3 path of the training data
tabular_estimator.fit(training_inputs, logs=True, job_name=training_job_name)