Create Data Source
bedrockagent_create_data_source | R Documentation |
Connects a knowledge base to a data source¶
Description¶
Connects a knowledge base to a data source. You specify the configuration for the specific data source service in the `dataSourceConfiguration` field.

You can't change the `chunkingConfiguration` after you create the data source connector.
Usage¶
bedrockagent_create_data_source(clientToken, dataDeletionPolicy,
dataSourceConfiguration, description, knowledgeBaseId, name,
serverSideEncryptionConfiguration, vectorIngestionConfiguration)
Arguments¶
clientToken
A unique, case-sensitive identifier to ensure that the API request completes no more than one time. If this token matches a previous request, Amazon Bedrock ignores the request, but does not return an error. For more information, see Ensuring idempotency.
dataDeletionPolicy
The data deletion policy for the data source.
You can set the data deletion policy to:
- `DELETE`: Deletes all data from your data source that’s converted into vector embeddings upon deletion of a knowledge base or data source resource. Note that the vector store itself is not deleted, only the data. This flag is ignored if an Amazon Web Services account is deleted.
- `RETAIN`: Retains all data from your data source that’s converted into vector embeddings upon deletion of a knowledge base or data source resource. Note that the vector store itself is not deleted if you delete a knowledge base or data source resource.
dataSourceConfiguration
[required] The connection configuration for the data source.
description
A description of the data source.
knowledgeBaseId
[required] The unique identifier of the knowledge base to which to add the data source.
name
[required] The name of the data source.
serverSideEncryptionConfiguration
Contains details about the server-side encryption for the data source.
vectorIngestionConfiguration
Contains details about how to ingest the documents in the data source.
Value¶
A list with the following syntax:
list(
dataSource = list(
createdAt = as.POSIXct(
"2015-01-01"
),
dataDeletionPolicy = "RETAIN"|"DELETE",
dataSourceConfiguration = list(
confluenceConfiguration = list(
crawlerConfiguration = list(
filterConfiguration = list(
patternObjectFilter = list(
filters = list(
list(
exclusionFilters = list(
"string"
),
inclusionFilters = list(
"string"
),
objectType = "string"
)
)
),
type = "PATTERN"
)
),
sourceConfiguration = list(
authType = "BASIC"|"OAUTH2_CLIENT_CREDENTIALS",
credentialsSecretArn = "string",
hostType = "SAAS",
hostUrl = "string"
)
),
s3Configuration = list(
bucketArn = "string",
bucketOwnerAccountId = "string",
inclusionPrefixes = list(
"string"
)
),
salesforceConfiguration = list(
crawlerConfiguration = list(
filterConfiguration = list(
patternObjectFilter = list(
filters = list(
list(
exclusionFilters = list(
"string"
),
inclusionFilters = list(
"string"
),
objectType = "string"
)
)
),
type = "PATTERN"
)
),
sourceConfiguration = list(
authType = "OAUTH2_CLIENT_CREDENTIALS",
credentialsSecretArn = "string",
hostUrl = "string"
)
),
sharePointConfiguration = list(
crawlerConfiguration = list(
filterConfiguration = list(
patternObjectFilter = list(
filters = list(
list(
exclusionFilters = list(
"string"
),
inclusionFilters = list(
"string"
),
objectType = "string"
)
)
),
type = "PATTERN"
)
),
sourceConfiguration = list(
authType = "OAUTH2_CLIENT_CREDENTIALS"|"OAUTH2_SHAREPOINT_APP_ONLY_CLIENT_CREDENTIALS",
credentialsSecretArn = "string",
domain = "string",
hostType = "ONLINE",
siteUrls = list(
"string"
),
tenantId = "string"
)
),
type = "S3"|"WEB"|"CONFLUENCE"|"SALESFORCE"|"SHAREPOINT"|"CUSTOM"|"REDSHIFT_METADATA",
webConfiguration = list(
crawlerConfiguration = list(
crawlerLimits = list(
maxPages = 123,
rateLimit = 123
),
exclusionFilters = list(
"string"
),
inclusionFilters = list(
"string"
),
scope = "HOST_ONLY"|"SUBDOMAINS",
userAgent = "string"
),
sourceConfiguration = list(
urlConfiguration = list(
seedUrls = list(
list(
url = "string"
)
)
)
)
)
),
dataSourceId = "string",
description = "string",
failureReasons = list(
"string"
),
knowledgeBaseId = "string",
name = "string",
serverSideEncryptionConfiguration = list(
kmsKeyArn = "string"
),
status = "AVAILABLE"|"DELETING"|"DELETE_UNSUCCESSFUL",
updatedAt = as.POSIXct(
"2015-01-01"
),
vectorIngestionConfiguration = list(
chunkingConfiguration = list(
chunkingStrategy = "FIXED_SIZE"|"NONE"|"HIERARCHICAL"|"SEMANTIC",
fixedSizeChunkingConfiguration = list(
maxTokens = 123,
overlapPercentage = 123
),
hierarchicalChunkingConfiguration = list(
levelConfigurations = list(
list(
maxTokens = 123
)
),
overlapTokens = 123
),
semanticChunkingConfiguration = list(
breakpointPercentileThreshold = 123,
bufferSize = 123,
maxTokens = 123
)
),
customTransformationConfiguration = list(
intermediateStorage = list(
s3Location = list(
uri = "string"
)
),
transformations = list(
list(
stepToApply = "POST_CHUNKING",
transformationFunction = list(
transformationLambdaConfiguration = list(
lambdaArn = "string"
)
)
)
)
),
parsingConfiguration = list(
bedrockDataAutomationConfiguration = list(
parsingModality = "MULTIMODAL"
),
bedrockFoundationModelConfiguration = list(
modelArn = "string",
parsingModality = "MULTIMODAL",
parsingPrompt = list(
parsingPromptText = "string"
)
),
parsingStrategy = "BEDROCK_FOUNDATION_MODEL"|"BEDROCK_DATA_AUTOMATION"
)
)
)
)
Request syntax¶
svc$create_data_source(
clientToken = "string",
dataDeletionPolicy = "RETAIN"|"DELETE",
dataSourceConfiguration = list(
confluenceConfiguration = list(
crawlerConfiguration = list(
filterConfiguration = list(
patternObjectFilter = list(
filters = list(
list(
exclusionFilters = list(
"string"
),
inclusionFilters = list(
"string"
),
objectType = "string"
)
)
),
type = "PATTERN"
)
),
sourceConfiguration = list(
authType = "BASIC"|"OAUTH2_CLIENT_CREDENTIALS",
credentialsSecretArn = "string",
hostType = "SAAS",
hostUrl = "string"
)
),
s3Configuration = list(
bucketArn = "string",
bucketOwnerAccountId = "string",
inclusionPrefixes = list(
"string"
)
),
salesforceConfiguration = list(
crawlerConfiguration = list(
filterConfiguration = list(
patternObjectFilter = list(
filters = list(
list(
exclusionFilters = list(
"string"
),
inclusionFilters = list(
"string"
),
objectType = "string"
)
)
),
type = "PATTERN"
)
),
sourceConfiguration = list(
authType = "OAUTH2_CLIENT_CREDENTIALS",
credentialsSecretArn = "string",
hostUrl = "string"
)
),
sharePointConfiguration = list(
crawlerConfiguration = list(
filterConfiguration = list(
patternObjectFilter = list(
filters = list(
list(
exclusionFilters = list(
"string"
),
inclusionFilters = list(
"string"
),
objectType = "string"
)
)
),
type = "PATTERN"
)
),
sourceConfiguration = list(
authType = "OAUTH2_CLIENT_CREDENTIALS"|"OAUTH2_SHAREPOINT_APP_ONLY_CLIENT_CREDENTIALS",
credentialsSecretArn = "string",
domain = "string",
hostType = "ONLINE",
siteUrls = list(
"string"
),
tenantId = "string"
)
),
type = "S3"|"WEB"|"CONFLUENCE"|"SALESFORCE"|"SHAREPOINT"|"CUSTOM"|"REDSHIFT_METADATA",
webConfiguration = list(
crawlerConfiguration = list(
crawlerLimits = list(
maxPages = 123,
rateLimit = 123
),
exclusionFilters = list(
"string"
),
inclusionFilters = list(
"string"
),
scope = "HOST_ONLY"|"SUBDOMAINS",
userAgent = "string"
),
sourceConfiguration = list(
urlConfiguration = list(
seedUrls = list(
list(
url = "string"
)
)
)
)
)
),
description = "string",
knowledgeBaseId = "string",
name = "string",
serverSideEncryptionConfiguration = list(
kmsKeyArn = "string"
),
vectorIngestionConfiguration = list(
chunkingConfiguration = list(
chunkingStrategy = "FIXED_SIZE"|"NONE"|"HIERARCHICAL"|"SEMANTIC",
fixedSizeChunkingConfiguration = list(
maxTokens = 123,
overlapPercentage = 123
),
hierarchicalChunkingConfiguration = list(
levelConfigurations = list(
list(
maxTokens = 123
)
),
overlapTokens = 123
),
semanticChunkingConfiguration = list(
breakpointPercentileThreshold = 123,
bufferSize = 123,
maxTokens = 123
)
),
customTransformationConfiguration = list(
intermediateStorage = list(
s3Location = list(
uri = "string"
)
),
transformations = list(
list(
stepToApply = "POST_CHUNKING",
transformationFunction = list(
transformationLambdaConfiguration = list(
lambdaArn = "string"
)
)
)
)
),
parsingConfiguration = list(
bedrockDataAutomationConfiguration = list(
parsingModality = "MULTIMODAL"
),
bedrockFoundationModelConfiguration = list(
modelArn = "string",
parsingModality = "MULTIMODAL",
parsingPrompt = list(
parsingPromptText = "string"
)
),
parsingStrategy = "BEDROCK_FOUNDATION_MODEL"|"BEDROCK_DATA_AUTOMATION"
)
)
)