Skip to content

Create Data Source

bedrockagent_create_data_source R Documentation

Connects a knowledge base to a data source

Description

Connects a knowledge base to a data source. You specify the configuration for the specific data source service in the dataSourceConfiguration field.

You can't change the chunkingConfiguration (set inside vectorIngestionConfiguration) after you create the data source connector.

Usage

bedrockagent_create_data_source(clientToken, dataDeletionPolicy,
  dataSourceConfiguration, description, knowledgeBaseId, name,
  serverSideEncryptionConfiguration, vectorIngestionConfiguration)

Arguments

clientToken

A unique, case-sensitive identifier to ensure that the API request completes no more than one time. If this token matches a previous request, Amazon Bedrock ignores the request, but does not return an error. For more information, see "Ensuring idempotency" in the Amazon EC2 API documentation.

dataDeletionPolicy

The data deletion policy for the data source.

You can set the data deletion policy to:

  • DELETE: When a knowledge base or data source resource is deleted, deletes all data from your data source that was converted into vector embeddings. Note that the vector store itself is not deleted, only the data. This flag is ignored if an Amazon Web Services account is deleted.

  • RETAIN: When a knowledge base or data source resource is deleted, retains all data from your data source that was converted into vector embeddings. Note that the vector store itself is not deleted if you delete a knowledge base or data source resource.

dataSourceConfiguration

[required] The connection configuration for the data source.

description

A description of the data source.

knowledgeBaseId

[required] The unique identifier of the knowledge base to which to add the data source.

name

[required] The name of the data source.

serverSideEncryptionConfiguration

Contains details about the server-side encryption for the data source.

vectorIngestionConfiguration

Contains details about how to ingest the documents in the data source.

Value

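A list with the following syntax (values separated by | enumerate the possible values of a field; "string", 123 and the dates shown are placeholders for the field's type):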

list(
  dataSource = list(
    createdAt = as.POSIXct(
      "2015-01-01"
    ),
    dataDeletionPolicy = "RETAIN"|"DELETE",
    dataSourceConfiguration = list(
      confluenceConfiguration = list(
        crawlerConfiguration = list(
          filterConfiguration = list(
            patternObjectFilter = list(
              filters = list(
                list(
                  exclusionFilters = list(
                    "string"
                  ),
                  inclusionFilters = list(
                    "string"
                  ),
                  objectType = "string"
                )
              )
            ),
            type = "PATTERN"
          )
        ),
        sourceConfiguration = list(
          authType = "BASIC"|"OAUTH2_CLIENT_CREDENTIALS",
          credentialsSecretArn = "string",
          hostType = "SAAS",
          hostUrl = "string"
        )
      ),
      s3Configuration = list(
        bucketArn = "string",
        bucketOwnerAccountId = "string",
        inclusionPrefixes = list(
          "string"
        )
      ),
      salesforceConfiguration = list(
        crawlerConfiguration = list(
          filterConfiguration = list(
            patternObjectFilter = list(
              filters = list(
                list(
                  exclusionFilters = list(
                    "string"
                  ),
                  inclusionFilters = list(
                    "string"
                  ),
                  objectType = "string"
                )
              )
            ),
            type = "PATTERN"
          )
        ),
        sourceConfiguration = list(
          authType = "OAUTH2_CLIENT_CREDENTIALS",
          credentialsSecretArn = "string",
          hostUrl = "string"
        )
      ),
      sharePointConfiguration = list(
        crawlerConfiguration = list(
          filterConfiguration = list(
            patternObjectFilter = list(
              filters = list(
                list(
                  exclusionFilters = list(
                    "string"
                  ),
                  inclusionFilters = list(
                    "string"
                  ),
                  objectType = "string"
                )
              )
            ),
            type = "PATTERN"
          )
        ),
        sourceConfiguration = list(
          authType = "OAUTH2_CLIENT_CREDENTIALS"|"OAUTH2_SHAREPOINT_APP_ONLY_CLIENT_CREDENTIALS",
          credentialsSecretArn = "string",
          domain = "string",
          hostType = "ONLINE",
          siteUrls = list(
            "string"
          ),
          tenantId = "string"
        )
      ),
      type = "S3"|"WEB"|"CONFLUENCE"|"SALESFORCE"|"SHAREPOINT"|"CUSTOM"|"REDSHIFT_METADATA",
      webConfiguration = list(
        crawlerConfiguration = list(
          crawlerLimits = list(
            maxPages = 123,
            rateLimit = 123
          ),
          exclusionFilters = list(
            "string"
          ),
          inclusionFilters = list(
            "string"
          ),
          scope = "HOST_ONLY"|"SUBDOMAINS",
          userAgent = "string"
        ),
        sourceConfiguration = list(
          urlConfiguration = list(
            seedUrls = list(
              list(
                url = "string"
              )
            )
          )
        )
      )
    ),
    dataSourceId = "string",
    description = "string",
    failureReasons = list(
      "string"
    ),
    knowledgeBaseId = "string",
    name = "string",
    serverSideEncryptionConfiguration = list(
      kmsKeyArn = "string"
    ),
    status = "AVAILABLE"|"DELETING"|"DELETE_UNSUCCESSFUL",
    updatedAt = as.POSIXct(
      "2015-01-01"
    ),
    vectorIngestionConfiguration = list(
      chunkingConfiguration = list(
        chunkingStrategy = "FIXED_SIZE"|"NONE"|"HIERARCHICAL"|"SEMANTIC",
        fixedSizeChunkingConfiguration = list(
          maxTokens = 123,
          overlapPercentage = 123
        ),
        hierarchicalChunkingConfiguration = list(
          levelConfigurations = list(
            list(
              maxTokens = 123
            )
          ),
          overlapTokens = 123
        ),
        semanticChunkingConfiguration = list(
          breakpointPercentileThreshold = 123,
          bufferSize = 123,
          maxTokens = 123
        )
      ),
      customTransformationConfiguration = list(
        intermediateStorage = list(
          s3Location = list(
            uri = "string"
          )
        ),
        transformations = list(
          list(
            stepToApply = "POST_CHUNKING",
            transformationFunction = list(
              transformationLambdaConfiguration = list(
                lambdaArn = "string"
              )
            )
          )
        )
      ),
      parsingConfiguration = list(
        bedrockDataAutomationConfiguration = list(
          parsingModality = "MULTIMODAL"
        ),
        bedrockFoundationModelConfiguration = list(
          modelArn = "string",
          parsingModality = "MULTIMODAL",
          parsingPrompt = list(
            parsingPromptText = "string"
          )
        ),
        parsingStrategy = "BEDROCK_FOUNDATION_MODEL"|"BEDROCK_DATA_AUTOMATION"
      )
    )
  )
)

Request syntax

svc$create_data_source(
  clientToken = "string",
  dataDeletionPolicy = "RETAIN"|"DELETE",
  dataSourceConfiguration = list(
    confluenceConfiguration = list(
      crawlerConfiguration = list(
        filterConfiguration = list(
          patternObjectFilter = list(
            filters = list(
              list(
                exclusionFilters = list(
                  "string"
                ),
                inclusionFilters = list(
                  "string"
                ),
                objectType = "string"
              )
            )
          ),
          type = "PATTERN"
        )
      ),
      sourceConfiguration = list(
        authType = "BASIC"|"OAUTH2_CLIENT_CREDENTIALS",
        credentialsSecretArn = "string",
        hostType = "SAAS",
        hostUrl = "string"
      )
    ),
    s3Configuration = list(
      bucketArn = "string",
      bucketOwnerAccountId = "string",
      inclusionPrefixes = list(
        "string"
      )
    ),
    salesforceConfiguration = list(
      crawlerConfiguration = list(
        filterConfiguration = list(
          patternObjectFilter = list(
            filters = list(
              list(
                exclusionFilters = list(
                  "string"
                ),
                inclusionFilters = list(
                  "string"
                ),
                objectType = "string"
              )
            )
          ),
          type = "PATTERN"
        )
      ),
      sourceConfiguration = list(
        authType = "OAUTH2_CLIENT_CREDENTIALS",
        credentialsSecretArn = "string",
        hostUrl = "string"
      )
    ),
    sharePointConfiguration = list(
      crawlerConfiguration = list(
        filterConfiguration = list(
          patternObjectFilter = list(
            filters = list(
              list(
                exclusionFilters = list(
                  "string"
                ),
                inclusionFilters = list(
                  "string"
                ),
                objectType = "string"
              )
            )
          ),
          type = "PATTERN"
        )
      ),
      sourceConfiguration = list(
        authType = "OAUTH2_CLIENT_CREDENTIALS"|"OAUTH2_SHAREPOINT_APP_ONLY_CLIENT_CREDENTIALS",
        credentialsSecretArn = "string",
        domain = "string",
        hostType = "ONLINE",
        siteUrls = list(
          "string"
        ),
        tenantId = "string"
      )
    ),
    type = "S3"|"WEB"|"CONFLUENCE"|"SALESFORCE"|"SHAREPOINT"|"CUSTOM"|"REDSHIFT_METADATA",
    webConfiguration = list(
      crawlerConfiguration = list(
        crawlerLimits = list(
          maxPages = 123,
          rateLimit = 123
        ),
        exclusionFilters = list(
          "string"
        ),
        inclusionFilters = list(
          "string"
        ),
        scope = "HOST_ONLY"|"SUBDOMAINS",
        userAgent = "string"
      ),
      sourceConfiguration = list(
        urlConfiguration = list(
          seedUrls = list(
            list(
              url = "string"
            )
          )
        )
      )
    )
  ),
  description = "string",
  knowledgeBaseId = "string",
  name = "string",
  serverSideEncryptionConfiguration = list(
    kmsKeyArn = "string"
  ),
  vectorIngestionConfiguration = list(
    chunkingConfiguration = list(
      chunkingStrategy = "FIXED_SIZE"|"NONE"|"HIERARCHICAL"|"SEMANTIC",
      fixedSizeChunkingConfiguration = list(
        maxTokens = 123,
        overlapPercentage = 123
      ),
      hierarchicalChunkingConfiguration = list(
        levelConfigurations = list(
          list(
            maxTokens = 123
          )
        ),
        overlapTokens = 123
      ),
      semanticChunkingConfiguration = list(
        breakpointPercentileThreshold = 123,
        bufferSize = 123,
        maxTokens = 123
      )
    ),
    customTransformationConfiguration = list(
      intermediateStorage = list(
        s3Location = list(
          uri = "string"
        )
      ),
      transformations = list(
        list(
          stepToApply = "POST_CHUNKING",
          transformationFunction = list(
            transformationLambdaConfiguration = list(
              lambdaArn = "string"
            )
          )
        )
      )
    ),
    parsingConfiguration = list(
      bedrockDataAutomationConfiguration = list(
        parsingModality = "MULTIMODAL"
      ),
      bedrockFoundationModelConfiguration = list(
        modelArn = "string",
        parsingModality = "MULTIMODAL",
        parsingPrompt = list(
          parsingPromptText = "string"
        )
      ),
      parsingStrategy = "BEDROCK_FOUNDATION_MODEL"|"BEDROCK_DATA_AUTOMATION"
    )
  )
)