Get Started
- Introduction
- Carbon Connect
- SDKs
Source Connectors
Destination Connectors
- Vector Storage
Learn
- Models
- Cold Storage
- Search
- File Formats
- OCR
- Webhooks
- Metadata and Filtering
- White Label
Manage Environment Variables
Environment variables for the Carbon self-hosting application, passed as a YAML file.
The `config.yaml` file is a configuration file that stores all the environment variables for the application. This file should be placed at the root of the `/app` directory. It can define multiple environments such as `dev`, `prod`, etc. Each environment should be placed under a key named after the environment, and that key must be at the top level of the file. The environment that is used is determined by the `global_env` key-value pair (e.g. `global_env: dev`). It's important to note that the variables in this file are converted into environment variables: every key-value pair at the leaf level is extracted, the key is upper-cased to become the name of the environment variable, and the value remains the same.
Example: The `microsoft_file_picker_client_id` variable under `prod.data_connectors.one_drive` would be converted to the env variable `MICROSOFT_FILE_PICKER_CLIENT_ID=test_id_here`.
Alternatively, the same variables can be provided through a `.env` file or simply as regular environment variables.

global_env: dev
# Settings for the `dev` environment; the top-level key is the environment name.
# Every leaf key/value pair is documented to be exported as an environment
# variable with the key upper-cased, so the grouping keys below only organise
# the file.
# NOTE(review): the nesting was reconstructed from the section keys after the
# original indentation was lost; confirm the grouping against the upstream
# template before relying on it.
dev:
  general:
    env: ''  # local, dev, or prod
    data_source_encryption_key: ''
    hash_bearer_token: ''  # true or false
    api_hostname: ''
    auth_access_token_secret: ''
    auth_refresh_token_secret: ''
    disable_api: ''  # true or false
    disable_background_processes: ''  # true or false
    disable_resync: ''  # true or false
    disable_textract: ''  # true or false
    queue_backend: ''  # Queue backend (PUBSUB, SQS, ElasticMQ, or NATS)
    send_emails: ''  # Control whether to send emails or not (true or false). Defaults to true.
    use_custom_queue: ''  # If using a custom queue, provide the path to the file it's implemented in.
    vertex_service_account_key: ''  # For local dev
    carbon_webhook_signing_key: ''  # for testing carbon webhooks
    disable_rate_limits: ''  # true or false
    disable_protected_routes: ''  # default true, can be set to false to enable certain routes in a safe env
    disable_autosync_service: ''  # default false, can be set to true to disable autosyncing files from 3rd party connectors
  request_timeouts:
    default_request_timeout: 7  # used for all requests made to external APIs and URLs by default
    web_scrape_request_timeout: 60  # used for requests made to apify during web scrape
    # Placeholder key: substitute the real data source name before use.
    <data_source_name>_request_timeout: 7  # used to customise request timeout for specific data sources. Replace data_source_name with actual name, for example notion_request_timeout and google_drive_request_timeout
  storage:
    storage_backend: ''  # Storage backend (S3, S3-compatible, GCS, Azure, or local)
    storage_endpoint_url: ''  # only relevant for S3-compatible storage
    storage_access_key_id: ''  # only relevant for S3-compatible storage
    storage_secret_access_key: ''  # only relevant for S3-compatible storage
    storage_tls: ''  # only relevant for S3-compatible storage (true or false)
    storage_sse: ''  # only relevant for S3-compatible storage (true or false)
    azure_storage_account_url: ''  # only relevant for Azure storage
    gcs_service_account_email: ''  # required if using GCP with a service account
  logging:
    better_exceptions: 1
    log_json_format: ''  # true or false
    log_level: ''  # INFO or DEBUG
    no_logging: ''  # true or false
  db:
    # Each connection setting has a `_reader` counterpart
    # (presumably for a read replica -- TODO confirm).
    db_host: ''
    db_host_reader: ''
    db_name: ''
    db_name_reader: ''
    db_password: ''
    db_password_reader: ''
    db_port: ''
    db_port_reader: ''
    db_schema: ''
    db_schema_reader: ''
    db_user: ''
    db_user_reader: ''
    instance_db_pool_overflow: ''
    instance_db_pool_size: ''
  redis:
    # only redis_db and redis_host are required
    redis_db: ''
    redis_host: ''
    redis_password: ''
    redis_port: ''
  vector_db:
    qdrant_api_key: ''
    qdrant_bm25_collection_name: ''
    qdrant_cohere_multilingual_v3_collection_name: ''
    qdrant_collection_name: ''
    qdrant_openai_ada_large_1024_collection_name: ''
    qdrant_openai_ada_large_256_collection_name: ''
    qdrant_openai_ada_large_3072_collection_name: ''
    qdrant_openai_ada_small_1536_collection_name: ''
    qdrant_openai_ada_small_512_collection_name: ''
    qdrant_solar_1_mini_collection_name: ''
    qdrant_url: ''
    qdrant_vertex_multimodal_collection_name: ''
  s3_buckets:
    rss_feeds_bucket: ''
    s3_cold_storage_bucket: ''
    s3_config_bucket: ''
    s3_customer_files_bucket: ''
    s3_dictionary_and_tfidf_bucket: ''
  gcs_buckets:
    gcs_customer_videos_bucket: ''
  queues:
    # only required if using sqs
    sqs_all_uploaded_files_queued_webhook_queue: ''
    sqs_bm25_queue: ''
    sqs_cold_storage_webhook_queue: ''
    sqs_cold_to_hot_storage_queue: ''
    sqs_customer_files_queue: ''
    sqs_data_source_actions_webhook_queue: ''
    sqs_data_source_ready_webhook_queue: ''
    sqs_delete_files_queue: ''
    sqs_delete_files_v2_queue: ''
    sqs_delete_users_queue: ''
    sqs_external_api_customer_files_queue: ''
    sqs_external_api_dropbox_queue: ''
    sqs_external_api_emails_queue: ''
    sqs_external_api_help_center_queue: ''
    sqs_external_api_onedrive_queue: ''
    sqs_external_source_items_queue: ''
    sqs_fetch_user_webpages_queue: ''
    sqs_file_deleted_webhook_queue: ''
    sqs_file_error_webhook_queue: ''
    sqs_file_sync_limit_reached_webhook_queue: ''
    sqs_file_syncing_webhook_queue: ''
    sqs_files_created_webhook_queue: ''
    sqs_google_drive_queue: ''
    sqs_notion_rate_limit_queue: ''
    sqs_organization_user_deleted_webhook_queue: ''
    sqs_rate_limit_error_webhook_queue: ''
    sqs_raw_text_uploads_queue: ''
    sqs_video_uploads_queue: ''
    sqs_webhook_queue: ''
    sqs_webpage_webhooks_queue: ''
    sqs_webscrape_batch_queue: ''
    sqs_webscrape_queue: ''
    sqs_zotero_queue: ''
    sqs_all_files_processed_webhook_queue: ''
    sqs_external_api_slack_queue: ''
    # Dedicated autosync queues can optionally be created for all third party connectors.
    # The naming convention for env variables will be appending _autosync to the original queue.
    # For example autosync queue for sqs_external_api_dropbox_queue will be sqs_external_api_dropbox_queue_autosync
    sqs_external_api_customer_files_queue_autosync: ''
    sqs_external_api_dropbox_queue_autosync: ''
    sqs_external_api_emails_queue_autosync: ''
    sqs_external_api_help_center_queue_autosync: ''
    sqs_external_api_onedrive_queue_autosync: ''
    sqs_google_drive_queue_autosync: ''
    sqs_notion_rate_limit_queue_autosync: ''
    sqs_external_api_slack_queue_autosync: ''
  pubsub:
    # only required if using pubsub instead of sqs
    pubsub_project_id: ''
    pubsub_topic: ''
  nats:
    # only required if using nats instead of sqs
    nats_connection_string: ''
    use_jetstream: ''  # true or false
  embedding_models:
    # One sub-section per embedding provider.
    cohere:
      cohere_api_key: ''
    openai:
      azure_openai_ada_large_deployment_id: ''
      azure_openai_ada_small_deployment_id: ''
      azure_openai_api_base: ''
      azure_openai_api_key: ''
      azure_openai_deployment_id: ''
      azure_openai_v3_api_base: ''
      azure_openai_v3_api_key: ''
      default_ada_large_1024_model: ''
      default_ada_large_256_model: ''
      default_ada_large_3072_model: ''
      default_ada_small_1536_model: ''
      default_ada_small_512_model: ''
      default_query_text_embedding_model: ''
      openai_accepted_latency_seconds: ''
      openai_api_key: ''
    jina:
      jina_api_key: ''
    solar:
      solar_api_key: ''
  data_connectors:
    # One sub-section per third-party connector. The `alt_*` keys look like
    # additional credential sets for the same provider -- TODO confirm.
    protect_default_oauth_access_tokens: ''
    atlassian:
      atlassian_client_id: ''
      atlassian_client_secret: ''
    box:
      box_client_id: ''
      box_client_secret: ''
    dropbox:
      alt_2_dropbox_app_key: ''
      alt_2_dropbox_app_secret: ''
      alt_dropbox_app_key: ''
      alt_dropbox_app_secret: ''
      dropbox_app_key: ''
      dropbox_app_secret: ''
    gmail:
      alt_3_gmail_client_id: ''
      alt_3_gmail_client_secret: ''
      alt_2_gmail_client_id: ''
      alt_2_gmail_client_secret: ''
      alt_gmail_client_id: ''
      alt_gmail_client_secret: ''
      gmail_client_id: ''
      gmail_client_secret: ''
    google:
      google_alt_2_api_key: ''
      google_alt_2_client_id: ''
      google_alt_2_client_secret: ''
      google_alt_3_api_key: ''
      google_alt_3_client_id: ''
      google_alt_3_client_secret: ''
      google_alt_4_api_key: ''
      google_alt_4_client_id: ''
      google_alt_4_client_secret: ''
      google_alt_api_key: ''
      google_alt_client_id: ''
      google_alt_client_secret: ''
      google_api_key: ''
      google_client_id: ''
      google_client_secret: ''
      google_redirect_uri: ''
    google_cloud_storage:
      google_cloud_storage_api_key: ''
      google_cloud_storage_client_id: ''
      google_cloud_storage_client_secret: ''
    intercom:
      intercom_client_id: ''
      intercom_client_secret: ''
    notion:
      notion_client_id: ''
      notion_client_secret: ''
      notion_redirect_uri: ''
    one_drive:
      microsoft_file_picker_client_id: ''
      onedrive_client_id: ''
      onedrive_client_secret: ''
    salesforce:
      salesforce_client_id: ''
      salesforce_client_secret: ''
      salesforce_verifier: ''
    slack:
      slack_client_id: ''
      slack_client_secret: ''
    zendesk:
      zendesk_app_subdomain: ''
      zendesk_client_id: ''
      zendesk_client_secret: ''
    zotero:
      zotero_client_key: ''
      zotero_client_secret: ''
    gong:
      gong_client_id: ''
      gong_client_secret: ''
  data_processors:
    apify:
      apify_actor_id: ''
      apify_api_key: ''
      apify_api_key_for_resync: ''
      default_apify_account_max_ram: ''
      apify_slack_alerts_token: ''
      apify_slack_alerts_channel: ''
    sauce_labs:
      sauce_labs_access_key: ''
      sauce_labs_region: ''
      sauce_labs_username: ''
    # NOTE(review): the four keys below are assumed to sit directly under
    # data_processors rather than under sauce_labs -- confirm.
    assemblyai_api_key: ''
    deepgram_api_key: ''
    metaphor_api_key: ''
    youtube_transcript_proxy: ''
  loops:
    loops_api_key: ''
  paigo:
    paigo_chars_stored_dimension_id: ''
    paigo_chars_synced_dimension_id: ''
    paigo_client_id: ''
    paigo_client_secret: ''
    paigo_embedding_dimension_id: ''
  stripe:
    stripe_api_key: ''
    stripe_bytes_uploaded_event_name: ''
    stripe_characters_synced_event_name: ''
    stripe_pages_ocr_scanned_event_name: ''
    stripe_urls_scraped_event_name: ''
  supabase:
    supabase_host: ''
    supabase_name: ''
    supabase_password: ''
    supabase_port: ''
    supabase_service_key: ''
    supabase_url: ''
    supabase_user: ''