POST /upload_file_from_url

Authorizations

authorization
string
header
required

Format: token <token>, where <token> is a temporary access token.

Body

application/json
url
string
required
file_name
string | null
chunk_size
integer | null
chunk_overlap
integer | null
skip_embedding_generation
boolean
default: false
set_page_as_boundary
boolean
default: false
embedding_model
enum<string>
default: OPENAI
Available options:
OPENAI,
AZURE_OPENAI,
AZURE_ADA_LARGE_256,
AZURE_ADA_LARGE_1024,
AZURE_ADA_LARGE_3072,
AZURE_ADA_SMALL_512,
AZURE_ADA_SMALL_1536,
COHERE_MULTILINGUAL_V3,
VERTEX_MULTIMODAL,
OPENAI_ADA_LARGE_256,
OPENAI_ADA_LARGE_1024,
OPENAI_ADA_LARGE_3072,
OPENAI_ADA_SMALL_512,
OPENAI_ADA_SMALL_1536,
SOLAR_1_MINI
generate_sparse_vectors
boolean
default: false
use_textract
boolean
default: false
prepend_filename_to_chunks
boolean
default: false
max_items_per_chunk
integer | null

Number of objects per chunk. For csv, tsv, xlsx, and json files only.

parse_pdf_tables_with_ocr
boolean
default: false
detect_audio_language
boolean
default: false
transcription_service
enum<string> | null
Available options:
assemblyai,
deepgram
include_speaker_labels
boolean
default: false
media_type
enum<string> | null
Available options:
TEXT,
IMAGE,
AUDIO,
VIDEO
split_rows
boolean
default: false
cold_storage_params
object
generate_chunks_only
boolean
default: false

If this flag is enabled, the file will be chunked and stored with Carbon, but no embeddings will be generated. This overrides the skip_embedding_generation flag.

store_file_only
boolean
default: false

If this flag is enabled, the file will be stored with Carbon, but no processing will be done.

Response

200 - application/json
id
integer
required
source
enum<string>
required
Available options:
GOOGLE_CLOUD_STORAGE,
GOOGLE_DRIVE,
NOTION,
NOTION_DATABASE,
INTERCOM,
DROPBOX,
ONEDRIVE,
SHAREPOINT,
CONFLUENCE,
BOX,
ZENDESK,
ZOTERO,
S3,
AZURE_BLOB_STORAGE,
GMAIL,
OUTLOOK,
SERVICENOW,
TEXT,
CSV,
TSV,
PDF,
DOCX,
PPTX,
XLSX,
XLSM,
MD,
RTF,
JSON,
HTML,
RAW_TEXT,
WEB_SCRAPE,
RSS_FEED,
FRESHDESK,
GITBOOK,
SALESFORCE,
GITHUB,
SLACK,
GURU,
GONG,
JPG,
PNG,
JPEG,
MP3,
MP2,
AAC,
WAV,
FLAC,
PCM,
M4A,
OGG,
OPUS,
MPEG,
MPG,
MP4,
WMV,
AVI,
MOV,
MKV,
FLV,
WEBM,
EML,
MSG
organization_id
integer
required
organization_supplied_user_id
string
required
organization_user_data_source_id
integer | null
required
external_file_id
string
required
external_url
string | null
required
sync_status
enum<string>
required
Available options:
DELAYED,
QUEUED_FOR_SYNC,
SYNCING,
READY,
SYNC_ERROR,
EVALUATING_RESYNC,
RATE_LIMITED,
SYNC_ABORTED,
QUEUED_FOR_OCR,
READY_TO_SYNC
sync_error_message
string | null
required
last_sync
string | null
required
tags
object | null
required
file_statistics
object | null
required
file_metadata
object | null
required
embedding_properties
object | null
required
chunk_size
integer | null
required
chunk_overlap
integer | null
required
chunk_properties
object | null
required
ocr_properties
object
required
ocr_job_started_at
string | null
required
name
string | null
required
parent_id
integer | null
required
enable_auto_sync
boolean | null
required
presigned_url
string | null
required
parsed_text_url
string | null
required
additional_presigned_urls
object | null
required
skip_embedding_generation
boolean
required
source_created_at
string | null
required
generate_sparse_vectors
boolean | null
required
request_id
string | null
required
upload_id
string | null
required
sync_properties
object
required
messages_metadata
object
required
file_contents_deleted
boolean
default: false
required
supports_cold_storage
boolean
required
hot_storage_time_to_live
integer | null
required
embedding_storage_status
enum<string>
required
Available options:
HOT_STORAGE,
HOT_TO_COLD,
COLD_STORAGE,
COLD_TO_HOT
created_at
string
required
updated_at
string
required