POST /uploadfile

Authorizations

authorization
string
header
required

token <token>, where <token> is a temporary access token.

Query Parameters

chunk_size
integer | null

Chunk size, in tiktoken tokens, to be used when processing the file.

chunk_overlap
integer | null

Chunk overlap, in tiktoken tokens, to be used when processing the file.

skip_embedding_generation
boolean
default: false

Flag to control whether or not embeddings should be generated and stored when processing the file.

set_page_as_boundary
boolean
default: false

Flag to control whether or not to set a page's worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See the route description for more information.

embedding_model

Embedding model that will be used to embed file chunks.

Available options:
OPENAI,
AZURE_OPENAI,
COHERE_MULTILINGUAL_V3,
OPENAI_ADA_LARGE_256,
OPENAI_ADA_LARGE_1024,
OPENAI_ADA_LARGE_3072,
OPENAI_ADA_SMALL_512,
OPENAI_ADA_SMALL_1536,
AZURE_ADA_LARGE_256,
AZURE_ADA_LARGE_1024,
AZURE_ADA_LARGE_3072,
AZURE_ADA_SMALL_512,
AZURE_ADA_SMALL_1536,
SOLAR_1_MINI
use_ocr
boolean
default: false

Whether or not to use OCR when processing files. Only valid for PDFs. Useful for documents with tables, images, and/or scanned text.

generate_sparse_vectors
boolean
default: false

Whether or not to generate sparse vectors for the file. This is required for the file to be a candidate for hybrid search.

prepend_filename_to_chunks
boolean
default: false

Whether or not to prepend the file's name to chunks.

max_items_per_chunk
integer | null

Number of objects per chunk. For csv, tsv, xlsx, and json files only.

parse_pdf_tables_with_ocr
boolean
default: false

Whether to use rich table parsing when use_ocr is enabled.

detect_audio_language
boolean
default: false

Whether to automatically detect the language of the uploaded audio file.

transcription_service
enum<string> | null

The transcription service to use for audio files. If no service is specified, 'deepgram' will be used.

Available options:
assemblyai,
deepgram
media_type
enum<string> | null

The media type of the file. If not provided, it will be inferred from the file extension.

Available options:
TEXT,
IMAGE,
AUDIO,
VIDEO
split_rows
boolean
default: false

Whether to split tabular rows into chunks. Currently only valid for CSV, TSV, and XLSX files.

Body

multipart/form-data
file
string
required

Response

200 - application/json
id
integer
required
source
enum<string>
required
Available options:
GOOGLE_DRIVE,
NOTION,
NOTION_DATABASE,
INTERCOM,
DROPBOX,
ONEDRIVE,
SHAREPOINT,
CONFLUENCE,
BOX,
ZENDESK,
ZOTERO,
S3,
GMAIL,
OUTLOOK,
TEXT,
CSV,
TSV,
PDF,
DOCX,
PPTX,
XLSX,
MD,
RTF,
JSON,
HTML,
RAW_TEXT,
WEB_SCRAPE,
RSS_FEED,
FRESHDESK,
GITBOOK,
SALESFORCE,
GITHUB,
SLACK,
JPG,
PNG,
JPEG,
MP3,
MP2,
AAC,
WAV,
FLAC,
PCM,
M4A,
OGG,
OPUS,
MPEG,
MPG,
MP4,
WMV,
AVI,
MOV,
MKV,
FLV,
WEBM,
EML
organization_id
integer
required
organization_supplied_user_id
string
required
organization_user_data_source_id
integer | null
required
external_file_id
string
required
external_url
string | null
required
sync_status
enum<string>
required
Available options:
DELAYED,
QUEUED_FOR_SYNC,
SYNCING,
READY,
SYNC_ERROR,
EVALUATING_RESYNC,
RATE_LIMITED,
SYNC_ABORTED,
QUEUED_FOR_OCR
sync_error_message
string | null
required
last_sync
string | null
required
tags
object | null
required
file_statistics
object | null
required
file_metadata
object | null
required
embedding_properties
object | null
required
chunk_size
integer | null
required
chunk_overlap
integer | null
required
chunk_properties
object | null
required
ocr_properties
object
required
ocr_job_started_at
string | null
required
name
string | null
required
parent_id
integer | null
required
enable_auto_sync
boolean | null
required
presigned_url
string | null
required
parsed_text_url
string | null
required
additional_presigned_urls
object | null
required
skip_embedding_generation
boolean
required
source_created_at
string | null
required
generate_sparse_vectors
boolean | null
required
request_id
string | null
required
sync_properties
object
required
messages_metadata
object
required
file_contents_deleted
boolean
default: false
required
created_at
string
required
updated_at
string
required