Overview ↗
noOriginal Documentation
Documentation Index
Fetch the complete documentation index at: https://docs.together.ai/llms.txt
Use this file to discover all available pages before exploring further.
Welcome to Together AI’s docs! Together makes it easy to run, fine-tune, and train open-source AI models with transparency and privacy.
/**
 * ModelGrid — lists featured models grouped by category.
 *
 * `modelGroups` holds six groups (chat, image, vision, audio, embedding,
 * rerank). Each group has a `title`, a docs `link`, a `hasViewAll` flag and
 * an `items` array of `{ name, icon, description, link }` entries.
 *
 * For every group the markup below emits the title in an <h3>, a
 * "View all models" label plus a chevron when `hasViewAll` is true, and one
 * external link per model (opens in a new tab, `rel="noopener noreferrer"`)
 * showing the model icon, with the description as the hover `title`. The
 * model name is rendered only when `hasViewAll` is true; groups without it
 * get a trailing chevron instead.
 *
 * `getGridStyle(index)` maps a group index to a `gridRow` span style and
 * falls back to `{}` for any index without an entry. Nothing in the visible
 * markup calls it.
 *
 * NOTE(review): the string literals are delimited with typographic quotes
 * (“ ”), the returned markup has no visible wrapper elements (note the
 * unmatched closing </a> after the group body), and `group.link` is never
 * read. Presumably this was lost when the page was extracted — confirm
 * against the original source before editing the code.
 */
export const ModelGrid = () => { const modelGroups = [{ title: “Chat models:”, link: “/docs/serverless-models#chat-models”, hasViewAll: true, items: [{ name: “DeepSeek R1”, icon: “/images/intro/deepseek.png”, description: “Upgraded DeepSeek-R1 with better reasoning, function calling, and coding, using 23K-token thinking to score 87.5% on AIME.”, link: “https://www.together.ai/models/deepseek-r1” }, { name: “DeepSeek V3.1”, icon: “/images/intro/deepseek.png”, description: “671B parameters (37B activated), 128K context, hybrid thinking/non-thinking modes, advanced tool calling, agent capabilities”, link: “https://www.together.ai/models/deepseek-v3-1” }, { name: “GPT-OSS-120B”, icon: “/images/intro/gpt.png”, description: “120B parameters, 128K context, reasoning with chain-of-thought, MoE architecture, Apache 2.0 license”, link: “https://www.together.ai/models/gpt-oss-120b” }, { name: “Llama 4 Maverick”, icon: “/images/intro/meta.png”, description: “SOTA 128-expert MoE powerhouse for multilingual image/text understanding, creative writing, and enterprise-scale applications.”, link: “https://www.together.ai/models/llama-4-maverick” }, { name: “Qwen 3 Next 80B”, icon: “/images/intro/qwen.png”, description: “80B parameters (3B activated), instruction-tuned MoE, 10x faster inference, hybrid attention mechanisms”, link: “https://www.together.ai/models/qwen3-next-80b-a3b-instruct” }, { name: “Kimi K2 0905”, icon: “/images/intro/kimi.png”, description: “Upgraded state-of-the-art mixture-of-experts agentic intelligence model with 1T parameters, 256K context, and native tool use”, link: “https://www.together.ai/models/kimi-k2-0905” }] }, { title: “Image models:”, link: “/docs/serverless-models#image-models”, hasViewAll: true, items: [{ name: “FLUX.1 [schnell]”, icon: “/images/intro/flux.png”, description: “Fastest available endpoint for the SOTA open-source image generation model by Black Forest Labs.”, link: “https://www.together.ai/models/flux-1-schnell” }, { name: “FLUX 1.1 
[pro]”, icon: “/images/intro/flux.png”, description: “Premium image generation model by Black Forest Labs.”, link: “https://www.together.ai/models/flux1-1-pro” }] }, { title: “Vision models:”, link: “/docs/serverless-models#vision-models”, hasViewAll: true, items: [{ name: “Llama 4 Scout”, icon: “/images/intro/meta.png”, description: “SOTA 109B model with 17B active params & large context, excelling at multi-document analysis, codebase reasoning, and personalized tasks.”, link: “https://www.together.ai/models/llama-4-scout” }, { name: “Qwen2.5 VL 72B”, icon: “/images/intro/qwen.png”, description: “Vision-language model with advanced visual reasoning, video understanding, structured outputs, and agentic capabilities.”, link: “https://www.together.ai/models/qwen2-5-vl-72b-instruct” }] }, { title: “Audio models:”, link: “/docs/serverless-models#audio-models”, hasViewAll: true, items: [{ name: “Cartesia Sonic 2”, icon: “/images/intro/cartesia.png”, description: “Low-latency, ultra-realistic voice model, served in partnership with Cartesia.”, link: “https://www.together.ai/models/cartesia-sonic” }, { name: “Whisper Large v3”, icon: “/images/intro/gpt.png”, description: “High-performance speech-to-text model delivering transcription 15x faster than OpenAI with support for 1GB+ files, 50+ languages, and production-ready infrastructure.”, link: “https://www.together.ai/models/openai-whisper-large-v3” }] }, { title: “Embedding models:”, link: “/docs/serverless-models#embedding-models”, hasViewAll: false, items: [{ name: “M2-BERT 80M 2K”, icon: “/images/intro/bert.png”, description: “An 80M checkpoint of M2-BERT, pretrained with sequence length 2048, and it has been fine-tuned for long-context retrieval.”, link: “https://www.together.ai/models/m2-bert-80m-2k-retrieval” }, { name: “BGE-Base-EN”, icon: “/images/intro/baai.png”, description: “This model maps any text to a low-dimensional dense vector using FlagEmbedding.”, link: “https://www.together.ai/models/bge-base-en-v1-5” 
}] }, { title: “Rerank models:”, link: “/docs/serverless-models#rerank-models”, hasViewAll: false, items: [{ name: “Salesforce LlamaRank”, icon: “/images/intro/salesforce.png”, description: “Salesforce Research’s proprietary fine-tuned rerank model with 8K context, outperforming Cohere Rerank for superior document retrieval.”, link: “https://www.together.ai/models/salesforce-llamarank” }, { name: “Mxbai Rerank Large V2”, icon: “/images/intro/mxbai.png”, description: “1.5B-parameter RL-trained reranking model achieving state-of-the-art accuracy across 100+ languages with 8K context, outperforming Cohere and Voyage.”, link: “https://www.together.ai/models/mxbai-rerank-large-v2” }] }]; const getGridStyle = index => { const styles = [{ gridRow: “span 4” }, { gridRow: “span 2” }, { gridRow: “span 2” }, { gridRow: “span 2” }, { gridRow: “span 1” }, { gridRow: “span 1” }]; return styles[index] || ({}); }; return {modelGroups.map((group, index) => { const models = group.items; return
<h3 className="text-base text-left text-[#171a1e] dark:text-white font-normal my-0 leading-[24px]">
{group.title}
</h3>
{group.hasViewAll &&
<p className="text-sm font-light text-neutral-500 dark:text-gray-100 mr-2 whitespace-nowrap">
View all models
</p>
<svg width={5} height={8} viewBox="0 0 5 8" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M1 1L4 4L1 7" stroke="currentColor" strokeLinecap="round" />
</svg>
}
{models.map((item, i) => <a key={i} href={item.link} target="_blank" rel="noopener noreferrer" className="flex items-center border-none gap-3 hover:bg-gray-50 dark:hover:bg-gray-700 transition-all rounded-md p-1" title={item.description}>
<img noZoom src={item.icon} alt="" />
{group.hasViewAll && <p className="text-sm text-left text-neutral-700 dark:text-gray-100 whitespace-nowrap font-normal leading-[26px]">
{item.name}
</p>}
</a>)}
{!group.hasViewAll && <svg width={5} height={8} viewBox="0 0 5 8" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M0.930237 1.11548L4.06977 4.00009L0.930237 6.88471" stroke="currentColor" strokeLinecap="round" />
</svg>}
</a>;
})} ; };
/**
 * WideCtaCard — call-to-action card with a centered title and description.
 *
 * @param props.title - Text rendered in the first (text-base) paragraph.
 * @param props.description - Text rendered in the second paragraph, capped
 *   at a 208px max width.
 * @param props.iconUrl - Gates an icon slot (`iconUrl && …`).
 * @param props.href - When truthy the `{cardContent}` branch is returned,
 *   otherwise `cardContent` itself.
 *
 * NOTE(review): the right-hand side of `iconUrl &&` is empty and the `href`
 * branch shows no link element around `cardContent`. Presumably the icon
 * and wrapper markup were lost in extraction — confirm against the original
 * source.
 */
export const WideCtaCard = ({title, description, iconUrl, href}) => {
const cardContent =
{iconUrl && }
<p className="text-base text-center text-[#0a0a0a] dark:text-white">
{title}
</p>
<p className="text-sm text-center text-[#3e4146] dark:text-gray-100 mt-2 max-w-[208px]">
{description}
</p>
; return href ? {cardContent} : cardContent; };
/**
 * CtaCard — call-to-action card with a left-aligned title and description.
 *
 * @param props.title - Text rendered in the first (text-base) paragraph.
 * @param props.description - Text rendered in the second, lighter paragraph.
 * @param props.border - Defaults to `true`; not referenced anywhere in the
 *   visible body.
 * @param props.iconUrl - Selects between two icon variants
 *   (`iconUrl ? … : …`).
 * @param props.href - When truthy the `{cardContent}` branch is returned,
 *   otherwise `cardContent` itself.
 *
 * NOTE(review): both branches of the `iconUrl` ternary are empty and the
 * `href` branch shows no link element around `cardContent`. Presumably the
 * markup (and the use of `border`) was lost in extraction — confirm against
 * the original source.
 */
export const CtaCard = ({title, description, border = true, iconUrl, href}) => {
const cardContent =
{iconUrl ? : }
<p className="text-base text-left text-[#0a0a0a] dark:text-white">
{title}
</p>
<p className="text-sm font-light text-left text-neutral-700 dark:text-gray-100 mt-2">
{description}
</p>
; return href ? {cardContent} : cardContent; };
/**
 * GridCards — container that passes its `children` through.
 *
 * NOTE(review): no wrapper element is visible around `{children}`;
 * presumably the grid container markup was lost in extraction — confirm
 * against the original source.
 */
export const GridCards = ({children}) => { return {children} ; };
/**
 * Quickstart — static "Developer Quickstart" panel. Takes no props (the
 * parameter is an empty destructuring pattern).
 *
 * The markup consists of, in order:
 *   - three language labels ("python", "typescript", "curL"), the first one
 *     styled with `font-medium`;
 *   - a Python example written out token by token, each token in its own
 *     colored <span> (import, client construction, the
 *     `client.chat.completions.create(...)` call, and the final `print`);
 *   - a low-opacity "123456789" paragraph (looks like a line-number gutter —
 *     TODO confirm);
 *   - the "Developer Quickstart" heading and a short blurb mentioning the
 *     "full quickstart".
 *
 * The literal braces of the `messages` dict are emitted as `{"{"}` / `{"}"}`
 * so that JSX does not treat them as expression delimiters.
 *
 * NOTE(review): the model string in this snippet
 * ("meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo") differs from the
 * "openai/gpt-oss-20b" used by the plain-text examples later in this file —
 * confirm which one is current. No wrapper elements are visible around the
 * paragraphs and "full quickstart" is a plain <span> with no link;
 * presumably markup was lost in extraction.
 */
export const Quickstart = ({}) => { return
<p className="flex-grow-0 flex-shrink-0 text-xs font-medium text-center text-[#1d293d]">
python
</p>
<p className="flex-grow-0 flex-shrink-0 text-xs text-center text-[#707377]">
typescript
</p>
<p className="flex-grow-0 flex-shrink-0 text-xs text-center text-[#707377]">
curL
</p>
<p className="w-[243.82px] h-[42px] absolute left-2.5 top-[3px] text-sm text-left">
<span className="w-[243.82px] h-[42px] text-sm text-left text-[#cf222e]">
from
</span>
<span className="w-[243.82px] h-[42px] text-sm text-left text-[#1f2328]">
together
</span>
<span className="w-[243.82px] h-[42px] text-sm text-left text-[#cf222e]">
import
</span>
<span className="w-[243.82px] h-[42px] text-sm text-left text-[#1f2328]">
Together
</span>
<br />
<span className="w-[243.82px] h-[42px] text-sm text-left text-[#1f2328]">
client
</span>
<span className="w-[243.82px] h-[42px] text-sm text-left text-[#cf222e]">
=
</span>
<span className="w-[243.82px] h-[42px] text-sm text-left text-[#1f2328]">
Together()
</span>
</p>
<p className="w-[625px] absolute left-2.5 top-[72px] text-sm text-left">
<span className="w-[625px] text-sm text-left text-[#1f2328]">
completion
</span>
<span className="w-[625px] text-sm text-left text-[#cf222e]">
=
</span>
<span className="w-[625px] text-sm text-left text-[#1f2328]">
client.chat.completions.create(
</span>
<br />
<span className="w-[625px] text-sm text-left text-[#953800]">
model
</span>
<span className="w-[625px] text-sm text-left text-[#cf222e]">
=
</span>
<span className="w-[625px] text-sm text-left text-[#0a3069]">
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
</span>
<span className="w-[625px] text-sm text-left text-[#1f2328]">
,
</span>
<br />
<span className="w-[625px] text-sm text-left text-[#953800]">
messages
</span>
<span className="w-[625px] text-sm text-left text-[#cf222e]">
=
</span>
<span className="w-[625px] text-sm text-left text-[#1f2328]">
[{"{"}
</span>
<span className="w-[625px] text-sm text-left text-[#0a3069]">
"role"
</span>
<span className="w-[625px] text-sm text-left text-[#1f2328]">
:
</span>
<span className="w-[625px] text-sm text-left text-[#0a3069]">
"user"
</span>
<span className="w-[625px] text-sm text-left text-[#1f2328]">
,
</span>
<span className="w-[625px] text-sm text-left text-[#0a3069]">
"content"
</span>
<span className="w-[625px] text-sm text-left text-[#1f2328]">
:
</span>
<span className="w-[625px] text-sm text-left text-[#0a3069]">
"What are the top 3 things to do in New York?"
</span>
<span className="w-[625px] text-sm text-left text-[#1f2328]">
{"}"}],)
</span>
</p>
<p className="w-[369.95px] h-[18px] absolute left-2.5 top-[195px] text-sm text-left">
<span className="w-[369.95px] h-[18px] text-sm text-left text-[#0550ae]">
print
</span>
<span className="w-[369.95px] h-[18px] text-sm text-left text-[#1f2328]">
(completion.choices[
</span>
<span className="w-[369.95px] h-[18px] text-sm text-left text-[#0550ae]">
0
</span>
<span className="w-[369.95px] h-[18px] text-sm text-left text-[#1f2328]">
].message.content)
</span>
</p>
<p className="self-stretch flex-grow-0 flex-shrink-0 w-[9px] opacity-20 text-sm text-left text-black">
123456789
</p>
<p className="absolute left-7 top-6 text-base font-medium text-left text-[#171a1e]">
Developer Quickstart
</p>
<p className="w-[293px] absolute left-7 top-[58px] text-sm text-left">
<span className="w-[293px] text-sm text-left text-[#3e4146]">
Copy this snippet to get started with our inference API. See our
</span>
<span className="w-[293px] text-sm font-medium text-left text-black">
full quickstart
</span>
<span className="w-[293px] text-sm text-left text-[#3e4146]">
for more details.
</span>
</p>
; };
/**
 * SubHeading — emits a heading followed by a description.
 *
 * @param props.heading - Rendered first.
 * @param props.description - Rendered after the heading.
 *
 * NOTE(review): no elements are visible around `{heading}` and
 * `{description}`; presumably the markup was lost in extraction. The text
 * that follows `; };` on the closing line is not part of this component —
 * it is the first line of the Python example that comes next.
 */
export const SubHeading = ({heading, description}) => { return
{heading}
{description}
; };from together import Together
client = Together()
completion = client.chat.completions.create(
model="openai/gpt-oss-20b",
messages=[{"role": "user", "content": "What are the top 3 things to do in New York?"}],
)
print(completion.choices[0].message.content)

import Together from 'together-ai';
const together = new Together();
const completion = await together.chat.completions.create({
model: 'openai/gpt-oss-20b',
messages: [{ role: 'user', content: 'Top 3 things to do in New York?' }],
});
console.log(completion.choices[0].message.content);

curl -X POST "https://api.together.xyz/v1/chat/completions" \
-H "Authorization: Bearer $TOGETHER_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "openai/gpt-oss-20b",
"messages": [
{"role": "user", "content": "What are the top 3 things to do in New York?"}
]
}'

Deploy your own Dockerized workloads on Together’s managed GPU infrastructure. You bring the container — Together handles provisioning, autoscaling, and observability.