ElevenLabs

ElevenLabs

# POST a speech-to-text request; replace every <...> placeholder before running.
curl -X POST \
  'https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: application/json' \
  -d '
{
  "provider": "<string>",
  "asset_id": "<string>",
  "url": "<string>",
  "source_url": "<string>",
  "language_code": "<string>",
  "model_id": "<string>",
  "diarize": true,
  "timestamps_granularity": "<string>",
  "tag_audio_events": true,
  "num_speakers": 123,
  "file_format": "<string>",
  "source_lang": "<string>",
  "target_lang": "<string>"
}
'

import requests

# Speech-to-text endpoint that receives the JSON request body.
endpoint = "https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text"

# Headers: the API key goes into Authorization as-is.
request_headers = {
    "Authorization": "<authorization>",
    "Content-Type": "application/json",
}

# Request body; every "<string>" value is a placeholder to fill in.
request_body = dict(
    provider="<string>",
    asset_id="<string>",
    url="<string>",
    source_url="<string>",
    language_code="<string>",
    model_id="<string>",
    diarize=True,
    timestamps_granularity="<string>",
    tag_audio_events=True,
    num_speakers=123,
    file_format="<string>",
    source_lang="<string>",
    target_lang="<string>",
)

# Send the request and dump the raw response text.
result = requests.post(endpoint, headers=request_headers, json=request_body)
print(result.text)

// Request body; every '<string>' value is a placeholder to fill in.
const payload = {
  provider: '<string>',
  asset_id: '<string>',
  url: '<string>',
  source_url: '<string>',
  language_code: '<string>',
  model_id: '<string>',
  diarize: true,
  timestamps_granularity: '<string>',
  tag_audio_events: true,
  num_speakers: 123,
  file_format: '<string>',
  source_lang: '<string>',
  target_lang: '<string>'
};

const requestInit = {
  method: 'POST',
  headers: {Authorization: '<authorization>', 'Content-Type': 'application/json'},
  body: JSON.stringify(payload)
};

// Send the request, parse the JSON reply and log it; any failure is logged as an error.
(async () => {
  try {
    const reply = await fetch(
      'https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text',
      requestInit
    );
    const parsed = await reply.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Request body; every '<string>' value is a placeholder to fill in.
$payload = [
  'provider' => '<string>',
  'asset_id' => '<string>',
  'url' => '<string>',
  'source_url' => '<string>',
  'language_code' => '<string>',
  'model_id' => '<string>',
  'diarize' => true,
  'timestamps_granularity' => '<string>',
  'tag_audio_events' => true,
  'num_speakers' => 123,
  'file_format' => '<string>',
  'source_lang' => '<string>',
  'target_lang' => '<string>'
];

// Transfer options: POST the JSON body and return the response as a string.
$options = [
  CURLOPT_URL => "https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: application/json"
  ],
];

$ch = curl_init();
curl_setopt_array($ch, $options);

$result = curl_exec($ch);
$failure = curl_error($ch);

curl_close($ch);

// Print the transport error if there was one, otherwise the response body.
echo $failure ? "cURL Error #:" . $failure : $result;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a placeholder transcription request to the speech-to-text
// endpoint and prints the raw response body to standard output.
//
// Every step that can fail is checked. On failure a short message is printed
// and main returns, instead of continuing with a nil request or response.
func main() {

	url := "https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text"

	payload := strings.NewReader("{\n  \"provider\": \"<string>\",\n  \"asset_id\": \"<string>\",\n  \"url\": \"<string>\",\n  \"source_url\": \"<string>\",\n  \"language_code\": \"<string>\",\n  \"model_id\": \"<string>\",\n  \"diarize\": true,\n  \"timestamps_granularity\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"num_speakers\": 123,\n  \"file_format\": \"<string>\",\n  \"source_lang\": \"<string>\",\n  \"target_lang\": \"<string>\"\n}")

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "<authorization>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so res.Body must not be touched.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))

}

// Endpoint and JSON body are named separately so the call chain stays short.
String endpoint = "https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text";
String requestBody = "{\n  \"provider\": \"<string>\",\n  \"asset_id\": \"<string>\",\n  \"url\": \"<string>\",\n  \"source_url\": \"<string>\",\n  \"language_code\": \"<string>\",\n  \"model_id\": \"<string>\",\n  \"diarize\": true,\n  \"timestamps_granularity\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"num_speakers\": 123,\n  \"file_format\": \"<string>\",\n  \"source_lang\": \"<string>\",\n  \"target_lang\": \"<string>\"\n}";

// POST the body and read the response as a string.
HttpResponse<String> response = Unirest.post(endpoint)
  .header("Authorization", "<authorization>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Speech-to-text endpoint that receives the JSON request body.
endpoint = URI("https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text")

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST request with the auth header and the placeholder JSON body.
post = Net::HTTP::Post.new(endpoint)
post["Authorization"] = '<authorization>'
post["Content-Type"] = 'application/json'
post.body = "{\n  \"provider\": \"<string>\",\n  \"asset_id\": \"<string>\",\n  \"url\": \"<string>\",\n  \"source_url\": \"<string>\",\n  \"language_code\": \"<string>\",\n  \"model_id\": \"<string>\",\n  \"diarize\": true,\n  \"timestamps_granularity\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"num_speakers\": 123,\n  \"file_format\": \"<string>\",\n  \"source_lang\": \"<string>\",\n  \"target_lang\": \"<string>\"\n}"

# Send the request and print the response body.
puts client.request(post).read_body

{
  "code": 200,
  "msg": "success",
  "data": {
    "language_code": "en",
    "language_probability": 0.98,
    "text": "Hello, how are you today? I'm doing well, thank you.",
    "words": [
      {
        "text": "Hello,",
        "start": 0.0,
        "end": 0.52,
        "type": "word",
        "speaker_id": "speaker_0"
      },
      {
        "text": " ",
        "start": 0.52,
        "end": 0.52,
        "type": "spacing"
      },
      {
        "text": "how",
        "start": 0.52,
        "end": 0.78,
        "type": "word",
        "speaker_id": "speaker_0"
      }
    ]
  },
  "failed": false,
  "success": true
}

POST

serve

api

transcriptions

speech-to-text

ElevenLabs

# POST a speech-to-text request; replace every <...> placeholder before running.
curl -X POST \
  'https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: application/json' \
  -d '
{
  "provider": "<string>",
  "asset_id": "<string>",
  "url": "<string>",
  "source_url": "<string>",
  "language_code": "<string>",
  "model_id": "<string>",
  "diarize": true,
  "timestamps_granularity": "<string>",
  "tag_audio_events": true,
  "num_speakers": 123,
  "file_format": "<string>",
  "source_lang": "<string>",
  "target_lang": "<string>"
}
'

import requests

# Speech-to-text endpoint that receives the JSON request body.
endpoint = "https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text"

# Headers: the API key goes into Authorization as-is.
request_headers = {
    "Authorization": "<authorization>",
    "Content-Type": "application/json",
}

# Request body; every "<string>" value is a placeholder to fill in.
request_body = dict(
    provider="<string>",
    asset_id="<string>",
    url="<string>",
    source_url="<string>",
    language_code="<string>",
    model_id="<string>",
    diarize=True,
    timestamps_granularity="<string>",
    tag_audio_events=True,
    num_speakers=123,
    file_format="<string>",
    source_lang="<string>",
    target_lang="<string>",
)

# Send the request and dump the raw response text.
result = requests.post(endpoint, headers=request_headers, json=request_body)
print(result.text)

// Request body; every '<string>' value is a placeholder to fill in.
const payload = {
  provider: '<string>',
  asset_id: '<string>',
  url: '<string>',
  source_url: '<string>',
  language_code: '<string>',
  model_id: '<string>',
  diarize: true,
  timestamps_granularity: '<string>',
  tag_audio_events: true,
  num_speakers: 123,
  file_format: '<string>',
  source_lang: '<string>',
  target_lang: '<string>'
};

const requestInit = {
  method: 'POST',
  headers: {Authorization: '<authorization>', 'Content-Type': 'application/json'},
  body: JSON.stringify(payload)
};

// Send the request, parse the JSON reply and log it; any failure is logged as an error.
(async () => {
  try {
    const reply = await fetch(
      'https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text',
      requestInit
    );
    const parsed = await reply.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Request body; every '<string>' value is a placeholder to fill in.
$payload = [
  'provider' => '<string>',
  'asset_id' => '<string>',
  'url' => '<string>',
  'source_url' => '<string>',
  'language_code' => '<string>',
  'model_id' => '<string>',
  'diarize' => true,
  'timestamps_granularity' => '<string>',
  'tag_audio_events' => true,
  'num_speakers' => 123,
  'file_format' => '<string>',
  'source_lang' => '<string>',
  'target_lang' => '<string>'
];

// Transfer options: POST the JSON body and return the response as a string.
$options = [
  CURLOPT_URL => "https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: application/json"
  ],
];

$ch = curl_init();
curl_setopt_array($ch, $options);

$result = curl_exec($ch);
$failure = curl_error($ch);

curl_close($ch);

// Print the transport error if there was one, otherwise the response body.
echo $failure ? "cURL Error #:" . $failure : $result;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a placeholder transcription request to the speech-to-text
// endpoint and prints the raw response body to standard output.
//
// Every step that can fail is checked. On failure a short message is printed
// and main returns, instead of continuing with a nil request or response.
func main() {

	url := "https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text"

	payload := strings.NewReader("{\n  \"provider\": \"<string>\",\n  \"asset_id\": \"<string>\",\n  \"url\": \"<string>\",\n  \"source_url\": \"<string>\",\n  \"language_code\": \"<string>\",\n  \"model_id\": \"<string>\",\n  \"diarize\": true,\n  \"timestamps_granularity\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"num_speakers\": 123,\n  \"file_format\": \"<string>\",\n  \"source_lang\": \"<string>\",\n  \"target_lang\": \"<string>\"\n}")

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "<authorization>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so res.Body must not be touched.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))

}

// Endpoint and JSON body are named separately so the call chain stays short.
String endpoint = "https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text";
String requestBody = "{\n  \"provider\": \"<string>\",\n  \"asset_id\": \"<string>\",\n  \"url\": \"<string>\",\n  \"source_url\": \"<string>\",\n  \"language_code\": \"<string>\",\n  \"model_id\": \"<string>\",\n  \"diarize\": true,\n  \"timestamps_granularity\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"num_speakers\": 123,\n  \"file_format\": \"<string>\",\n  \"source_lang\": \"<string>\",\n  \"target_lang\": \"<string>\"\n}";

// POST the body and read the response as a string.
HttpResponse<String> response = Unirest.post(endpoint)
  .header("Authorization", "<authorization>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Speech-to-text endpoint that receives the JSON request body.
endpoint = URI("https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text")

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST request with the auth header and the placeholder JSON body.
post = Net::HTTP::Post.new(endpoint)
post["Authorization"] = '<authorization>'
post["Content-Type"] = 'application/json'
post.body = "{\n  \"provider\": \"<string>\",\n  \"asset_id\": \"<string>\",\n  \"url\": \"<string>\",\n  \"source_url\": \"<string>\",\n  \"language_code\": \"<string>\",\n  \"model_id\": \"<string>\",\n  \"diarize\": true,\n  \"timestamps_granularity\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"num_speakers\": 123,\n  \"file_format\": \"<string>\",\n  \"source_lang\": \"<string>\",\n  \"target_lang\": \"<string>\"\n}"

# Send the request and print the response body.
puts client.request(post).read_body

{
  "code": 200,
  "msg": "success",
  "data": {
    "language_code": "en",
    "language_probability": 0.98,
    "text": "Hello, how are you today? I'm doing well, thank you.",
    "words": [
      {
        "text": "Hello,",
        "start": 0.0,
        "end": 0.52,
        "type": "word",
        "speaker_id": "speaker_0"
      },
      {
        "text": " ",
        "start": 0.52,
        "end": 0.52,
        "type": "spacing"
      },
      {
        "text": "how",
        "start": 0.52,
        "end": 0.78,
        "type": "word",
        "speaker_id": "speaker_0"
      }
    ]
  },
  "failed": false,
  "success": true
}

Product: Visual Intelligence — Audio File Transcription
Use case: Transcribe an uploaded audio/video file to text — async batch or sync, with multiple providers (Whisper, ElevenLabs, AssemblyAI) and optional speaker labels. For live streams, see Live Audio Transcription.
Host: https://mavi-backend.memories.ai/serve/api/v2
Auth: Authorization: sk-mavi-... (no Bearer prefix)

Uses the ElevenLabs Scribe V2 model and returns results synchronously.

Pricing: $0.39/hour of audio, billed by actual audio duration (in seconds).

Audio Source

You must provide one of asset_id, url, or source_url. If more than one is supplied, the order of precedence is asset_id > url > source_url.

Parameters

Authorization

string

required

API key for authentication (e.g. sk-mavi-...).

provider

string

default:"elevenlabs"

STT provider. Use elevenlabs for this endpoint.

asset_id

string

The unique identifier of an uploaded audio/video asset (e.g. re_xxx). Resolved to a signed GCS URL.

url

string

A publicly accessible audio URL.

source_url

string

A gs:// GCS path or public HTTP URL. GCS paths are converted to signed URLs automatically.

language_code

string

Language code (ISO 639-1, e.g. en, zh). If omitted, the provider auto-detects the language.

model_id

string

default:"scribe_v2"

Model to use.

diarize

boolean

Enable speaker diarization.

timestamps_granularity

string

Timestamp level: none, segment, or word.

tag_audio_events

boolean

Tag audio events such as music, laughter, applause.

num_speakers

integer

Expected number of speakers (improves diarization).

file_format

string

Audio format hint (e.g. pcm_s16le_16000).

source_lang

string

Source language for translation.

target_lang

string

Target language for translation.

Code Examples

# Transcribe an uploaded asset with speaker labels and word-level timestamps.
curl -X POST \
  'https://mavi-backend.memories.ai/serve/api/v2/transcriptions/speech-to-text' \
  -H 'Authorization: sk-mavi-...' \
  -H 'Content-Type: application/json' \
  -d '{
    "provider": "elevenlabs",
    "asset_id": "re_657929111888723968",
    "model_id": "scribe_v2",
    "language_code": "en",
    "diarize": true,
    "timestamps_granularity": "word",
    "num_speakers": 2
  }'

const BASE_URL = "https://mavi-backend.memories.ai/serve/api/v2/transcriptions";
const API_KEY = "sk-mavi-...";

// Transcribe an uploaded asset with speaker labels and word-level timestamps.
const payload = {
  provider: 'elevenlabs',
  asset_id: 're_657929111888723968',
  model_id: 'scribe_v2',
  language_code: 'en',
  diarize: true,
  timestamps_granularity: 'word',
  num_speakers: 2
};

// The API key is sent as the Authorization header value as-is.
const requestHeaders = {
  'Content-Type': 'application/json',
  'Authorization': API_KEY
};

const reply = await fetch(BASE_URL + '/speech-to-text', {
  method: 'POST',
  headers: requestHeaders,
  body: JSON.stringify(payload)
});

// Parse the JSON reply and log it.
console.log(await reply.json());

import requests

BASE_URL = "https://mavi-backend.memories.ai/serve/api/v2/transcriptions"
API_KEY = "sk-mavi-..."
# The API key is sent as the Authorization header value as-is.
HEADERS = {
    "Authorization": API_KEY,
    "Content-Type": "application/json"
}

# Transcribe an uploaded asset with speaker labels and word-level timestamps.
request_body = dict(
    provider="elevenlabs",
    asset_id="re_657929111888723968",
    model_id="scribe_v2",
    language_code="en",
    diarize=True,
    timestamps_granularity="word",
    num_speakers=2,
)

endpoint = BASE_URL + "/speech-to-text"
result = requests.post(endpoint, json=request_body, headers=HEADERS)

# Print the decoded JSON response.
print(result.json())

Response

{
  "code": 200,
  "msg": "success",
  "data": {
    "language_code": "en",
    "language_probability": 0.98,
    "text": "Hello, how are you today? I'm doing well, thank you.",
    "words": [
      {
        "text": "Hello,",
        "start": 0.0,
        "end": 0.52,
        "type": "word",
        "speaker_id": "speaker_0"
      },
      {
        "text": " ",
        "start": 0.52,
        "end": 0.52,
        "type": "spacing"
      },
      {
        "text": "how",
        "start": 0.52,
        "end": 0.78,
        "type": "word",
        "speaker_id": "speaker_0"
      }
    ]
  },
  "failed": false,
  "success": true
}

Response Parameters

Parameter	Type	Description
data.language_code	string	Detected language code (ISO 639-1)
data.language_probability	number	Confidence of language detection (0.0–1.0)
data.text	string	Full transcription text
data.words	array[object]	Word-level transcription with timing
data.words[].text	string	Text of the token (word, spacing, or audio event)
data.words[].start	number	Start time in seconds
data.words[].end	number	End time in seconds
data.words[].type	string	Token type: `word`, `spacing`, or `audio_event`
data.words[].speaker_id	string	Speaker identifier (e.g. `speaker_0`). Only present when `diarize=true`.

Timestamps are in seconds (e.g. 0.52).

Twitter Video Caption AssemblyAI

Get Started

Asset Management

Social Media Scraping

Audio File Transcription

Live Audio Transcription

Video Model APIs

Video Task APIs

Live Video Content Moderation

Live Video Understanding

Image Model APIs

Embeddings

Human ReID & Caption

Reference

Audio Source

Parameters

Code Examples

Response

Response Parameters

​Audio Source

​Parameters

​Code Examples

​Response

​Response Parameters

Audio Source

Parameters

Code Examples

Response

Response Parameters