Elevenlabs scribe v1 Speech to Text

# Sample request. Replace <authorization> with "Bearer <API key>" and
# <content-type> with application/json; the body values are placeholders.
curl -X POST 'https://api.myrouter.ai/v3/elevenlabs-scribe-v1' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "seed": 123,
  "diarize": true,
  "file_format": "<string>",
  "temperature": 123,
  "num_speakers": 123,
  "language_code": "<string>",
  "tag_audio_events": true,
  "cloud_storage_url": "<string>",
  "use_multi_channel": true,
  "diarization_threshold": 123,
  "timestamps_granularity": "<string>"
}
'

import requests

# Endpoint of the Elevenlabs scribe v1 speech-to-text API.
endpoint = "https://api.myrouter.ai/v3/elevenlabs-scribe-v1"

# Placeholder header values; substitute real ones before sending.
request_headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>",
}

# Placeholder request parameters, sent as the JSON body.
request_body = dict(
    seed=123,
    diarize=True,
    file_format="<string>",
    temperature=123,
    num_speakers=123,
    language_code="<string>",
    tag_audio_events=True,
    cloud_storage_url="<string>",
    use_multi_channel=True,
    diarization_threshold=123,
    timestamps_granularity="<string>",
)

# POST the parameters as JSON and print the raw response text.
reply = requests.post(endpoint, json=request_body, headers=request_headers)
print(reply.text)

// Endpoint of the Elevenlabs scribe v1 speech-to-text API.
const endpoint = 'https://api.myrouter.ai/v3/elevenlabs-scribe-v1';

// Placeholder request parameters, serialized to JSON below.
const requestBody = {
  seed: 123,
  diarize: true,
  file_format: '<string>',
  temperature: 123,
  num_speakers: 123,
  language_code: '<string>',
  tag_audio_events: true,
  cloud_storage_url: '<string>',
  use_multi_channel: true,
  diarization_threshold: 123,
  timestamps_granularity: '<string>'
};

// Placeholder header values; substitute real ones before sending.
const options = {
  method: 'POST',
  headers: {
    'Content-Type': '<content-type>',
    Authorization: '<authorization>'
  },
  body: JSON.stringify(requestBody)
};

// Send the request, parse the reply as JSON, and log it (or the error).
fetch(endpoint, options)
  .then((reply) => reply.json())
  .then((data) => console.log(data))
  .catch((failure) => console.error(failure));

<?php

// JSON request body with placeholder parameter values.
$payload = json_encode([
  'seed' => 123,
  'diarize' => true,
  'file_format' => '<string>',
  'temperature' => 123,
  'num_speakers' => 123,
  'language_code' => '<string>',
  'tag_audio_events' => true,
  'cloud_storage_url' => '<string>',
  'use_multi_channel' => true,
  'diarization_threshold' => 123,
  'timestamps_granularity' => '<string>'
]);

// Placeholder header values; substitute real ones before sending.
$headers = [
  "Authorization: <authorization>",
  "Content-Type: <content-type>"
];

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.myrouter.ai/v3/elevenlabs-scribe-v1",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => $headers,
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the transport error if there was one, otherwise the response body.
echo $err ? ("cURL Error #:" . $err) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample transcription request to the elevenlabs-scribe-v1
// endpoint and prints the raw response body.
//
// The "<content-type>" and "<authorization>" header values and the payload
// values are placeholders; replace them before running. Any failure is
// printed and the program returns without touching the response.
func main() {

	url := "https://api.myrouter.ai/v3/elevenlabs-scribe-v1"

	payload := strings.NewReader("{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so res.Body must not be touched here.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))

}

// Request body with placeholder parameter values.
String payload = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}";

// Send the POST request and read the response body as a string.
HttpResponse<String> response = Unirest.post("https://api.myrouter.ai/v3/elevenlabs-scribe-v1")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(payload)
  .asString();

require 'uri'
require 'net/http'

# Endpoint of the Elevenlabs scribe v1 speech-to-text API.
endpoint = URI("https://api.myrouter.ai/v3/elevenlabs-scribe-v1")

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Build the POST request; header and body values are placeholders.
post = Net::HTTP::Post.new(endpoint)
{ "Content-Type" => '<content-type>', "Authorization" => '<authorization>' }.each do |name, value|
  post[name] = value
end
post.body = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}"

# Send the request and print the response body.
puts client.request(post).read_body

POST

elevenlabs-scribe-v1

Elevenlabs scribe v1 Speech to Text

# Sample request. Replace <authorization> with "Bearer <API key>" and
# <content-type> with application/json; the body values are placeholders.
curl -X POST 'https://api.myrouter.ai/v3/elevenlabs-scribe-v1' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "seed": 123,
  "diarize": true,
  "file_format": "<string>",
  "temperature": 123,
  "num_speakers": 123,
  "language_code": "<string>",
  "tag_audio_events": true,
  "cloud_storage_url": "<string>",
  "use_multi_channel": true,
  "diarization_threshold": 123,
  "timestamps_granularity": "<string>"
}
'

import requests

# Endpoint of the Elevenlabs scribe v1 speech-to-text API.
endpoint = "https://api.myrouter.ai/v3/elevenlabs-scribe-v1"

# Placeholder header values; substitute real ones before sending.
request_headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>",
}

# Placeholder request parameters, sent as the JSON body.
request_body = dict(
    seed=123,
    diarize=True,
    file_format="<string>",
    temperature=123,
    num_speakers=123,
    language_code="<string>",
    tag_audio_events=True,
    cloud_storage_url="<string>",
    use_multi_channel=True,
    diarization_threshold=123,
    timestamps_granularity="<string>",
)

# POST the parameters as JSON and print the raw response text.
reply = requests.post(endpoint, json=request_body, headers=request_headers)
print(reply.text)

// Endpoint of the Elevenlabs scribe v1 speech-to-text API.
const endpoint = 'https://api.myrouter.ai/v3/elevenlabs-scribe-v1';

// Placeholder request parameters, serialized to JSON below.
const requestBody = {
  seed: 123,
  diarize: true,
  file_format: '<string>',
  temperature: 123,
  num_speakers: 123,
  language_code: '<string>',
  tag_audio_events: true,
  cloud_storage_url: '<string>',
  use_multi_channel: true,
  diarization_threshold: 123,
  timestamps_granularity: '<string>'
};

// Placeholder header values; substitute real ones before sending.
const options = {
  method: 'POST',
  headers: {
    'Content-Type': '<content-type>',
    Authorization: '<authorization>'
  },
  body: JSON.stringify(requestBody)
};

// Send the request, parse the reply as JSON, and log it (or the error).
fetch(endpoint, options)
  .then((reply) => reply.json())
  .then((data) => console.log(data))
  .catch((failure) => console.error(failure));

<?php

// JSON request body with placeholder parameter values.
$payload = json_encode([
  'seed' => 123,
  'diarize' => true,
  'file_format' => '<string>',
  'temperature' => 123,
  'num_speakers' => 123,
  'language_code' => '<string>',
  'tag_audio_events' => true,
  'cloud_storage_url' => '<string>',
  'use_multi_channel' => true,
  'diarization_threshold' => 123,
  'timestamps_granularity' => '<string>'
]);

// Placeholder header values; substitute real ones before sending.
$headers = [
  "Authorization: <authorization>",
  "Content-Type: <content-type>"
];

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.myrouter.ai/v3/elevenlabs-scribe-v1",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => $headers,
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the transport error if there was one, otherwise the response body.
echo $err ? ("cURL Error #:" . $err) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample transcription request to the elevenlabs-scribe-v1
// endpoint and prints the raw response body.
//
// The "<content-type>" and "<authorization>" header values and the payload
// values are placeholders; replace them before running. Any failure is
// printed and the program returns without touching the response.
func main() {

	url := "https://api.myrouter.ai/v3/elevenlabs-scribe-v1"

	payload := strings.NewReader("{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so res.Body must not be touched here.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))

}

// Request body with placeholder parameter values.
String payload = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}";

// Send the POST request and read the response body as a string.
HttpResponse<String> response = Unirest.post("https://api.myrouter.ai/v3/elevenlabs-scribe-v1")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(payload)
  .asString();

require 'uri'
require 'net/http'

# Endpoint of the Elevenlabs scribe v1 speech-to-text API.
endpoint = URI("https://api.myrouter.ai/v3/elevenlabs-scribe-v1")

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Build the POST request; header and body values are placeholders.
post = Net::HTTP::Post.new(endpoint)
{ "Content-Type" => '<content-type>', "Authorization" => '<authorization>' }.each do |name, value|
  post[name] = value
end
post.body = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}"

# Send the request and print the response body.
puts client.request(post).read_body

Transcribes audio or video files. When use_multi_channel is true and the uploaded audio has multiple channels, returns a ‘transcripts’ object with one transcription per channel. Otherwise returns a single transcription result.

Request Headers

Content-Type string

required

Enum: application/json

Authorization string

required

Bearer authentication format: Bearer {{API Key}}.

Request Body

seed integer

If specified, the system will make a best effort to sample deterministically. Repeated requests with the same seed and parameters should return the same result, but determinism is not guaranteed. Must be an integer between 0 and 2147483647. Range: [0, 2147483647]

diarize boolean

default:false

Whether to annotate which speaker is currently speaking in the uploaded file.

file_format string

default:"other"

Input audio format. Options are ‘pcm_s16le_16’ or ‘other’. pcm_s16le_16 requires audio to be 16kHz sample rate, 16-bit integer, mono, little-endian format, which has lower latency compared to encoded waveforms. Possible values: pcm_s16le_16, other

temperature number

Controls the randomness of the transcription output. Value range is 0.0 to 2.0; higher values produce more diverse and less certain results. If omitted, the default temperature of the selected model will be used (typically 0). Range: [0, 2]

num_speakers integer

Maximum number of speakers in the uploaded file. Can be used to help distinguish speakers. Up to 32 speakers supported. Range: [1, 32]

language_code string

Specifies the ISO-639-1 or ISO-639-3 language code of the audio file. Specifying it in advance can sometimes improve transcription performance. Defaults to null, which will automatically detect the language.

tag_audio_events boolean

default:true

Whether to tag audio events such as (laughter), (footsteps), etc. in the transcription.

cloud_storage_url string

required

HTTPS URL of the file to transcribe. Either file or cloud_storage_url must be provided. The file must be accessible via HTTPS and smaller than 2GB. Supports any valid HTTPS address, including cloud storage (AWS S3, GCS, Cloudflare R2, etc.), CDNs, or other HTTPS sources. Supports pre-signed URLs with tokens or URL query parameter authentication.

use_multi_channel boolean

default:false

Whether the audio file is multi-channel with each channel containing only a single speaker. When enabled, each channel will be transcribed independently and the results will be combined. Each word in the output will include a channel_index field. Up to 5 channels supported.

diarization_threshold number

Speaker diarization threshold. A higher value means a lower probability of one person being split into multiple speakers, but a higher probability of different people being merged into one speaker (fewer speakers identified). A lower value means a higher probability of one person being split into multiple speakers, but a lower probability of different people being merged (more speakers identified). Can only be set when diarize=True and num_speakers=None. Defaults to None, which selects a threshold based on the model ID (typically 0.22). Range: [0.1, 0.4]

timestamps_granularity string

default:"word"

Granularity of timestamps in the transcription. ‘word’ provides word-level timestamps, ‘character’ provides character-level timestamps. Possible values: none, word, character

Response

The response may be one of the following response types:

Response Type 1

string

required

The raw transcribed text.

array

required

List of words and their timing information.

Hide properties

number

End time (in seconds) of this word or sound in the audio.

string

required

The transcribed word or sound content.

string

required

The type of this word or sound. ‘audio_event’ is used for non-word sounds such as laughter or footsteps. Possible values: word, spacing, audio_event

number

Start time (in seconds) of this word or sound in the audio.

number

required

The log probability of predicting this word. logprob ranges from [-infinity, 0]; higher values indicate greater model confidence.

array

Characters that make up the word and their corresponding timing information.

Hide properties

number

End time (in seconds) of the character in the audio.

string

required

The transcribed character content.

number

Start time (in seconds) of the character in the audio.

string

Unique identifier of the speaker for this word.

integer

Channel index corresponding to this transcription (applicable for multi-channel audio).

string

required

Detected language code (e.g., ‘eng’ for English).

string

Unique transcription ID for this response.

number

required

Language detection confidence (between 0 and 1).

Response Type 2

array

required

List of transcriptions for each audio channel. Each transcription contains the text and word-level details for its respective channel.

Hide properties

string

required

The raw transcribed text.

array

required

List of words and their timing information.

Hide properties

number

End time (in seconds) of this word or sound in the audio.

string

required

The transcribed word or sound content.

string

required

The type of this word or sound. ‘audio_event’ is used for non-word sounds such as laughter or footsteps. Possible values: word, spacing, audio_event

number

Start time (in seconds) of this word or sound in the audio.

number

required

The log probability of predicting this word. logprob ranges from [-infinity, 0]; higher values indicate greater model confidence.

array

Characters that make up the word and their corresponding timing information.

Hide properties

number

End time (in seconds) of the character in the audio.

string

required

The transcribed character content.

number

Start time (in seconds) of the character in the audio.

string

Unique identifier of the speaker for this word.

integer

Channel index corresponding to this transcription (applicable for multi-channel audio).

string

required

Detected language code (e.g., ‘eng’ for English).

string

Unique transcription ID for this response.

number

required

Language detection confidence (between 0 and 1).

string

Unique transcription ID for this response.

MiniMax Quick Voice Cloning

Elevenlabs scribe v2 Speech to Text

API Basics

LLM

Image

Video

Audio

Elevenlabs scribe v1 Speech to Text

Request Headers

Request Body

Response

​Request Headers

​Request Body

​Response

Request Headers

Request Body

Response