REST API Integration

This guide covers how to use the Klarisent STT SDK for transcribing pre-recorded audio files via the REST API.

Sending Base64-Encoded Audio

To transcribe pre-recorded audio files:

import KlarisentSTT from 'klarisent-stt-sdk';
import { TRIGGER } from 'klarisent-stt-sdk/trigger.enum';
import fs from 'fs';

// Initialize the SDK client (reused by every request in this guide).
// NOTE(review): in real code load the key from an environment variable
// instead of hard-coding it — see the "Complete API Example" section.
const stt = new KlarisentSTT({
  api_key: 'your-api-key'
});

// Function to transcribe an audio file
// Transcribes a single pre-recorded audio file via the REST API.
// Base64-encodes the file, submits it with question detection enabled,
// logs the key response metrics, and returns the raw API result.
// Rethrows any failure after logging it.
async function transcribeAudioFile(filePath) {
  try {
    // Base64-encode the file contents (API allows at most 20MB)
    const audioBase64 = fs.readFileSync(filePath, { encoding: 'base64' });

    // Submit the encoded audio for transcription
    const result = await stt.sendBase64Audio({
      audio: audioBase64,
      fileName: 'recording.flac', // Optional filename
      language: 'en',  // Language: 'en' for English, 'hi' for Hindi
      trigger: [TRIGGER.QuestionDetection]  // Optional: Enable specific triggers
    });

    // Report the transcription and its metrics
    const { transcription, duration, audioSizeKB, transcriptionTime } = result.data;
    console.log('Transcription:', transcription);
    console.log('Audio duration:', duration, 'seconds');
    console.log('Audio size:', audioSizeKB, 'KB');
    console.log('Transcription time:', transcriptionTime, 'ms');

    // Question results are only present when the trigger was enabled
    const questions = result.data.trigger?.[TRIGGER.QuestionDetection];
    if (questions) {
      console.log('Questions detected:', questions);
    }

    return result;
  } catch (error) {
    console.error('Transcription failed:', error);
    throw error;
  }
}

// Example usage: kick off a transcription and report the outcome.
// The terminal .catch handles any rejection rethrown by transcribeAudioFile.
transcribeAudioFile('path/to/your/audio.flac')
  .then(result => {
    console.log('Processing complete!');
  })
  .catch(error => {
    console.error('Processing failed:', error);
  });

Audio Format Requirements

For optimal recognition with the API, prepare your audio files using the following FFmpeg command:

ffmpeg -i <your file> \
  -ar 16000 \
  -ac 1 \
  -map 0:a \
  -c:a flac \
  <output file name>.flac

This converts your audio to:

  • 16kHz sample rate

  • Mono (single channel)

  • FLAC encoding (for optimal quality and file size)

  • Note: the resulting file must still be within the API's 20 MB maximum upload size

Language Support

The API currently supports the following languages:

// Language options accepted by the `language` field
const languages = {
  English: 'en',
  Hindi: 'hi'
};

// Example: Transcribe Hindi audio
// (snippet assumes it runs inside an async function, like the examples above)
const result = await stt.sendBase64Audio({
  audio: audioBase64,
  language: 'hi',
  // other options...
});

Enabling Triggers

You can enable various analysis triggers to extract additional insights from your audio:

import { TRIGGER } from 'klarisent-stt-sdk/trigger.enum';

// Available triggers — each one requested here adds a keyed entry
// under result.data.trigger in the response
const result = await stt.sendBase64Audio({
  audio: audioBase64,
  language: 'en',
  trigger: [
    TRIGGER.QuestionDetection,        // Detect questions in speech
    TRIGGER.SentimentAnalysis,        // Basic sentiment analysis
    TRIGGER.SentimentAnalysisAdvanced // More detailed sentiment analysis
  ]
});

// Accessing trigger results — result.data.trigger is only present
// when at least one trigger was requested, and each key is only
// present for the triggers that were enabled
if (result.data.trigger) {
  // Question detection results
  if (result.data.trigger[TRIGGER.QuestionDetection]) {
    const questions = result.data.trigger[TRIGGER.QuestionDetection];
    console.log('Questions:', questions);
  }
  
  // Basic sentiment analysis: { sentiment, text } per this example
  if (result.data.trigger[TRIGGER.SentimentAnalysis]) {
    const sentiment = result.data.trigger[TRIGGER.SentimentAnalysis];
    console.log('Sentiment:', sentiment.sentiment);
    console.log('Text analyzed:', sentiment.text);
  }
  
  // Advanced sentiment analysis: same shape, more detailed model
  if (result.data.trigger[TRIGGER.SentimentAnalysisAdvanced]) {
    const advancedSentiment = result.data.trigger[TRIGGER.SentimentAnalysisAdvanced];
    console.log('Advanced sentiment:', advancedSentiment.sentiment);
    console.log('Text analyzed:', advancedSentiment.text);
  }
}

Error Handling

Implement robust error handling for API requests:

try {
  const result = await stt.sendBase64Audio({
    audio: audioBase64,
    language: 'en'
  });
  // Process successful result
} catch (error) {
  // Branch on the HTTP status attached to the SDK error
  if (error.status === 401) {
    // Unauthorized: bad or missing API key
    console.error('Authentication failed. Check your API key.');
  } else if (error.status === 413) {
    // Payload too large: audio exceeded the 20 MB limit
    console.error('Audio file too large. Maximum size exceeded.');
  } else if (error.status === 400) {
    // Malformed request (e.g. invalid base64 or unsupported language)
    console.error('Bad request:', error.message);
  } else {
    // Anything else: network failures, 5xx responses, etc.
    console.error('Transcription failed:', error.message);
  }
}

Processing Multiple Files

For batch processing of multiple audio files:

// Transcribes every file in filePaths sequentially (one request at a
// time), collecting a per-file record of the form
// { filePath, transcription, success: true } on success or
// { filePath, error, success: false } on failure.
// A failure on one file never aborts the rest of the batch.
async function batchTranscribe(filePaths) {
  const results = [];

  for (const filePath of filePaths) {
    try {
      console.log(`Processing ${filePath}...`);

      // Encode the file and send it for transcription
      const encoded = fs.readFileSync(filePath, { encoding: 'base64' });
      const response = await stt.sendBase64Audio({
        audio: encoded,
        fileName: filePath.split('/').pop(),
        language: 'en'
      });

      results.push({ filePath, transcription: response.data.transcription, success: true });
    } catch (err) {
      // Record the failure and move on to the next file
      results.push({ filePath, error: err.message, success: false });
    }
  }

  return results;
}

// Example usage
// Example usage: transcribe three files and print a per-file summary.
const files = [
  'audio/file1.flac',
  'audio/file2.flac',
  'audio/file3.flac'
];

batchTranscribe(files)
  .then(results => {
    console.log('Batch processing complete:');
    results.forEach(result => {
      if (result.success) {
        console.log(`✅ ${result.filePath}: ${result.transcription.substring(0, 50)}...`);
      } else {
        console.log(`❌ ${result.filePath}: ${result.error}`);
      }
    });
  })
  // Terminate the chain with a catch so an unexpected rejection
  // (anything not caught by batchTranscribe's per-file try/catch)
  // does not become an unhandled promise rejection.
  .catch(error => {
    console.error('Batch processing failed:', error);
  });

Converting Common Audio Formats

Helper function to convert common audio formats to the required FLAC format:

const { execSync } = require('child_process');
const fs = require('fs');
const path = require('path');

/**
 * Converts inputFile to a 16kHz mono FLAC file under ./temp and returns
 * the path of the converted file.
 *
 * Security fix: the original interpolated inputFile into a shell command
 * string via execSync, so a path containing quotes or shell
 * metacharacters could break — or break out of — the command. Passing an
 * argument array to execFileSync sends each value directly to ffmpeg
 * with no shell involved.
 *
 * @param {string} inputFile - Path to the source audio file.
 * @returns {string} Path to the converted .flac file.
 * @throws {Error} "Audio conversion failed" when ffmpeg exits non-zero.
 */
function convertToFlac(inputFile) {
  // Local require keeps this helper self-contained in CommonJS snippets
  const { execFileSync } = require('child_process');
  const tempDir = path.join(__dirname, 'temp');

  // recursive: true is a no-op when the directory already exists
  fs.mkdirSync(tempDir, { recursive: true });

  const outputFile = path.join(tempDir, `${Date.now()}.flac`);

  try {
    // 16kHz, mono, audio streams only, FLAC codec — matches the
    // "Audio Format Requirements" section above
    execFileSync('ffmpeg', [
      '-i', inputFile,
      '-ar', '16000',
      '-ac', '1',
      '-map', '0:a',
      '-c:a', 'flac',
      outputFile
    ]);

    // Return the path to the converted file
    return outputFile;
  } catch (error) {
    console.error('Conversion failed:', error.message);
    throw new Error('Audio conversion failed');
  }
}

// Usage example
async function transcribeAnyAudioFile(filePath) {
  try {
    // Convert to FLAC if needed
    const flacFile = convertToFlac(filePath);
    
    // Read as base64
    const audioBase64 = fs.readFileSync(flacFile, { encoding: 'base64' });
    
    // Send for transcription
    const result = await stt.sendBase64Audio({
      audio: audioBase64,
      language: 'en'
    });
    
    // Clean up temp file
    fs.unlinkSync(flacFile);
    
    return result;
  } catch (error) {
    console.error('Process failed:', error);
    throw error;
  }
}

Complete API Example

Here's a complete example demonstrating API-based transcription:

import KlarisentSTT from 'klarisent-stt-sdk';
import { TRIGGER } from 'klarisent-stt-sdk/trigger.enum';
import fs from 'fs';
import path from 'path';

/**
 * End-to-end example: read a sample FLAC file, transcribe it with
 * question detection and sentiment analysis, print the results, and
 * save the full response as JSON under ./results.
 *
 * Fix: fs.writeFileSync throws ENOENT if ./results does not exist;
 * the directory is now created first with { recursive: true }.
 * Exits the process with code 1 on any failure.
 */
async function main() {
  // Initialize SDK — API key comes from the environment, debug logging on
  const stt = new KlarisentSTT({
    api_key: process.env.KLARISENT_API_KEY,
    debug: true
  });
  
  // Path to audio file
  const audioFile = path.join(__dirname, 'samples', 'recording.flac');
  
  try {
    // Read the file as base64
    console.log(`Reading file: ${audioFile}`);
    const audioBase64 = fs.readFileSync(audioFile, { encoding: 'base64' });
    
    // Send for transcription with two triggers enabled
    console.log('Sending audio for transcription...');
    const result = await stt.sendBase64Audio({
      audio: audioBase64,
      fileName: path.basename(audioFile),
      language: 'en',
      trigger: [
        TRIGGER.QuestionDetection,
        TRIGGER.SentimentAnalysis
      ]
    });
    
    // Display results
    console.log('\n--- Transcription Results ---');
    console.log(`Status: ${result.status} - ${result.message}`);
    console.log(`Transcription: ${result.data.transcription}`);
    console.log(`Duration: ${result.data.duration.toFixed(2)} seconds`);
    console.log(`Size: ${result.data.audioSizeKB.toFixed(2)} KB`);
    console.log(`Processing Time: ${result.data.transcriptionTime} ms`);
    
    // Display trigger results (only present when triggers were requested)
    if (result.data.trigger) {
      if (result.data.trigger[TRIGGER.QuestionDetection]) {
        console.log('\n--- Detected Questions ---');
        result.data.trigger[TRIGGER.QuestionDetection].forEach((q, i) => {
          console.log(`${i+1}. ${q}`);
        });
      }
      
      if (result.data.trigger[TRIGGER.SentimentAnalysis]) {
        const sentiment = result.data.trigger[TRIGGER.SentimentAnalysis];
        console.log('\n--- Sentiment Analysis ---');
        console.log(`Sentiment: ${sentiment.sentiment}`);
      }
    }
    
    // Write results to file — create the results directory first so
    // writeFileSync cannot fail with ENOENT on a fresh checkout
    const resultsDir = path.join(__dirname, 'results');
    fs.mkdirSync(resultsDir, { recursive: true });
    const outputFile = path.join(resultsDir, `${path.basename(audioFile, '.flac')}.json`);
    fs.writeFileSync(outputFile, JSON.stringify(result, null, 2));
    console.log(`\nResults saved to: ${outputFile}`);
    
  } catch (error) {
    console.error('Error during transcription:', error);
    process.exit(1);
  }
}

main();

Last updated