PyWhisperCpp API Reference

pywhispercpp.model

This module contains a simple Python API on top of the C-style whisper.cpp API.

Segment

Segment(t0, t1, text)

A small class representing a transcription segment

Parameters:

t0 (int) –

start time
t1 (int) –

end time
text (str) –

text

Source code in pywhispercpp/model.py

def __init__(self, t0: int, t1: int, text: str):
    """
    Store the boundaries and the text of one transcription segment.

    :param t0: start time
    :param t1: end time
    :param text: text
    """
    self.t0, self.t1, self.text = t0, t1, text

Model

Model(
    model="tiny",
    models_dir=None,
    params_sampling_strategy=0,
    redirect_whispercpp_logs_to=False,
    **params
)

This class defines a Whisper.cpp model.

Example usage.

model = Model('base.en', n_threads=6)
segments = model.transcribe('file.mp3')
for segment in segments:
    print(segment.text)

Parameters:

model (str, default: 'tiny' ) –

The name of the model, one of the AVAILABLE_MODELS, (default to tiny), or a direct path to a ggml model.
models_dir (str, default: None ) –

The directory where the models are stored, or where they will be downloaded if they don't exist, default to MODELS_DIR
params_sampling_strategy (int, default: 0 ) –

0 -> GREEDY, else BEAM_SEARCH
redirect_whispercpp_logs_to (Union[bool, TextIO, str, None], default: False ) –

where to redirect the whisper.cpp logs, default to False (no redirection), accepts str file path, sys.stdout, sys.stderr, or use None to redirect to devnull
params –

keyword arguments for different whisper.cpp parameters, see PARAMS_SCHEMA

Source code in pywhispercpp/model.py

def __init__(self,
             model: str = 'tiny',
             models_dir: str = None,
             params_sampling_strategy: int = 0,
             redirect_whispercpp_logs_to: Union[bool, TextIO, str, None] = False,
             **params):
    """
    Resolves the model file, prepares the whisper.cpp parameters and initializes the model.

    :param model: The name of the model, one of the [AVAILABLE_MODELS](/pywhispercpp/#pywhispercpp.constants.AVAILABLE_MODELS),
                    (default to `tiny`), or a direct path to a `ggml` model.
    :param models_dir: The directory where the models are stored, or where they will be downloaded if they don't
                        exist, default to [MODELS_DIR](/pywhispercpp/#pywhispercpp.constants.MODELS_DIR) <user_data_dir/pywhispercpp/models>
    :param params_sampling_strategy: 0 -> GREEDY, else BEAM_SEARCH
    :param redirect_whispercpp_logs_to: where to redirect the whisper.cpp logs, default to False (no redirection), accepts str file path, sys.stdout, sys.stderr, or use None to redirect to devnull
    :param params: keyword arguments for different whisper.cpp parameters,
                    see [PARAMS_SCHEMA](/pywhispercpp/#pywhispercpp.constants.PARAMS_SCHEMA)
    """
    # an existing file is used as-is, anything else is handed to the downloader as a model name
    self.model_path = model if Path(model).is_file() else utils.download_model(model, models_dir)
    self._ctx = None
    if params_sampling_strategy == 0:
        self._sampling_strategy = pw.whisper_sampling_strategy.WHISPER_SAMPLING_GREEDY
    else:
        self._sampling_strategy = pw.whisper_sampling_strategy.WHISPER_SAMPLING_BEAM_SEARCH
    # start from the whisper.cpp defaults of the chosen strategy, then apply the user overrides
    self._params = pw.whisper_full_default_params(self._sampling_strategy)
    self._set_params(params)
    self.redirect_whispercpp_logs_to = redirect_whispercpp_logs_to
    # the model is initialized last, once every setting above is in place
    self._init_model()

transcribe

transcribe(
    media,
    n_processors=None,
    new_segment_callback=None,
    **params
)

Transcribes the media provided as input and returns list of Segment objects. Accepts a media_file path (audio/video) or a raw numpy array.

Parameters:

media (Union[str, ndarray]) –

Media file path or a numpy array
n_processors (int, default: None ) –

if not None, it will run the transcription on multiple processes binding to whisper.cpp/whisper_full_parallel > Split the input audio in chunks and process each chunk separately using whisper_full()
new_segment_callback (Callable[[Segment], None], default: None ) –

callback function that will be called when a new segment is generated
params –

keyword arguments for different whisper.cpp parameters, see constants.PARAMS_SCHEMA

Returns:

List[Segment] –

List of transcription segments

Source code in pywhispercpp/model.py

def transcribe(self,
               media: Union[str, np.ndarray],
               n_processors: int = None,
               new_segment_callback: Callable[[Segment], None] = None,
               **params) -> List[Segment]:
    """
    Transcribes the media provided as input and returns list of `Segment` objects.
    Accepts a media_file path (audio/video) or a raw numpy array.

    :param media: Media file path or a numpy array (any `np.ndarray` subclass is used as raw audio)
    :param n_processors: if not None, it will run the transcription on multiple processes
                         binding to whisper.cpp/whisper_full_parallel
                         > Split the input audio in chunks and process each chunk separately using whisper_full()
    :param new_segment_callback: callback function that will be called when a new segment is generated
    :param params: keyword arguments for different whisper.cpp parameters, see constants.PARAMS_SCHEMA

    :return: List of transcription segments
    :raises FileNotFoundError: if `media` is not an array and the path does not exist
    """
    # isinstance instead of an exact type check: ndarray subclasses (e.g. np.memmap)
    # are raw audio as well and must not be mistaken for a file path
    if isinstance(media, np.ndarray):
        audio = media
    else:
        if not Path(media).exists():
            raise FileNotFoundError(media)
        audio = self._load_audio(media)
    # update params if any
    self._set_params(params)

    # setting up callback
    if new_segment_callback:
        # the callback is stored on the class, so it is shared by every Model instance
        Model._new_segment_callback = new_segment_callback
        pw.assign_new_segment_callback(self._params, Model.__call_new_segment_callback)

    # run inference
    start_time = time()
    logger.info("Transcribing ...")
    res = self._transcribe(audio, n_processors=n_processors)
    end_time = time()
    logger.info(f"Inference time: {end_time - start_time:.3f} s")
    return res

get_params

get_params()

Returns a dict representation of the actual params

Returns:

dict –

params dict

Source code in pywhispercpp/model.py

def get_params(self) -> dict:
    """
    Returns a `dict` representation of the actual params

    Every attribute of the params object whose name does not start with a double
    underscore is read; attributes that raise on access are left out.

    :return: params dict
    """
    params = self._params
    collected = {}
    public_names = (name for name in dir(params) if not name.startswith('__'))
    for name in public_names:
        try:
            value = getattr(params, name)
        except Exception:
            # ignore callback functions
            continue
        collected[name] = value
    return collected

get_params_schema `staticmethod`

get_params_schema()

A simple link to constants.PARAMS_SCHEMA

Returns:

dict –

dict of params schema

Source code in pywhispercpp/model.py

@staticmethod
def get_params_schema() -> dict:
    """
    Returns the schema of the supported whisper.cpp parameters.

    This is a thin accessor: the `constants.PARAMS_SCHEMA` object itself is returned, not a copy.

    :return: dict of params schema
    """
    return constants.PARAMS_SCHEMA

lang_max_id `staticmethod`

lang_max_id()

Returns the largest supported language id (i.e. the number of supported languages minus one). Direct binding to whisper.cpp/whisper_lang_max_id

Returns:

int –

Source code in pywhispercpp/model.py

@staticmethod
def lang_max_id() -> int:
    """
    Direct binding to whisper.cpp/whisper_lang_max_id

    NOTE(review): whisper.h describes this value as the largest language id
    (number of available languages - 1) rather than a language count -- confirm
    before using it as a length.

    :return: the value returned by `pw.whisper_lang_max_id()`
    """
    return pw.whisper_lang_max_id()

print_timings

print_timings()

Direct binding to whisper.cpp/whisper_print_timings

Returns:

None –

None

Source code in pywhispercpp/model.py

def print_timings(self) -> None:
    """
    Direct binding to whisper.cpp/whisper_print_timings, called with this model's context (`self._ctx`)

    :return: None
    """
    pw.whisper_print_timings(self._ctx)

system_info `staticmethod`

system_info()

Direct binding to whisper.cpp/whisper_print_system_info

Returns:

None –

None

Source code in pywhispercpp/model.py

@staticmethod
def system_info() -> None:
    """
    Direct binding to whisper.cpp/whisper_print_system_info

    :return: the result of the binding call, passed through unchanged.
             NOTE(review): the `-> None` annotation suggests nothing is returned, but
             whisper.cpp's function presumably yields the system-info string -- confirm
             against the binding.
    """
    return pw.whisper_print_system_info()

available_languages `staticmethod`

available_languages()

Returns a list of supported language codes

Returns:

list[str] –

list of supported language codes

Source code in pywhispercpp/model.py

@staticmethod
def available_languages() -> list[str]:
    """
    Returns a list of supported language codes

    The list is indexed by whisper.cpp language id.

    :return: list of supported language codes
    """
    # whisper_lang_max_id() is the largest valid id (not a count), so the ids
    # run from 0 to max_id inclusive; range(max_id) would drop the last language
    max_id = pw.whisper_lang_max_id()
    return [pw.whisper_lang_str(lang_id) for lang_id in range(max_id + 1)]

auto_detect_language

auto_detect_language(media, offset_ms=0, n_threads=4)

Automatic language detection using whisper.cpp/whisper_pcm_to_mel and whisper.cpp/whisper_lang_auto_detect

Parameters:

media (Union[str, ndarray]) –

Media file path or a numpy array
offset_ms (int, default: 0 ) –

offset in milliseconds
n_threads (int, default: 4 ) –

number of threads to use

Returns:

Tuple[Tuple[str, float32], dict[str, float32]] –

((detected_language, probability), probabilities for all languages)

Source code in pywhispercpp/model.py

def auto_detect_language(self,  media: Union[str, np.ndarray], offset_ms: int = 0, n_threads: int = 4) -> Tuple[Tuple[str, np.float32], dict[str, np.float32]]:
    """
    Automatic language detection using whisper.cpp/whisper_pcm_to_mel and whisper.cpp/whisper_lang_auto_detect

    :param media: Media file path or a numpy array (any `np.ndarray` subclass is used as raw audio)
    :param offset_ms: offset in milliseconds
    :param n_threads: number of threads to use
    :return: ((detected_language, probability), probabilities for all languages)
    :raises FileNotFoundError: if `media` is not an array and the path does not exist
    :raises RuntimeError: if whisper.cpp reports a detection failure (negative language id)
    """
    if isinstance(media, np.ndarray):
        audio = media
    else:
        if not Path(media).exists():
            raise FileNotFoundError(media)
        audio = self._load_audio(media)

    pw.whisper_pcm_to_mel(self._ctx, audio, len(audio), n_threads)
    # lang_max_id() is the largest language id, and whisper.cpp fills one probability
    # per id, so the buffer needs max_id + 1 slots (one less would be overrun)
    n_langs = self.lang_max_id() + 1
    probs = np.zeros(n_langs, dtype=np.float32)
    detected_id = pw.whisper_lang_auto_detect(self._ctx, offset_ms, n_threads, probs)
    if detected_id < 0:
        # a negative id is an error code; using it as an index would silently pick a wrong language
        raise RuntimeError(f"Language auto-detection failed (whisper.cpp returned {detected_id})")
    # resolve the codes for every id here so that names and probabilities always line up
    langs = [pw.whisper_lang_str(lang_id) for lang_id in range(n_langs)]
    lang_probs = dict(zip(langs, probs))
    return (langs[detected_id], probs[detected_id]), lang_probs

pywhispercpp.constants

Constants

WHISPER_SAMPLE_RATE `module-attribute`

WHISPER_SAMPLE_RATE = WHISPER_SAMPLE_RATE

MODELS_BASE_URL `module-attribute`

MODELS_BASE_URL = (
    "https://huggingface.co/ggerganov/whisper.cpp"
)

MODELS_PREFIX_URL `module-attribute`

MODELS_PREFIX_URL = 'resolve/main/ggml'

PACKAGE_NAME `module-attribute`

PACKAGE_NAME = 'pywhispercpp'

MODELS_DIR `module-attribute`

MODELS_DIR = Path(user_data_dir(PACKAGE_NAME)) / 'models'

AVAILABLE_MODELS `module-attribute`

AVAILABLE_MODELS = [
    "base",
    "base-q5_1",
    "base-q8_0",
    "base.en",
    "base.en-q5_1",
    "base.en-q8_0",
    "large-v1",
    "large-v2",
    "large-v2-q5_0",
    "large-v2-q8_0",
    "large-v3",
    "large-v3-q5_0",
    "large-v3-turbo",
    "large-v3-turbo-q5_0",
    "large-v3-turbo-q8_0",
    "medium",
    "medium-q5_0",
    "medium-q8_0",
    "medium.en",
    "medium.en-q5_0",
    "medium.en-q8_0",
    "small",
    "small-q5_1",
    "small-q8_0",
    "small.en",
    "small.en-q5_1",
    "small.en-q8_0",
    "tiny",
    "tiny-q5_1",
    "tiny-q8_0",
    "tiny.en",
    "tiny.en-q5_1",
    "tiny.en-q8_0",
]

PARAMS_SCHEMA `module-attribute`

# Schema of the whisper.cpp parameters that can be passed as keyword arguments.
# Every entry maps a parameter name to its type, a human readable description,
# the accepted options (None when unrestricted) and its default value.
PARAMS_SCHEMA = {
    "n_threads": {
        "type": int,
        "description": "Number of threads to allocate for the inference, default to min(4, available hardware_concurrency)",
        "options": None,
        "default": None,
    },
    "n_max_text_ctx": {
        "type": int,
        "description": "max tokens to use from past text as prompt for the decoder",
        "options": None,
        "default": 16384,
    },
    "offset_ms": {
        "type": int,
        "description": "start offset in ms",
        "options": None,
        "default": 0,
    },
    "duration_ms": {
        "type": int,
        "description": "audio duration to process in ms",
        "options": None,
        "default": 0,
    },
    "translate": {
        "type": bool,
        "description": "whether to translate the audio to English",
        "options": None,
        "default": False,
    },
    "no_context": {
        "type": bool,
        "description": "do not use past transcription (if any) as initial prompt for the decoder",
        "options": None,
        "default": False,
    },
    "single_segment": {
        "type": bool,
        "description": "force single segment output (useful for streaming)",
        "options": None,
        "default": False,
    },
    "print_special": {
        "type": bool,
        "description": "print special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.)",
        "options": None,
        "default": False,
    },
    "print_progress": {
        "type": bool,
        "description": "print progress information",
        "options": None,
        "default": True,
    },
    "print_realtime": {
        "type": bool,
        "description": "print results from within whisper.cpp (avoid it, use callback instead)",
        "options": None,
        "default": False,
    },
    "print_timestamps": {
        "type": bool,
        "description": "print timestamps for each text segment when printing realtime",
        "options": None,
        "default": True,
    },
    "token_timestamps": {
        "type": bool,
        "description": "enable token-level timestamps",
        "options": None,
        "default": False,
    },
    "thold_pt": {
        "type": float,
        "description": "timestamp token probability threshold (~0.01)",
        "options": None,
        "default": 0.01,
    },
    "thold_ptsum": {
        "type": float,
        "description": "timestamp token sum probability threshold (~0.01)",
        "options": None,
        "default": 0.01,
    },
    "max_len": {
        "type": int,
        "description": "max segment length in characters, note: token_timestamps needs to be set to True for this to work",
        "options": None,
        "default": 0,
    },
    "split_on_word": {
        "type": bool,
        "description": "split on word rather than on token (when used with max_len)",
        "options": None,
        "default": False,
    },
    "max_tokens": {
        "type": int,
        "description": "max tokens per segment (0 = no limit)",
        "options": None,
        "default": 0,
    },
    "audio_ctx": {
        "type": int,
        "description": "overwrite the audio context size (0 = use default)",
        "options": None,
        "default": 0,
    },
    "initial_prompt": {
        "type": str,
        "description": "Initial prompt, these are prepended to any existing text context from a previous call",
        "options": None,
        "default": None,
    },
    "prompt_tokens": {
        "type": Tuple,
        "description": "tokens to provide to the whisper decoder as initial prompt",
        "options": None,
        "default": None,
    },
    "prompt_n_tokens": {
        "type": int,
        "description": "tokens to provide to the whisper decoder as initial prompt",
        "options": None,
        "default": 0,
    },
    "language": {
        "type": str,
        "description": 'for auto-detection, set to None, "" or "auto"',
        "options": None,
        "default": "",
    },
    "suppress_blank": {
        "type": bool,
        "description": "common decoding parameters",
        "options": None,
        "default": True,
    },
    "suppress_non_speech_tokens": {
        "type": bool,
        "description": "common decoding parameters",
        "options": None,
        "default": False,
    },
    "temperature": {
        "type": float,
        "description": "initial decoding temperature",
        "options": None,
        "default": 0.0,
    },
    "max_initial_ts": {
        "type": float,
        "description": "max_initial_ts",
        "options": None,
        "default": 1.0,
    },
    "length_penalty": {
        "type": float,
        "description": "length_penalty",
        "options": None,
        "default": -1.0,
    },
    "temperature_inc": {
        "type": float,
        "description": "temperature_inc",
        "options": None,
        "default": 0.2,
    },
    "entropy_thold": {
        "type": float,
        "description": 'similar to OpenAI\'s "compression_ratio_threshold"',
        "options": None,
        "default": 2.4,
    },
    "logprob_thold": {
        "type": float,
        "description": "logprob_thold",
        "options": None,
        "default": -1.0,
    },
    "no_speech_thold": {
        "type": float,
        "description": "no_speech_thold",
        "options": None,
        "default": 0.6,
    },
    "greedy": {
        "type": dict,
        "description": "greedy",
        "options": None,
        "default": {"best_of": -1},
    },
    "beam_search": {
        "type": dict,
        "description": "beam_search",
        "options": None,
        "default": {"beam_size": -1, "patience": -1.0},
    },
}

pywhispercpp.utils

Helper functions

download_model

download_model(
    model_name, download_dir=None, chunk_size=1024
)

Helper function to download the ggml models

Parameters:

model_name (str) –

name of the model, one of constants.AVAILABLE_MODELS
download_dir –

Where to store the models
chunk_size –

size of the download chunk

Returns:

str –

Absolute path of the downloaded model

Source code in pywhispercpp/utils.py

def download_model(model_name: str, download_dir=None, chunk_size=1024) -> str:
    """
    Helper function to download the `ggml` models

    A model that is already present in `download_dir` is not downloaded again.

    :param model_name: name of the model, one of constants.AVAILABLE_MODELS
    :param download_dir: Where to store the models, defaults to `MODELS_DIR` when None
    :param chunk_size: size of the download chunk

    :return: Absolute path of the downloaded model, or None (after logging an error)
             when `model_name` is not one of the available models
    """
    if model_name not in AVAILABLE_MODELS:
        logger.error(f"Invalid model name `{model_name}`, available models are: {AVAILABLE_MODELS}")
        return None
    if download_dir is None:
        download_dir = MODELS_DIR
        logger.info(f"No download directory was provided, models will be downloaded to {download_dir}")

    os.makedirs(download_dir, exist_ok=True)

    url = _get_model_url(model_name=model_name)
    file_path = Path(download_dir) / os.path.basename(url)
    # check if the file is already there
    if file_path.exists():
        logger.info(f"Model {model_name} already exists in {download_dir}")
        return str(file_path.absolute())

    # download it from huggingface; the context manager releases the connection afterwards
    with requests.get(url, stream=True) as resp:
        # fail early on HTTP errors instead of saving an error page as the model file
        resp.raise_for_status()
        total = int(resp.headers.get('content-length', 0))

        progress_bar = tqdm(desc=f"Downloading Model {model_name} ...",
                            total=total,
                            unit='iB',
                            unit_scale=True,
                            unit_divisor=1024)

        try:
            with open(file_path, 'wb') as file, progress_bar:
                for data in resp.iter_content(chunk_size=chunk_size):
                    size = file.write(data)
                    progress_bar.update(size)
        except BaseException:
            # drop the partial file (also on CTRL+C), otherwise the next call would
            # take it for a complete model; missing_ok keeps the original error
            # visible when the file could not even be created
            file_path.unlink(missing_ok=True)
            raise
    logger.info(f"Model downloaded to {file_path.absolute()}")
    return str(file_path.absolute())

to_timestamp

to_timestamp(t, separator=',')

376 -> 00:00:03,760 1344 -> 00:00:13,440

Implementation from whisper.cpp/examples/main

Parameters:

t (int) –

input time from whisper timestamps
separator –

separator between seconds and milliseconds

Returns:

str –

time representation in hh:mm:ss[separator]ms

Source code in pywhispercpp/utils.py

def to_timestamp(t: int, separator=',') -> str:
    """
    376 -> 00:00:03,760
    1344 -> 00:00:13,440

    Implementation from `whisper.cpp/examples/main`

    :param t: input time from whisper timestamps (multiplied by 10 to obtain milliseconds)
    :param separator: separator between seconds and milliseconds
    :return: time representation in hh:mm:ss[separator]ms
    """
    msec = t * 10
    hours, msec = divmod(msec, 1000 * 60 * 60)
    minutes, msec = divmod(msec, 1000 * 60)
    seconds, msec = divmod(msec, 1000)
    # plain zero padded integers: a thousands separator in the hours field would
    # produce invalid timestamps such as "1,000:00:00,000"
    return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}{separator}{int(msec):03d}"

output_txt

output_txt(segments, output_file_path)

Creates a raw text from a list of segments

Implementation from whisper.cpp/examples/main

Parameters:

segments (list) –

list of segments

Returns:

str –

path of the file

Source code in pywhispercpp/utils.py

def output_txt(segments: list, output_file_path: str) -> str:
    """
    Creates a raw text from a list of segments

    Implementation from `whisper.cpp/examples/main`

    :param segments: list of segments
    :param output_file_path: path of the output file, `.txt` is appended when missing
    :return: absolute path of the file (a `pathlib.Path` object)
    """
    if not output_file_path.endswith('.txt'):
        output_file_path = output_file_path + '.txt'

    absolute_path = Path(output_file_path).absolute()

    # explicit UTF-8: transcriptions may contain characters the locale default encoding cannot represent
    with open(str(absolute_path), 'w', encoding='utf-8') as file:
        for seg in segments:
            file.write(seg.text)
            file.write('\n')
    return absolute_path

output_vtt

output_vtt(segments, output_file_path)

Creates a vtt file from a list of segments

Implementation from whisper.cpp/examples/main

Parameters:

segments (list) –

list of segments

Returns:

str –

Absolute path of the file

Source code in pywhispercpp/utils.py

def output_vtt(segments: list, output_file_path: str) -> str:
    """
    Creates a vtt file from a list of segments

    Implementation from `whisper.cpp/examples/main`

    :param segments: list of segments
    :param output_file_path: path of the output file, `.vtt` is appended when missing
    :return: Absolute path of the file (a `pathlib.Path` object)
    """
    if not output_file_path.endswith('.vtt'):
        output_file_path = output_file_path + '.vtt'

    absolute_path = Path(output_file_path).absolute()

    # explicit UTF-8: transcriptions may contain characters the locale default encoding cannot represent
    with open(absolute_path, 'w', encoding='utf-8') as file:
        file.write("WEBVTT\n\n")
        for seg in segments:
            file.write(f"{to_timestamp(seg.t0, separator='.')} --> {to_timestamp(seg.t1, separator='.')}\n")
            file.write(f"{seg.text}\n\n")
    return absolute_path

output_srt

output_srt(segments, output_file_path)

Creates a srt file from a list of segments

Parameters:

segments (list) –

list of segments

Returns:

str –

Absolute path of the file

Source code in pywhispercpp/utils.py

def output_srt(segments: list, output_file_path: str) -> str:
    """
    Creates a srt file from a list of segments

    :param segments: list of segments
    :param output_file_path: path of the output file, `.srt` is appended when missing
    :return: Absolute path of the file (a `pathlib.Path` object)
    """
    if not output_file_path.endswith('.srt'):
        output_file_path = output_file_path + '.srt'

    absolute_path = Path(output_file_path).absolute()

    # explicit UTF-8: transcriptions may contain characters the locale default encoding cannot represent
    with open(absolute_path, 'w', encoding='utf-8') as file:
        # cue numbers start at 1
        for index, seg in enumerate(segments, start=1):
            file.write(f"{index}\n")
            file.write(f"{to_timestamp(seg.t0, separator=',')} --> {to_timestamp(seg.t1, separator=',')}\n")
            file.write(f"{seg.text}\n\n")
    return absolute_path

output_csv

output_csv(segments, output_file_path)

Creates a csv file from a list of segments

Parameters:

segments (list) –

list of segments

Returns:

str –

Absolute path of the file

Source code in pywhispercpp/utils.py

def output_csv(segments: list, output_file_path: str) -> str:
    """
    Creates a csv file from a list of segments

    Each line holds the start time, the end time (both multiplied by 10) and the
    quoted segment text.

    :param segments: list of segments
    :param output_file_path: path of the output file, `.csv` is appended when missing
    :return: Absolute path of the file (a `pathlib.Path` object)
    """
    if not output_file_path.endswith('.csv'):
        output_file_path = output_file_path + '.csv'

    absolute_path = Path(output_file_path).absolute()

    # explicit UTF-8: transcriptions may contain characters the locale default encoding cannot represent
    with open(absolute_path, 'w', encoding='utf-8') as file:
        for seg in segments:
            # double embedded quotes so that they do not terminate the quoted field
            quoted_text = seg.text.replace('"', '""')
            file.write(f"{10 * seg.t0}, {10 * seg.t1}, \"{quoted_text}\"\n")
    return absolute_path

redirect_stderr

redirect_stderr(to=False)

Redirect stderr to the specified target.

Parameters:

to –
- None to suppress output (redirect to devnull), - sys.stdout to redirect to stdout, - A file path (str) to redirect to a file, - False to do nothing (no redirection).

Source code in pywhispercpp/utils.py

@contextlib.contextmanager
def redirect_stderr(to=False) -> None:
    """
    Redirect stderr to the specified target for the duration of the `with` block.

    When `sys.stderr` exposes a file descriptor, the redirection happens at descriptor
    level; otherwise `sys.stderr` itself is swapped. In both cases the previous stderr
    is restored on exit and every resource opened here is closed.

    :param to:
        - None to suppress output (redirect to devnull),
        - sys.stdout to redirect to stdout,
        - A file path (str) to redirect to a file,
        - False to do nothing (no redirection).
    :raises ValueError: if stderr has a file descriptor and `to` is none of the above
    """

    if to is False:
        # do nothing
        yield
        return

    sys.stderr.flush()
    try:
        original_stderr_fd = sys.stderr.fileno()
        has_fileno = True
    except (AttributeError, OSError):
        # Jupyter or non-standard stderr implementations
        has_fileno = False

    if has_fileno:
        opened_file = None  # file object opened here for a path target
        opened_fd = None  # raw descriptor opened here for devnull
        if to is None:
            opened_fd = os.open(os.devnull, os.O_WRONLY)
            target_fd = opened_fd
        elif isinstance(to, str):
            opened_file = open(to, 'w')
            target_fd = opened_file.fileno()
        elif hasattr(to, 'fileno'):
            target_fd = to.fileno()
        else:
            raise ValueError("Invalid `to` parameter; must be None, a filepath string, or sys.stdout/sys.stderr.")
        # dup2 below overwrites the stderr descriptor, so keep a duplicate to restore from
        saved_stderr_fd = os.dup(original_stderr_fd)
        try:
            os.dup2(target_fd, original_stderr_fd)
            yield
        finally:
            try:
                # push buffered output to the target before switching back
                sys.stderr.flush()
            finally:
                os.dup2(saved_stderr_fd, original_stderr_fd)
                os.close(saved_stderr_fd)
                if opened_file is not None:
                    opened_file.close()
                if opened_fd is not None:
                    os.close(opened_fd)
    else:
        # Replace sys.stderr directly
        original_stderr = sys.stderr
        opened_stream = None  # stream opened here, to be closed on exit
        if to is None:
            opened_stream = open(os.devnull, 'w')
        elif isinstance(to, str):
            opened_stream = open(to, 'w')
        if opened_stream is not None:
            sys.stderr = opened_stream
        elif hasattr(to, 'write'):
            sys.stderr = to
        try:
            yield
        finally:
            sys.stderr = original_stderr
            # close only what was opened here, never the original stderr
            if opened_stream is not None:
                opened_stream.close()

pywhispercpp.examples

assistant

A simple example showcasing the use of pywhispercpp as an assistant. The idea is to use a VAD to detect speech (in this example we used webrtcvad), and when speech is detected we run the inference.

Assistant

Assistant(
    model="tiny",
    input_device=None,
    silence_threshold=8,
    q_threshold=16,
    block_duration=30,
    commands_callback=None,
    **model_params
)

Assistant class

Example usage

from pywhispercpp.examples.assistant import Assistant

my_assistant = Assistant(commands_callback=print, n_threads=8)
my_assistant.start()

Parameters:

model –

whisper.cpp model name or a direct path to a ggml model
input_device (int, default: None ) –

The input device (aka microphone), keep it None to take the default
silence_threshold (int, default: 8 ) –

The duration of silence after which the inference will be running
q_threshold (int, default: 16 ) –

The inference won't be running until the data queue is having at least q_threshold elements
block_duration (int, default: 30 ) –

minimum time audio updates in ms
commands_callback (Callable[[str], None], default: None ) –

The callback to run when a command is received
model_log_level –

Logging level
model_params –

any other parameter to pass to the whisper.cpp model, see pywhispercpp.constants.PARAMS_SCHEMA

Source code in pywhispercpp/examples/assistant.py

def __init__(self,
             model='tiny',
             input_device: int = None,
             silence_threshold: int = 8,
             q_threshold: int = 16,
             block_duration: int = 30,
             commands_callback: Callable[[str], None] = None,
             **model_params):

    """
    Sets up the audio settings, the VAD and the underlying whisper.cpp model.

    :param model: whisper.cpp model name or a direct path to a `ggml` model
    :param input_device: The input device (aka microphone), keep it None to take the default
    :param silence_threshold: The duration of silence after which the inference will be running
    :param q_threshold: The inference won't be running until the data queue is having at least `q_threshold` elements
    :param block_duration: minimum time audio updates in ms
    :param commands_callback: The callback to run when a command is received
    :param model_params: any other parameter to pass to the whisper.cpp model, see pywhispercpp.constants.PARAMS_SCHEMA
    """

    # audio capture settings
    self.input_device = input_device
    self.sample_rate = constants.WHISPER_SAMPLE_RATE  # same as whisper.cpp
    self.channels = 1  # same as whisper.cpp
    self.block_duration = block_duration
    # block_duration is in ms, hence the division by 1000
    samples_per_block = self.sample_rate * self.block_duration / 1000
    self.block_size = int(samples_per_block)
    self.q = queue.Queue()

    # voice activity detection state
    self.vad = webrtcvad.Vad()
    self.silence_threshold = silence_threshold
    self.q_threshold = q_threshold
    self._silence_counter = 0

    # options always set by the assistant; the same key in model_params is rejected as a duplicate keyword
    fixed_model_options = dict(print_realtime=False,
                               print_progress=False,
                               print_timestamps=False,
                               single_segment=True,
                               no_context=True)
    self.pwccp_model = Model(model, **fixed_model_options, **model_params)
    self.commands_callback = commands_callback

start

start()

Use this function to start the assistant

Returns:

None –

None

Source code in pywhispercpp/examples/assistant.py

def start(self) -> None:
    """
    Use this function to start the assistant

    Opens the input stream and keeps it open until a `KeyboardInterrupt` (CTRL+C) is received.

    :return: None
    """
    logging.info("Starting Assistant ...")
    stream = sd.InputStream(device=self.input_device,  # the default input device
                            channels=self.channels,
                            samplerate=constants.WHISPER_SAMPLE_RATE,
                            blocksize=self.block_size,
                            callback=self._audio_callback)
    with stream:
        try:
            logging.info("Assistant is listening ... (CTRL+C to stop)")
            # the stream callback does the work; this loop only idles until interrupted
            while True:
                time.sleep(0.1)
        except KeyboardInterrupt:
            logging.info("Assistant stopped")

gui

WorkerSignals

Bases: QObject

Defines signals available from a running worker thread. Supported signals are: - finished: No data - error: tuple (exctype, value, traceback.format_exc()) - result: list (the transcribed segments) - progress: int (0-100) - status_update: str