Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

- feat: update llama.cpp to ggml-org/llama.cpp@8f83d6c27
- fix(example): support multi-step Responses tool streaming by @abetlen in #2288
- fix(ci): Repair Linux accelerator wheels for manylinux publishing

Expand Down
217 changes: 211 additions & 6 deletions llama_cpp/mtmd_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import os
import warnings
from ctypes import (
CFUNCTYPE,
c_bool,
c_char_p,
c_int,
c_int64,
c_uint8,
c_uint32,
c_size_t,
Expand Down Expand Up @@ -62,6 +64,9 @@
mtmd_bitmap_p = NewType("mtmd_bitmap_p", int)
mtmd_bitmap_p_ctypes = c_void_p

mtmd_helper_video_p = NewType("mtmd_helper_video_p", int)
mtmd_helper_video_p_ctypes = c_void_p

mtmd_image_tokens_p = NewType("mtmd_image_tokens_p", int)
mtmd_image_tokens_p_ctypes = c_void_p

Expand Down Expand Up @@ -151,6 +156,60 @@ class mtmd_caps(Structure):
]


mtmd_bitmap_lazy_callback = CFUNCTYPE(
c_int,
c_size_t,
c_void_p,
POINTER(mtmd_bitmap_p_ctypes),
POINTER(c_char_p),
)


class mtmd_helper_bitmap_wrapper(Structure):
"""Bitmap wrapper returned by MTMD helper media loaders."""

if TYPE_CHECKING:
bitmap: Optional[mtmd_bitmap_p]
video_ctx: Optional[mtmd_helper_video_p]

_fields_ = [
("bitmap", mtmd_bitmap_p_ctypes),
("video_ctx", mtmd_helper_video_p_ctypes),
]


class mtmd_helper_video_info(Structure):
"""Metadata for a decoded video stream."""

if TYPE_CHECKING:
width: int
height: int
fps: float
n_frames: int

_fields_ = [
("width", c_uint32),
("height", c_uint32),
("fps", c_float),
("n_frames", c_int),
]


class mtmd_helper_video_init_params(Structure):
"""Parameters for initializing an MTMD helper video stream."""

if TYPE_CHECKING:
fps_target: float
ffmpeg_bin_dir: Optional[bytes]
timestamp_interval_ms: int

_fields_ = [
("fps_target", c_float),
("ffmpeg_bin_dir", c_char_p),
("timestamp_interval_ms", c_int64),
]


################################################
# mtmd.h functions
################################################
Expand Down Expand Up @@ -234,6 +293,13 @@ def mtmd_get_audio_sample_rate(ctx: mtmd_context_p, /) -> int:
...


# MTMD_API const char * mtmd_get_marker(const mtmd_context * ctx);
@ctypes_function("mtmd_get_marker", [mtmd_context_p_ctypes], c_char_p)
def mtmd_get_marker(ctx: mtmd_context_p, /) -> Optional[bytes]:
"""Get the current media marker string."""
...


# Deprecated compatibility wrapper for the renamed mtmd_get_audio_sample_rate().
def mtmd_get_audio_bitrate(ctx: mtmd_context_p, /) -> int:
warnings.warn(
Expand Down Expand Up @@ -325,6 +391,26 @@ def mtmd_bitmap_set_id(bitmap: mtmd_bitmap_p, id: Optional[bytes], /):
...


# MTMD_API mtmd_bitmap * mtmd_bitmap_init_lazy(mtmd_context * ctx,
# const char * id,
# void * user_data,
# mtmd_bitmap_lazy_callback callback);
@ctypes_function(
"mtmd_bitmap_init_lazy",
[mtmd_context_p_ctypes, c_char_p, c_void_p, mtmd_bitmap_lazy_callback],
mtmd_bitmap_p_ctypes,
)
def mtmd_bitmap_init_lazy(
ctx: mtmd_context_p,
id: Optional[bytes],
user_data: c_void_p,
callback: mtmd_bitmap_lazy_callback,
/,
) -> Optional[mtmd_bitmap_p]:
"""Initialize a lazy MTMD bitmap."""
...


# MTMD_API mtmd_input_chunks * mtmd_input_chunks_init(void);
@ctypes_function("mtmd_input_chunks_init", [], mtmd_input_chunks_p_ctypes)
def mtmd_input_chunks_init() -> Optional[mtmd_input_chunks_p]: ...
Expand Down Expand Up @@ -551,32 +637,63 @@ def mtmd_test_create_input_chunks() -> Optional[mtmd_input_chunks_p]:
################################################


# MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname, bool placeholder);
# MTMD_API bool mtmd_helper_support_video(mtmd_context * ctx);
@ctypes_function(
"mtmd_helper_support_video",
[mtmd_context_p_ctypes],
c_bool,
)
def mtmd_helper_support_video(ctx: mtmd_context_p, /) -> bool:
"""Check whether MTMD helper video support is available."""
...


# MTMD_API struct mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname, bool placeholder);
@ctypes_function(
"mtmd_helper_bitmap_init_from_file",
[mtmd_context_p_ctypes, c_char_p, c_bool],
mtmd_bitmap_p_ctypes,
mtmd_helper_bitmap_wrapper,
)
def mtmd_helper_bitmap_init_from_file_wrapper(
ctx: mtmd_context_p, fname: bytes, placeholder: Union[c_bool, bool], /
) -> mtmd_helper_bitmap_wrapper:
"""Initialize an MTMD bitmap wrapper from a file."""
...


def mtmd_helper_bitmap_init_from_file(
ctx: mtmd_context_p, fname: bytes, placeholder: Union[c_bool, bool], /
) -> Optional[mtmd_bitmap_p]:
"""Initialize an MTMD bitmap from a file."""
...
return mtmd_helper_bitmap_init_from_file_wrapper(ctx, fname, placeholder).bitmap


# MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigned char * buf, size_t len, bool placeholder);
# MTMD_API struct mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigned char * buf, size_t len, bool placeholder);
@ctypes_function(
"mtmd_helper_bitmap_init_from_buf",
[mtmd_context_p_ctypes, POINTER(c_uint8), c_size_t, c_bool],
mtmd_bitmap_p_ctypes,
mtmd_helper_bitmap_wrapper,
)
def mtmd_helper_bitmap_init_from_buf_wrapper(
ctx: mtmd_context_p,
buf: CtypesArray[c_uint8],
length: Union[c_size_t, int],
placeholder: Union[c_bool, bool],
/,
) -> mtmd_helper_bitmap_wrapper: ...


def mtmd_helper_bitmap_init_from_buf(
ctx: mtmd_context_p,
buf: CtypesArray[c_uint8],
length: Union[c_size_t, int],
placeholder: Union[c_bool, bool],
/,
) -> Optional[mtmd_bitmap_p]: ...
) -> Optional[mtmd_bitmap_p]:
"""Initialize an MTMD bitmap from a buffer."""
return mtmd_helper_bitmap_init_from_buf_wrapper(
ctx, buf, length, placeholder
).bitmap


# MTMD_API size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks);
Expand Down Expand Up @@ -719,6 +836,94 @@ def mtmd_helper_decode_image_chunk(
...


# MTMD_API struct mtmd_helper_video_init_params mtmd_helper_video_init_params_default(void);
@ctypes_function(
"mtmd_helper_video_init_params_default", [], mtmd_helper_video_init_params
)
def mtmd_helper_video_init_params_default() -> mtmd_helper_video_init_params:
"""Return the default MTMD helper video initialization parameters."""
...


# MTMD_API mtmd_helper_video * mtmd_helper_video_init(
# struct mtmd_context * mctx,
# const char * path,
# struct mtmd_helper_video_init_params params);
@ctypes_function(
"mtmd_helper_video_init",
[mtmd_context_p_ctypes, c_char_p, mtmd_helper_video_init_params],
mtmd_helper_video_p_ctypes,
)
def mtmd_helper_video_init(
ctx: mtmd_context_p,
path: bytes,
params: mtmd_helper_video_init_params,
/,
) -> Optional[mtmd_helper_video_p]:
"""Initialize an MTMD helper video stream from a file path."""
...


# MTMD_API mtmd_helper_video * mtmd_helper_video_init_from_buf(
# struct mtmd_context * mctx,
# const unsigned char * buf, size_t len,
# struct mtmd_helper_video_init_params params);
@ctypes_function(
"mtmd_helper_video_init_from_buf",
[mtmd_context_p_ctypes, POINTER(c_uint8), c_size_t, mtmd_helper_video_init_params],
mtmd_helper_video_p_ctypes,
)
def mtmd_helper_video_init_from_buf(
ctx: mtmd_context_p,
buf: CtypesArray[c_uint8],
length: Union[c_size_t, int],
params: mtmd_helper_video_init_params,
/,
) -> Optional[mtmd_helper_video_p]:
"""Initialize an MTMD helper video stream from a buffer."""
...


# MTMD_API void mtmd_helper_video_free(mtmd_helper_video * ctx);
@ctypes_function("mtmd_helper_video_free", [mtmd_helper_video_p_ctypes], None)
def mtmd_helper_video_free(ctx: mtmd_helper_video_p, /):
"""Free an MTMD helper video stream."""
...


# MTMD_API struct mtmd_helper_video_info mtmd_helper_video_get_info(const mtmd_helper_video * ctx);
@ctypes_function(
"mtmd_helper_video_get_info",
[mtmd_helper_video_p_ctypes],
mtmd_helper_video_info,
)
def mtmd_helper_video_get_info(ctx: mtmd_helper_video_p, /) -> mtmd_helper_video_info:
"""Get metadata for an MTMD helper video stream."""
...


# MTMD_API int32_t mtmd_helper_video_read_next(mtmd_helper_video * ctx,
# mtmd_bitmap ** out_bitmap,
# char ** out_text);
@ctypes_function(
"mtmd_helper_video_read_next",
[
mtmd_helper_video_p_ctypes,
POINTER(mtmd_bitmap_p_ctypes),
POINTER(c_char_p),
],
c_int,
)
def mtmd_helper_video_read_next(
ctx: mtmd_helper_video_p,
out_bitmap: "_Pointer[mtmd_bitmap_p_ctypes]",
out_text: "_Pointer[c_char_p]",
/,
) -> int:
"""Read the next bitmap or text chunk from an MTMD helper video stream."""
...


# MTMD_API void mtmd_log_set(ggml_log_callback log_callback, void * user_data);
@ctypes_function(
"mtmd_log_set",
Expand Down
Loading