embed_llamar            Embedding provider for 'ragnar' / standalone use
llama_backend_devices   List available backend devices
llama_batch_free        Free a llama batch allocated with
                        'llama_batch_init()'
llama_batch_init        Initialize a llama batch
llama_chat_apply_template
                        Apply chat template to messages
llama_chat_builtin_templates
                        List built-in chat templates
llama_chat_template     Get model's built-in chat template
llama_detokenize        Detokenize token IDs back to text
llama_embed_batch       Compute embeddings for multiple texts in a batch
llama_embeddings        Extract embeddings for a text
llama_encode            Encode tokens using the encoder
                        (encoder-decoder models only)
llama_free_context      Free an inference context
llama_free_model        Free a loaded model
llama_generate          Generate text from a prompt
llama_get_embeddings    Get all output token embeddings as a matrix
llama_get_embeddings_ith
                        Get embeddings for the i-th token in the batch
llama_get_embeddings_seq
                        Get pooled embeddings for a sequence
llama_get_logits        Get logits from the last decode step
llama_get_logits_ith    Get logits for a specific token position
llama_get_model         Get the model associated with a context
llama_get_verbosity     Get current logging verbosity level
llama_hf_cache_clear    Clear the model cache
llama_hf_cache_dir      Get the cache directory for downloaded models
llama_hf_cache_info     Show information about the model cache
llama_hf_download       Download a GGUF model from Hugging Face
llama_hf_list           List GGUF files in a Hugging Face repository
llama_load_model        Load a GGUF model file
llama_load_model_hf     Load a model directly from Hugging Face
llama_lora_apply        Apply a LoRA adapter to context
llama_lora_clear        Remove all LoRA adapters from context
llama_lora_load         Load a LoRA adapter
llama_lora_remove       Remove a LoRA adapter from context
llama_max_devices       Get maximum number of devices
llama_memory_breakdown_print
                        Print memory breakdown by device
llama_memory_can_shift
                        Check if the KV cache supports shifting
llama_memory_clear      Clear the KV cache
llama_memory_seq_add    Shift token positions in a sequence
llama_memory_seq_cp     Copy a sequence in the KV cache
llama_memory_seq_div    Integer-divide token positions in a sequence
llama_memory_seq_keep   Keep only one sequence in the KV cache
llama_memory_seq_pos_range
                        Get position range for a sequence
llama_memory_seq_rm     Remove tokens from a sequence in the KV cache
llama_model_info        Get model metadata
llama_model_meta        Get all model metadata as a named character
                        vector
llama_model_meta_val    Get a single model metadata value by key
llama_n_batch           Get logical batch size
llama_n_ctx             Get context window size
llama_n_ctx_seq         Get per-sequence context window size
llama_n_seq_max         Get maximum number of sequences
llama_n_threads         Get number of threads for single-token
                        generation
llama_n_threads_batch   Get number of threads for batch processing
llama_n_ubatch          Get physical micro-batch size
llama_new_context       Create an inference context
llama_numa_init         Initialize NUMA optimization
llama_perf              Get performance statistics
llama_perf_print        Print performance statistics to the console
llama_perf_reset        Reset performance counters
llama_pooling_type      Get pooling type
llama_set_abort_callback
                        Set or clear the abort callback
llama_set_causal_attn   Set causal attention mode
llama_set_threads       Set the number of threads for a context
llama_set_verbosity     Set logging verbosity level
llama_set_warmup        Set warmup mode
llama_state_get_size    Get the size of the serialized context state in
                        bytes
llama_state_load        Load context state from file
llama_state_save        Save context state to file
llama_supports_gpu      Check whether GPU offloading is available
llama_supports_mlock    Check whether memory locking is supported
llama_supports_mmap     Check whether memory-mapped file I/O is
                        supported
llama_supports_rpc      Check whether RPC backend is available
llama_synchronize       Synchronize asynchronous computation
llama_system_info       Get system information string
llama_time_us           Get current time in microseconds
llama_token_to_piece    Convert a single token ID to its text piece
llama_tokenize          Tokenize text into token IDs
llama_vocab_get_score   Get the score of a token
llama_vocab_get_text    Get the text representation of a token
llama_vocab_info        Get vocabulary special token IDs
llama_vocab_is_control
                        Check if a token is a control token
llama_vocab_is_eog      Check if a token is an end-of-generation token
llama_vocab_type        Get vocabulary type
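
A minimal end-to-end text-generation sketch assembled from the functions
indexed above. The function names come from this index; the package name
('llamar') and every argument shown (file path, prompt string) are
assumptions, so consult the individual help pages for the real signatures
and options.

    library(llamar)                           # package name assumed

    ## Load a local GGUF model and create an inference context
    model <- llama_load_model("model.gguf")   # illustrative path
    ctx   <- llama_new_context(model)

    ## Generate a completion for a prompt
    out <- llama_generate(ctx, "The capital of France is")
    cat(out, "\n")

    ## Inspect timing, then release resources (context before model)
    llama_perf_print(ctx)
    llama_free_context(ctx)
    llama_free_model(model)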
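
Similarly, a hedged sketch of the embedding path: 'llama_load_model_hf',
'llama_embeddings', and 'llama_embed_batch' are from the index, but the
repository id, any embeddings-mode option on the context, and the shape of
the batch result are assumptions; see the corresponding help pages.

    library(llamar)                           # package name assumed

    ## Download and load a GGUF model from Hugging Face (repo id is
    ## illustrative); an embeddings-mode option on llama_new_context()
    ## may be required for this path
    model <- llama_load_model_hf("owner/repo")
    ctx   <- llama_new_context(model)

    ## One embedding vector for a single text; for a batch, presumably
    ## one embedding per input text
    v <- llama_embeddings(ctx, "a single text")
    m <- llama_embed_batch(ctx, c("first text", "second text"))

    llama_free_context(ctx)
    llama_free_model(model)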
