Module llmflex.Models.Factory.llm_factory

Expand source code
from __future__ import annotations
from ..Cores.base_core import BaseCore, BaseLLM
from ...Prompts.prompt_template import PromptTemplate
from typing import Literal, Optional, Dict, List, Any, Type

def detect_model_type(model_id: str) -> str:
    """This function attempts to get the model format type with the model id.

    A ``None`` model id is treated as an OpenAI-API model. Otherwise the id is
    matched (case-insensitively) against known format markers; the first match
    wins, and anything unmatched falls back to the default Huggingface format.

    Args:
        model_id (str): Model ID from Huggingface.

    Returns:
        str: Model format type ('openai', 'gguf', 'awq', 'gptq', 'exl2' or 'default').
    """
    if model_id is None:
        return 'openai'
    lowered = model_id.lower()
    # Order matters: earlier markers take precedence, matching the original
    # if/elif chain ('gguf' beats 'awq', etc.).
    for marker in ('gguf', 'awq', 'gptq', 'exl2'):
        if marker in lowered:
            return marker
    return 'default'

class LlmFactory:
    """Factory that wraps a model core and creates LLM instances from it.

    The backend core (llama.cpp, Huggingface transformers, ExLlamaV2, OpenAI
    API, or a debug stub) is selected from ``model_type``, which can be
    auto-detected from the model ID via :func:`detect_model_type`.
    """

    def __init__(self, 
                model_id: str, 
                model_type: Literal['auto', 'default', 'gptq', 'awq', 'gguf', 'openai', 'exl2', 'debug'] = 'auto',
                model_file: Optional[str] = None,
                model_kwargs: Optional[Dict[str, Any]] = None,
                revision: Optional[str] = None,
                from_local: bool = False,
                context_length: int = 4096,
                base_url: Optional[str] = None,
                api_key: Optional[str] = None,
                tokenizer_id: Optional[str] = None,
                tokenizer_kwargs: Optional[Dict[str, Any]] = None,
                init_empty: bool = False,
                **kwargs) -> None:
        """Initialise the model core to create LLMs.

        Args:
            model_id (str): Model ID (from Huggingface) to use or the model to use if using OpenAI API core.
            model_type (Literal['auto', 'default', 'gptq', 'awq', 'gguf', 'openai', 'exl2', 'debug'], optional): Type of model format, if 'auto' is given, model_type will be automatically detected. Defaults to 'auto'.
            model_file (Optional[str], optional): Specific model file to use. Only useful for `model_type="gguf"`. Defaults to None.
            model_kwargs (Optional[Dict[str, Any]], optional): Keyword arguments for loading the model. Only useful for Default, GPTQ, and AWQ models. Defaults to None.
            revision (Optional[str], optional): Specific revision of the model repository. Only useful for `model_type="exl2"`. Defaults to None.
            from_local (bool, optional): Whether to treat the model_id given as a local path or a Huggingface ID. Only useful for GGUF models. Defaults to False.
            context_length (int, optional): Size of the context window. Only useful for GGUF models. Defaults to 4096.
            base_url (Optional[str], optional): Base URL for the API. Only useful for OpenAI APIs. Defaults to None.
            api_key (Optional[str], optional): API key for OpenAI API. Defaults to None.
            tokenizer_id (Optional[str], optional): Model ID (from Huggingface) to load the tokenizer. Useful for model types "default", "gptq", "awq", and "openai". Defaults to None.
            tokenizer_kwargs (Optional[Dict[str, Any]], optional): Keyword arguments for loading the tokenizer. Useful for model types "default", "gptq", "awq", and "openai". Defaults to None.
            init_empty (bool, optional): Initialise without a model core. Should not be used for normal initialisation. Defaults to False.

        Raises:
            ValueError: If the (resolved) model type is not supported.
        """
        if not init_empty:
            self._model_id = model_id
            # Auto-detect the format from the model ID unless one is given explicitly.
            self._model_type = detect_model_type(model_id=model_id) if model_type == 'auto' else model_type
            # Backend imports are deferred so only the selected backend's
            # dependencies need to be installed.
            if self.model_type == 'gguf':
                from ..Cores.llamacpp_core import LlamaCppCore
                self._core = LlamaCppCore(self.model_id, model_file=model_file, context_length=context_length, from_local=from_local, **kwargs)
            elif self.model_type in ['default', 'awq', 'gptq']:
                from ..Cores.huggingface_core import HuggingfaceCore
                self._core = HuggingfaceCore(self.model_id, model_type=self.model_type, model_kwargs=model_kwargs, tokenizer_kwargs=tokenizer_kwargs)
            elif self.model_type == 'openai':
                from ..Cores.openai_core import OpenAICore
                self._core = OpenAICore(base_url=base_url, api_key=api_key, model_id=model_id, tokenizer_id=tokenizer_id, tokenizer_kwargs=tokenizer_kwargs)
                # The API core may resolve/normalise the model ID; keep the resolved one.
                self._model_id = self.core.model_id
            elif self.model_type == 'exl2':
                from ..Cores.exllamav2_core import Exl2Core
                self._core = Exl2Core(self.model_id, revision=revision, **kwargs)
            elif self.model_type == 'debug':
                self._core = BaseCore(model_id=self.model_id, **kwargs)
            else:
                raise ValueError(f'Model type "{self.model_type}" not supported.')

    @classmethod
    def from_model_object(cls, model: Any, tokenizer: Any, model_type: Literal['default', 'gptq', 'awq', 'gguf', 'openai', 'exl2'], model_id: str = 'Unknown', **kwargs) -> LlmFactory:
        """Initialise the factory object with an already loaded model.

        Args:
            model (Any): The pre-loaded model.
            tokenizer (Any): The pre-loaded tokenizer.
            model_type (Literal['default', 'gptq', 'awq', 'gguf', 'openai', 'exl2']): Type of model format.
            model_id (str, optional): Name to be given to the model, recommend to use the repo ID on HuggingFace. Defaults to 'Unknown'.

        Returns:
            LlmFactory: The initialised llm factory.

        Raises:
            ValueError: If model_type is not supported.
        """
        if model_type in ['default', 'gptq', 'awq']:
            from ..Cores.huggingface_core import HuggingfaceCore
            core = HuggingfaceCore.from_model_object(model=model, tokenizer=tokenizer, model_id=model_id, model_type=model_type)
        elif model_type == 'gguf':
            from ..Cores.llamacpp_core import LlamaCppCore
            core = LlamaCppCore.from_model_object(model=model, tokenizer=tokenizer, model_id=model_id)
        elif model_type == 'exl2':
            from ..Cores.exllamav2_core import Exl2Core
            core = Exl2Core.from_model_object(model=model, tokenizer=tokenizer, model_id=model_id)
        elif model_type == 'openai':
            from ..Cores.openai_core import OpenAICore
            core = OpenAICore.from_model_object(model=model, tokenizer=tokenizer, model_id=model_id)
        else:
            # Previously an unsupported model_type left `core` unbound and
            # surfaced as a confusing NameError below.
            raise ValueError(f'Model type "{model_type}" not supported.')
        # Bug fix: this was `nit_empty=True`, which was swallowed by **kwargs,
        # so init_empty stayed False and a full core initialisation ran with an
        # empty model_id.
        factory = cls(model_id='', init_empty=True)
        factory._core = core
        factory._model_id = core.model_id
        factory._model_type = model_type
        return factory

    @property
    def model_id(self) -> str:
        """Model id (from Huggingface).

        Returns:
            str: Model id (from Huggingface).
        """
        return self._model_id

    @property
    def model_type(self) -> str:
        """Type of model format.

        Returns:
            str: Type of model format.
        """
        return self._model_type

    @property
    def core(self) -> BaseCore:
        """Core model of the llm factory.

        Returns:
            BaseCore: Core model of the llm factory.
        """
        return self._core

    @property
    def prompt_template(self) -> PromptTemplate:
        """Default prompt template for the model.

        Returns:
            PromptTemplate: Default prompt template for the model.
        """
        return self.core.prompt_template

    def __call__(self, temperature: float = 0.8, max_new_tokens: int = 256, top_p: float = 0.95,
                top_k: int = 40, repetition_penalty: float = 1.1, stop: Optional[List[str]] = None, 
                newline=True, **kwargs: Dict[str, Any]) -> BaseLLM:
        """Calling the object will create a langchain format llm with the generation configurations passed from the arguments. 

        Args:
            temperature (float, optional): Set how "creative" the model is, the smaller it is, the more static the output. Defaults to 0.8.
            max_new_tokens (int, optional): Maximum number of tokens to generate by the llm. Defaults to 256.
            top_p (float, optional): While sampling the next token, only consider the tokens above this p value. Defaults to 0.95.
            top_k (int, optional): While sampling the next token, only consider the top "top_k" tokens. Defaults to 40.
            repetition_penalty (float, optional): The value to penalise the model for generating repetitive text. Defaults to 1.1.
            stop (Optional[List[str]], optional): List of strings to stop the generation of the llm. Defaults to None.
            newline (bool, optional): Whether to add a newline character to the beginning of the "stop" list provided. Defaults to True.

        Returns:
            BaseLLM: An LLM.
        """
        return self.call(temperature=temperature, max_new_tokens=max_new_tokens,
                          top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty,
                          stop=stop, newline=newline, **kwargs)

    def call(self, temperature: float = 0.8, max_new_tokens: int = 2048, top_p: float = 0.95,
                top_k: int = 40, repetition_penalty: float = 1.1, stop: Optional[List[str]] = None, 
                newline=True, **kwargs: Dict[str, Any]) -> BaseLLM:
        """Calling the object will create a langchain format llm with the generation configurations passed from the arguments. 

        Args:
            temperature (float, optional): Set how "creative" the model is, the smaller it is, the more static the output. Defaults to 0.8.
            max_new_tokens (int, optional): Maximum number of tokens to generate by the llm. Defaults to 2048.
            top_p (float, optional): While sampling the next token, only consider the tokens above this p value. Defaults to 0.95.
            top_k (int, optional): While sampling the next token, only consider the top "top_k" tokens. Defaults to 40.
            repetition_penalty (float, optional): The value to penalise the model for generating repetitive text. Defaults to 1.1.
            stop (Optional[List[str]], optional): List of strings to stop the generation of the llm. Defaults to None.
            newline (bool, optional): Whether to add a newline character to the beginning of the "stop" list provided. Defaults to True.

        Returns:
            BaseLLM: An LLM.
        """
        from ..Cores.base_core import GenericLLM
        # NOTE(review): extra **kwargs are accepted but not forwarded to
        # GenericLLM — presumably intentional; confirm before forwarding.
        return GenericLLM(core=self.core, temperature=temperature, max_new_tokens=max_new_tokens, 
                                 top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty, stop=stop, stop_newline_version=newline)

Functions

def detect_model_type(model_id: str) ‑> str

This function attempts to get the model format type with the model id.

Args

model_id : str
Model ID from Huggingface.

Returns

str
Model format type.
Expand source code
def detect_model_type(model_id: str) -> str:
    """This function attempts to get the model format type with the model id.

    A ``None`` model id is treated as an OpenAI-API model. Otherwise the id is
    matched (case-insensitively) against known format markers; the first match
    wins, and anything unmatched falls back to the default Huggingface format.

    Args:
        model_id (str): Model ID from Huggingface.

    Returns:
        str: Model format type ('openai', 'gguf', 'awq', 'gptq', 'exl2' or 'default').
    """
    if model_id is None:
        return 'openai'
    lowered = model_id.lower()
    # Order matters: earlier markers take precedence, matching the original
    # if/elif chain ('gguf' beats 'awq', etc.).
    for marker in ('gguf', 'awq', 'gptq', 'exl2'):
        if marker in lowered:
            return marker
    return 'default'

Classes

class LlmFactory (model_id: str, model_type: "Literal['auto', 'default', 'gptq', 'awq', 'gguf', 'openai', 'exl2', 'debug']" = 'auto', model_file: Optional[str] = None, model_kwargs: Optional[Dict[str, Any]] = None, revision: Optional[str] = None, from_local: bool = False, context_length: int = 4096, base_url: Optional[str] = None, api_key: Optional[str] = None, tokenizer_id: Optional[str] = None, tokenizer_kwargs: Optional[Dict[str, Any]] = None, init_empty: bool = False, **kwargs)

Initialise the model core to create LLMs.

Args

model_id : str
Model ID (from Huggingface) to use or the model to use if using OpenAI API core.
model_type : Literal['auto', 'default', 'gptq', 'awq', 'gguf', 'openai', 'exl2', 'debug'], optional
Type of model format, if 'auto' is given, model_type will be automatically detected. Defaults to 'auto'.
model_file : Optional[str], optional
Specific model file to use. Only useful for model_type="gguf". Defaults to None.
model_kwargs : Optional[Dict[str, Any]], optional
Keyword arguments for loading the model. Only useful for Default, GPTQ, and AWQ models. Defaults to None.
revision : Optional[str], optional
Specific revision of the model repository. Only useful for model_type="exl2". Defaults to None.
from_local : bool, optional
Whether to treat the model_id given as a local path or a Huggingface ID. Only useful for GGUF models. Defaults to False.
context_length : int, optional
Size of the context window. Only useful for GGUF models. Defaults to 4096.
base_url : Optional[str], optional
Base URL for the API. Only useful for OpenAI APIs. Defaults to None.
api_key : Optional[str], optional
API key for OpenAI API. Defaults to None.
tokenizer_id : Optional[str], optional
Model ID (from Huggingface) to load the tokenizer. Useful for model types "default", "gptq", "awq", and "openai". Defaults to None.
tokenizer_kwargs : Optional[Dict[str, Any]], optional
Keyword arguments for loading the tokenizer. Useful for model types "default", "gptq", "awq", and "openai". Defaults to None.
init_empty : bool, optional
Initialise without a model core. Should not be used for normal initialisation. Defaults to False.
Expand source code
class LlmFactory:
    """Factory that wraps a model core and creates LLM instances from it.

    The backend core (llama.cpp, Huggingface transformers, ExLlamaV2, OpenAI
    API, or a debug stub) is selected from ``model_type``, which can be
    auto-detected from the model ID via :func:`detect_model_type`.
    """

    def __init__(self, 
                model_id: str, 
                model_type: Literal['auto', 'default', 'gptq', 'awq', 'gguf', 'openai', 'exl2', 'debug'] = 'auto',
                model_file: Optional[str] = None,
                model_kwargs: Optional[Dict[str, Any]] = None,
                revision: Optional[str] = None,
                from_local: bool = False,
                context_length: int = 4096,
                base_url: Optional[str] = None,
                api_key: Optional[str] = None,
                tokenizer_id: Optional[str] = None,
                tokenizer_kwargs: Optional[Dict[str, Any]] = None,
                init_empty: bool = False,
                **kwargs) -> None:
        """Initialise the model core to create LLMs.

        Args:
            model_id (str): Model ID (from Huggingface) to use or the model to use if using OpenAI API core.
            model_type (Literal['auto', 'default', 'gptq', 'awq', 'gguf', 'openai', 'exl2', 'debug'], optional): Type of model format, if 'auto' is given, model_type will be automatically detected. Defaults to 'auto'.
            model_file (Optional[str], optional): Specific model file to use. Only useful for `model_type="gguf"`. Defaults to None.
            model_kwargs (Optional[Dict[str, Any]], optional): Keyword arguments for loading the model. Only useful for Default, GPTQ, and AWQ models. Defaults to None.
            revision (Optional[str], optional): Specific revision of the model repository. Only useful for `model_type="exl2"`. Defaults to None.
            from_local (bool, optional): Whether to treat the model_id given as a local path or a Huggingface ID. Only useful for GGUF models. Defaults to False.
            context_length (int, optional): Size of the context window. Only useful for GGUF models. Defaults to 4096.
            base_url (Optional[str], optional): Base URL for the API. Only useful for OpenAI APIs. Defaults to None.
            api_key (Optional[str], optional): API key for OpenAI API. Defaults to None.
            tokenizer_id (Optional[str], optional): Model ID (from Huggingface) to load the tokenizer. Useful for model types "default", "gptq", "awq", and "openai". Defaults to None.
            tokenizer_kwargs (Optional[Dict[str, Any]], optional): Keyword arguments for loading the tokenizer. Useful for model types "default", "gptq", "awq", and "openai". Defaults to None.
            init_empty (bool, optional): Initialise without a model core. Should not be used for normal initialisation. Defaults to False.

        Raises:
            ValueError: If the (resolved) model type is not supported.
        """
        if not init_empty:
            self._model_id = model_id
            # Auto-detect the format from the model ID unless one is given explicitly.
            self._model_type = detect_model_type(model_id=model_id) if model_type == 'auto' else model_type
            # Backend imports are deferred so only the selected backend's
            # dependencies need to be installed.
            if self.model_type == 'gguf':
                from ..Cores.llamacpp_core import LlamaCppCore
                self._core = LlamaCppCore(self.model_id, model_file=model_file, context_length=context_length, from_local=from_local, **kwargs)
            elif self.model_type in ['default', 'awq', 'gptq']:
                from ..Cores.huggingface_core import HuggingfaceCore
                self._core = HuggingfaceCore(self.model_id, model_type=self.model_type, model_kwargs=model_kwargs, tokenizer_kwargs=tokenizer_kwargs)
            elif self.model_type == 'openai':
                from ..Cores.openai_core import OpenAICore
                self._core = OpenAICore(base_url=base_url, api_key=api_key, model_id=model_id, tokenizer_id=tokenizer_id, tokenizer_kwargs=tokenizer_kwargs)
                # The API core may resolve/normalise the model ID; keep the resolved one.
                self._model_id = self.core.model_id
            elif self.model_type == 'exl2':
                from ..Cores.exllamav2_core import Exl2Core
                self._core = Exl2Core(self.model_id, revision=revision, **kwargs)
            elif self.model_type == 'debug':
                self._core = BaseCore(model_id=self.model_id, **kwargs)
            else:
                raise ValueError(f'Model type "{self.model_type}" not supported.')

    @classmethod
    def from_model_object(cls, model: Any, tokenizer: Any, model_type: Literal['default', 'gptq', 'awq', 'gguf', 'openai', 'exl2'], model_id: str = 'Unknown', **kwargs) -> LlmFactory:
        """Initialise the factory object with an already loaded model.

        Args:
            model (Any): The pre-loaded model.
            tokenizer (Any): The pre-loaded tokenizer.
            model_type (Literal['default', 'gptq', 'awq', 'gguf', 'openai', 'exl2']): Type of model format.
            model_id (str, optional): Name to be given to the model, recommend to use the repo ID on HuggingFace. Defaults to 'Unknown'.

        Returns:
            LlmFactory: The initialised llm factory.

        Raises:
            ValueError: If model_type is not supported.
        """
        if model_type in ['default', 'gptq', 'awq']:
            from ..Cores.huggingface_core import HuggingfaceCore
            core = HuggingfaceCore.from_model_object(model=model, tokenizer=tokenizer, model_id=model_id, model_type=model_type)
        elif model_type == 'gguf':
            from ..Cores.llamacpp_core import LlamaCppCore
            core = LlamaCppCore.from_model_object(model=model, tokenizer=tokenizer, model_id=model_id)
        elif model_type == 'exl2':
            from ..Cores.exllamav2_core import Exl2Core
            core = Exl2Core.from_model_object(model=model, tokenizer=tokenizer, model_id=model_id)
        elif model_type == 'openai':
            from ..Cores.openai_core import OpenAICore
            core = OpenAICore.from_model_object(model=model, tokenizer=tokenizer, model_id=model_id)
        else:
            # Previously an unsupported model_type left `core` unbound and
            # surfaced as a confusing NameError below.
            raise ValueError(f'Model type "{model_type}" not supported.')
        # Bug fix: this was `nit_empty=True`, which was swallowed by **kwargs,
        # so init_empty stayed False and a full core initialisation ran with an
        # empty model_id.
        factory = cls(model_id='', init_empty=True)
        factory._core = core
        factory._model_id = core.model_id
        factory._model_type = model_type
        return factory

    @property
    def model_id(self) -> str:
        """Model id (from Huggingface).

        Returns:
            str: Model id (from Huggingface).
        """
        return self._model_id

    @property
    def model_type(self) -> str:
        """Type of model format.

        Returns:
            str: Type of model format.
        """
        return self._model_type

    @property
    def core(self) -> BaseCore:
        """Core model of the llm factory.

        Returns:
            BaseCore: Core model of the llm factory.
        """
        return self._core

    @property
    def prompt_template(self) -> PromptTemplate:
        """Default prompt template for the model.

        Returns:
            PromptTemplate: Default prompt template for the model.
        """
        return self.core.prompt_template

    def __call__(self, temperature: float = 0.8, max_new_tokens: int = 256, top_p: float = 0.95,
                top_k: int = 40, repetition_penalty: float = 1.1, stop: Optional[List[str]] = None, 
                newline=True, **kwargs: Dict[str, Any]) -> BaseLLM:
        """Calling the object will create a langchain format llm with the generation configurations passed from the arguments. 

        Args:
            temperature (float, optional): Set how "creative" the model is, the smaller it is, the more static the output. Defaults to 0.8.
            max_new_tokens (int, optional): Maximum number of tokens to generate by the llm. Defaults to 256.
            top_p (float, optional): While sampling the next token, only consider the tokens above this p value. Defaults to 0.95.
            top_k (int, optional): While sampling the next token, only consider the top "top_k" tokens. Defaults to 40.
            repetition_penalty (float, optional): The value to penalise the model for generating repetitive text. Defaults to 1.1.
            stop (Optional[List[str]], optional): List of strings to stop the generation of the llm. Defaults to None.
            newline (bool, optional): Whether to add a newline character to the beginning of the "stop" list provided. Defaults to True.

        Returns:
            BaseLLM: An LLM.
        """
        return self.call(temperature=temperature, max_new_tokens=max_new_tokens,
                          top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty,
                          stop=stop, newline=newline, **kwargs)

    def call(self, temperature: float = 0.8, max_new_tokens: int = 2048, top_p: float = 0.95,
                top_k: int = 40, repetition_penalty: float = 1.1, stop: Optional[List[str]] = None, 
                newline=True, **kwargs: Dict[str, Any]) -> BaseLLM:
        """Calling the object will create a langchain format llm with the generation configurations passed from the arguments. 

        Args:
            temperature (float, optional): Set how "creative" the model is, the smaller it is, the more static the output. Defaults to 0.8.
            max_new_tokens (int, optional): Maximum number of tokens to generate by the llm. Defaults to 2048.
            top_p (float, optional): While sampling the next token, only consider the tokens above this p value. Defaults to 0.95.
            top_k (int, optional): While sampling the next token, only consider the top "top_k" tokens. Defaults to 40.
            repetition_penalty (float, optional): The value to penalise the model for generating repetitive text. Defaults to 1.1.
            stop (Optional[List[str]], optional): List of strings to stop the generation of the llm. Defaults to None.
            newline (bool, optional): Whether to add a newline character to the beginning of the "stop" list provided. Defaults to True.

        Returns:
            BaseLLM: An LLM.
        """
        from ..Cores.base_core import GenericLLM
        # NOTE(review): extra **kwargs are accepted but not forwarded to
        # GenericLLM — presumably intentional; confirm before forwarding.
        return GenericLLM(core=self.core, temperature=temperature, max_new_tokens=max_new_tokens, 
                                 top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty, stop=stop, stop_newline_version=newline)

Static methods

def from_model_object(model: Any, tokenizer: Any, model_type: "Literal['default', 'gptq', 'awq', 'gguf', 'openai', 'exl2']", model_id: str = 'Unknown', **kwargs) ‑> LlmFactory

Initialise the factory object with an already loaded model.

Args

model : Any
The pre-loaded model.
tokenizer : Any
The pre-loaded tokenizer.
model_type : Literal['default', 'gptq', 'awq', 'gguf', 'openai', 'exl2']
Type of model format.
model_id : str, optional
Name to be given to the model, recommend to use the repo ID on HuggingFace. Defaults to 'Unknown'.

Returns

LlmFactory
The initialised llm factory.
Expand source code
@classmethod
def from_model_object(cls, model: Any, tokenizer: Any, model_type: Literal['default', 'gptq', 'awq', 'gguf', 'openai', 'exl2'], model_id: str = 'Unknown', **kwargs) -> LlmFactory:
    """Initialise the factory object with an already loaded model.

    Args:
        model (Any): The pre-loaded model.
        tokenizer (Any): The pre-loaded tokenizer.
        model_type (Literal['default', 'gptq', 'awq', 'gguf', 'openai', 'exl2']): Type of model format.
        model_id (str, optional): Name to be given to the model, recommend to use the repo ID on HuggingFace. Defaults to 'Unknown'.

    Returns:
        LlmFactory: The initialised llm factory.

    Raises:
        ValueError: If model_type is not supported.
    """
    if model_type in ['default', 'gptq', 'awq']:
        from ..Cores.huggingface_core import HuggingfaceCore
        core = HuggingfaceCore.from_model_object(model=model, tokenizer=tokenizer, model_id=model_id, model_type=model_type)
    elif model_type == 'gguf':
        from ..Cores.llamacpp_core import LlamaCppCore
        core = LlamaCppCore.from_model_object(model=model, tokenizer=tokenizer, model_id=model_id)
    elif model_type == 'exl2':
        from ..Cores.exllamav2_core import Exl2Core
        core = Exl2Core.from_model_object(model=model, tokenizer=tokenizer, model_id=model_id)
    elif model_type == 'openai':
        from ..Cores.openai_core import OpenAICore
        core = OpenAICore.from_model_object(model=model, tokenizer=tokenizer, model_id=model_id)
    else:
        # Previously an unsupported model_type left `core` unbound and
        # surfaced as a confusing NameError below.
        raise ValueError(f'Model type "{model_type}" not supported.')
    # Bug fix: this was `nit_empty=True`, which was swallowed by **kwargs,
    # so init_empty stayed False and a full core initialisation ran with an
    # empty model_id.
    factory = cls(model_id='', init_empty=True)
    factory._core = core
    factory._model_id = core.model_id
    factory._model_type = model_type
    return factory

Instance variables

var core : Type[BaseCore]

Core model of the llm factory.

Returns

Type[BaseCore]
Core model of the llm factory.
Expand source code
@property
def core(self) -> Type[BaseCore]:
    """The underlying model core wrapped by this factory.

    Returns:
        Type[BaseCore]: The model core instance.
    """
    return self._core
var model_id : str

Model id (from Huggingface).

Returns

str
Model id (from Huggingface).
Expand source code
@property
def model_id(self) -> str:
    """The model's identifier (typically its Huggingface repo ID).

    Returns:
        str: The model identifier.
    """
    return self._model_id
var model_type : str

Type of model format.

Returns

str
Type of model format.
Expand source code
@property
def model_type(self) -> str:
    """The model format type backing this factory (e.g. 'gguf', 'openai').

    Returns:
        str: The model format type.
    """
    return self._model_type
var prompt_template : PromptTemplate

Default prompt template for the model.

Returns

PromptTemplate
Default prompt template for the model.
Expand source code
@property
def prompt_template(self) -> PromptTemplate:
    """The default prompt template, delegated to the underlying core.

    Returns:
        PromptTemplate: The core's default prompt template.
    """
    return self.core.prompt_template

Methods

def call(self, temperature: float = 0.8, max_new_tokens: int = 2048, top_p: float = 0.95, top_k: int = 40, repetition_penalty: float = 1.1, stop: Optional[List[str]] = None, newline=True, **kwargs: Dict[str, Any]) ‑> Type[BaseLLM]

Calling the object will create a langchain format llm with the generation configurations passed from the arguments.

Args

temperature : float, optional
Set how "creative" the model is; the smaller it is, the more static the output. Defaults to 0.8.
max_new_tokens : int, optional
Maximum number of tokens to generate by the llm. Defaults to 2048.
top_p : float, optional
While sampling the next token, only consider the tokens above this p value. Defaults to 0.95.
top_k : int, optional
While sampling the next token, only consider the top "top_k" tokens. Defaults to 40.
repetition_penalty : float, optional
The value to penalise the model for generating repetitive text. Defaults to 1.1.
stop : Optional[List[str]], optional
List of strings to stop the generation of the llm. Defaults to None.
newline : bool, optional
Whether to add a newline character to the beginning of the "stop" list provided. Defaults to True.

Returns

BaseLLM
An LLM.
Expand source code
def call(self, temperature: float = 0.8, max_new_tokens: int = 2048, top_p: float = 0.95,
            top_k: int = 40, repetition_penalty: float = 1.1, stop: Optional[List[str]] = None, 
            newline=True, **kwargs: Dict[str, Any]) -> Type[BaseLLM]:
    """Create a langchain-format LLM configured with the given sampling settings.

    Args:
        temperature (float, optional): Creativity control; lower values give more deterministic output. Defaults to 0.8.
        max_new_tokens (int, optional): Maximum number of tokens the llm may generate. Defaults to 2048.
        top_p (float, optional): Nucleus-sampling probability threshold for candidate tokens. Defaults to 0.95.
        top_k (int, optional): Number of highest-probability tokens considered while sampling. Defaults to 40.
        repetition_penalty (float, optional): Penalty applied to repetitive text generation. Defaults to 1.1.
        stop (Optional[List[str]], optional): Strings that terminate generation when produced. Defaults to None.
        newline (bool, optional): Whether a newline-prefixed variant of each stop string is also used. Defaults to True.

    Returns:
        BaseLLM: The configured LLM.
    """
    from ..Cores.base_core import GenericLLM
    generation_config = dict(
        core=self.core,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        stop=stop,
        stop_newline_version=newline,
    )
    return GenericLLM(**generation_config)