@@ -47,14 +47,16 @@ class LLMS:
4747 Class that represents a preconfigured set of large language model services.
4848 """
4949
50- def __init__ (self , config : Dict , debug : bool = False , use_phoenix : Optional [Union [str | Tuple [str , str ]]] = None ):
50+ def __init__ (self , config : Dict = None , debug : bool = False , use_phoenix : Optional [Union [str | Tuple [str , str ]]] = None ):
5151 """
5252 Initialize the LLMS object with the given configuration.
5353
5454 `use_phoenix` is either None or the URI of the phoenix endpoint or a tuple with the URI and the
5555 project name (so far this only works for local phoenix instances). Default URI for a local installation
5656 is "http://0.0.0.0:6006/v1/traces"
5757 """
58+ if config is None :
59+ config = dict (llms = [])
5860 self .config = deepcopy (config )
5961 self .debug = debug
6062 if not use_phoenix and config .get ("use_phoenix" ):
@@ -128,6 +130,26 @@ def elapsed(self, llmalias: Union[str, List[str], None] = None):
128130 if isinstance (llmalias , str ):
129131 return self .llms [llmalias ]["_elapsed_time" ]
130132 return sum ([self .llms [alias ]["_elapsed_time" ] for alias in llmalias ])
133+
134+ def get_llm_info (self , llmalias : str , name : str ) -> any :
135+ """
136+ For convenience, any parameter with a name staring with an underscore can be used to configure
137+ our own properties of the LLM object. This method returns the value of the given parameter name of None
138+ if not defined, where the name should not include the leading underscore.
139+ """
140+ return self .llms [llmalias ].config .get ("_" + name , None )
141+
142+ def default_max_tokens (self , llmalias : str ) -> int :
143+ """
144+ Return the default maximum number of tokens that the LLM will produce. This is sometimes smaller thant the actual
145+ max_tokens, but not supported by LiteLLM, so we use whatever is configured in the config and fall back
146+ to the actual max_tokens if not defined.
147+ """
148+ ret = self .llms [llmalias ].config .get ("default_max_tokens" )
149+ if ret is None :
150+ ret = self .max_output_tokens (llmalias )
151+ return ret
152+
131153
132154 def cost (self , llmalias : Union [str , List [str ], None ] = None ):
133155 """
@@ -146,14 +168,21 @@ def cost_per_token(self, llmalias: str) -> Tuple[Optional[float], Optional[float
146168 Return the estimated cost per prompt and completion token for the given model.
147169 This may be wrong or cost may get calculated in a different way, e.g. depending on
148170 cache, response time etc.
171+ If the model is not in the configuration, this makes an attempt to just get the cost as
172+ defined by the LiteLLM backend.
149173 If no cost is known this returns 0.0, 0.0
150174 """
151- llm = self .llms [llmalias ]
152- cc = llm .get ("cost_per_prompt_token" )
153- cp = llm .get ("cost_per_completion_token" )
175+ llm = self .llms .get (llmalias )
176+ cc , cp = None , None
177+ if llm is not None :
178+ cc = llm .get ("cost_per_prompt_token" )
179+ cp = llm .get ("cost_per_completion_token" )
180+ llmname = llm ["llm" ]
181+ else :
182+ llmname = llmalias
154183 if cc is None or cp is None :
155184 try :
156- tmpcp , tmpcc = litellm .cost_per_token (self . llms [ llmalias ][ "llm" ] , prompt_tokens = 1 , completion_tokens = 1 )
185+ tmpcp , tmpcc = litellm .cost_per_token (llmname , prompt_tokens = 1 , completion_tokens = 1 )
157186 except :
158187 tmpcp , tmpcc = None , None
159188 if cc is None :
@@ -166,12 +195,17 @@ def max_output_tokens(self, llmalias: str) -> Optional[int]:
166195 """
167196 Return the maximum number of prompt tokens that can be sent to the model.
168197 """
169- llm = self .llms [llmalias ]
170- ret = llm .get ("max_output_tokens" )
198+ llm = self .llms .get (llmalias )
199+ ret = None
200+ if llm is not None :
201+ llmname = llm ["llm" ]
202+ ret = llm .get ("max_output_tokens" )
203+ else :
204+ llmname = llmalias
171205 if ret is None :
172206 try :
173207 # ret = litellm.get_max_tokens(self.llms[llmalias]["llm"])
174- info = get_model_info (self . llms [ llmalias ][ "llm" ] )
208+ info = get_model_info (llmname )
175209 ret = info .get ("max_output_tokens" )
176210 except :
177211 ret = None
@@ -181,11 +215,16 @@ def max_input_tokens(self, llmalias: str) -> Optional[int]:
181215 """
182216 Return the maximum number of tokens possible in the prompt or None if not known.
183217 """
184- llm = self .llms [llmalias ]
185- ret = llm .get ("max_input_tokens" )
218+ llm = self .llms .get (llmalias )
219+ ret = None
220+ if llm is not None :
221+ ret = llm .get ("max_input_tokens" )
222+ llmname = llm ["llm" ]
223+ else :
224+ llmname = llmalias
186225 if ret is None :
187226 try :
188- info = get_model_info (self . llms [ llmalias ][ "llm" ] )
227+ info = get_model_info (llmname )
189228 ret = info .get ("max_input_tokens" )
190229 except :
191230 ret = None
0 commit comments