Skip to content

Commit aa5dbf6

Browse files
committed
WIP
1 parent 631ffab commit aa5dbf6

File tree

2 files changed

+51
-12
lines changed

2 files changed

+51
-12
lines changed

llms_wrapper/llms.py

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,16 @@ class LLMS:
4747
Class that represents a preconfigured set of large language model services.
4848
"""
4949

50-
def __init__(self, config: Dict, debug: bool = False, use_phoenix: Optional[Union[str | Tuple[str, str]]] = None):
50+
def __init__(self, config: Dict = None, debug: bool = False, use_phoenix: Optional[Union[str | Tuple[str, str]]] = None):
5151
"""
5252
Initialize the LLMS object with the given configuration.
5353
5454
Use phoenix is either None or the URI of the phoenix endpoint or a tuple with the URI and the
5555
project name (so far this only works for local phoenix instances). Default URI for a local installation
5656
is "http://0.0.0.0:6006/v1/traces"
5757
"""
58+
if config is None:
59+
config = dict(llms=[])
5860
self.config = deepcopy(config)
5961
self.debug = debug
6062
if not use_phoenix and config.get("use_phoenix"):
@@ -128,6 +130,26 @@ def elapsed(self, llmalias: Union[str, List[str], None] = None):
128130
if isinstance(llmalias, str):
129131
return self.llms[llmalias]["_elapsed_time"]
130132
return sum([self.llms[alias]["_elapsed_time"] for alias in llmalias])
133+
134+
def get_llm_info(self, llmalias: str, name: str) -> any:
135+
"""
136+
For convenience, any parameter with a name starting with an underscore can be used to configure
137+
our own properties of the LLM object. This method returns the value of the given parameter name, or None
138+
if not defined, where the name should not include the leading underscore.
139+
"""
140+
return self.llms[llmalias].config.get("_"+name, None)
141+
142+
def default_max_tokens(self, llmalias: str) -> int:
143+
"""
144+
Return the default maximum number of tokens that the LLM will produce. This is sometimes smaller than the actual
145+
max_tokens, but not supported by LiteLLM, so we use whatever is configured in the config and fall back
146+
to the actual max_tokens if not defined.
147+
"""
148+
ret = self.llms[llmalias].config.get("default_max_tokens")
149+
if ret is None:
150+
ret = self.max_output_tokens(llmalias)
151+
return ret
152+
131153

132154
def cost(self, llmalias: Union[str, List[str], None] = None):
133155
"""
@@ -146,14 +168,21 @@ def cost_per_token(self, llmalias: str) -> Tuple[Optional[float], Optional[float
146168
Return the estimated cost per prompt and completion token for the given model.
147169
This may be wrong or cost may get calculated in a different way, e.g. depending on
148170
cache, response time etc.
171+
If the model is not in the configuration, this makes an attempt to just get the cost as
172+
defined by the LiteLLM backend.
149173
If no cost is known this returns 0.0, 0.0
150174
"""
151-
llm = self.llms[llmalias]
152-
cc = llm.get("cost_per_prompt_token")
153-
cp = llm.get("cost_per_completion_token")
175+
llm = self.llms.get(llmalias)
176+
cc, cp = None, None
177+
if llm is not None:
178+
cc = llm.get("cost_per_prompt_token")
179+
cp = llm.get("cost_per_completion_token")
180+
llmname = llm["llm"]
181+
else:
182+
llmname = llmalias
154183
if cc is None or cp is None:
155184
try:
156-
tmpcp, tmpcc = litellm.cost_per_token(self.llms[llmalias]["llm"], prompt_tokens=1, completion_tokens=1)
185+
tmpcp, tmpcc = litellm.cost_per_token(llmname, prompt_tokens=1, completion_tokens=1)
157186
except:
158187
tmpcp, tmpcc = None, None
159188
if cc is None:
@@ -166,12 +195,17 @@ def max_output_tokens(self, llmalias: str) -> Optional[int]:
166195
"""
167196
Return the maximum number of output tokens the model can generate, or None if not known.
168197
"""
169-
llm = self.llms[llmalias]
170-
ret = llm.get("max_output_tokens")
198+
llm = self.llms.get(llmalias)
199+
ret = None
200+
if llm is not None:
201+
llmname = llm["llm"]
202+
ret = llm.get("max_output_tokens")
203+
else:
204+
llmname = llmalias
171205
if ret is None:
172206
try:
173207
# ret = litellm.get_max_tokens(self.llms[llmalias]["llm"])
174-
info = get_model_info(self.llms[llmalias]["llm"])
208+
info = get_model_info(llmname)
175209
ret = info.get("max_output_tokens")
176210
except:
177211
ret = None
@@ -181,11 +215,16 @@ def max_input_tokens(self, llmalias: str) -> Optional[int]:
181215
"""
182216
Return the maximum number of tokens possible in the prompt or None if not known.
183217
"""
184-
llm = self.llms[llmalias]
185-
ret = llm.get("max_input_tokens")
218+
llm = self.llms.get(llmalias)
219+
ret = None
220+
if llm is not None:
221+
ret = llm.get("max_input_tokens")
222+
llmname = llm["llm"]
223+
else:
224+
llmname = llmalias
186225
if ret is None:
187226
try:
188-
info = get_model_info(self.llms[llmalias]["llm"])
227+
info = get_model_info(llmname)
189228
ret = info.get("max_input_tokens")
190229
except:
191230
ret = None

llms_wrapper/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
import importlib.metadata
2-
__version__ = "0.1.27"
2+
__version__ = "0.1.28"
33

0 commit comments

Comments
 (0)