Utils

Components that are not part of the library's core functionality, but that generally make it nicer to use.

Setup

import os
from google import genai

c = genai.client.Client(api_key=os.environ.get("GEMINI_API_KEY"))
r = c.models.generate_content(model='gemini-2.0-flash', contents="Hi Gemini!")
print(r.text)
Hi there! How can I help you today?
mr = c.models.generate_images(
    model = 'imagen-3.0-generate-002',
    prompt = "Turtles in space",
    config = {"number_of_images": 2}
)
mmr = c.models.generate_content(model='gemini-2.0-flash-exp',
                                contents="Generate a watercolor picture of a peaceful pond and write a Haiku about it",
                                config={"response_modalities": ["TEXT", "IMAGE"]})

Models and their functionalities

Exported source
all_model_types = {
    "gemini-2.0-flash": "llm-vertex#gemini-2.0-flash",
    "gemini-2.0-flash-lite": "llm#gemini-2.0-flash-lite",
    "gemini-2.5-pro-preview-03-25": "llm-thinking#gemini-2.5-pro",
    "gemini-2.5-pro-exp-03-25": "llm-thinking#gemini-2.5-pro",
    "gemini-2.5-flash-preview-04-17": "llm-thinking#gemini-2.5-flash",
    "gemini-2.0-flash-exp": "llm-imagen#gemini-2.0-flash",
    "gemini-2.0-flash-exp-image-generation": "llm-imagen#gemini-2.0-flash",
    "gemini-2.0-flash-001": "llm-vertex#gemini-2.0-flash",
    "gemini-2.0-pro-exp-02-05": "llm#gemini-2.0-pro",
    "gemini-1.5-flash": "llm-vertex#gemini-1.5-flash",
    "gemini-1.5-pro": "llm-vertex#gemini-1.5-pro",
    "gemini-1.5-pro-002": "llm-vertex#gemini-1.5-pro",
    "gemini-1.5-flash-8b": "llm#gemini-1.5-flash-8b",
    "gemini-2.0-flash-thinking-exp-01-21": "llm-thinking#gemini-2.0-flash-thinking",
    "imagen-3.0-generate-002": "imagen#imagen-3.0"
}

thinking_models = [m for m in all_model_types if "thinking" in all_model_types[m]]

imagen_models = [m for m in all_model_types if "imagen" in all_model_types[m]]

vertex_models = [m for m in all_model_types if "vertex" in all_model_types[m]]

models = [m for m in all_model_types if "llm" in all_model_types[m]]

models
['gemini-2.0-flash',
 'gemini-2.0-flash-lite',
 'gemini-2.5-pro-preview-03-25',
 'gemini-2.5-pro-exp-03-25',
 'gemini-2.5-flash-preview-04-17',
 'gemini-2.0-flash-exp',
 'gemini-2.0-flash-exp-image-generation',
 'gemini-2.0-flash-001',
 'gemini-2.0-pro-exp-02-05',
 'gemini-1.5-flash',
 'gemini-1.5-pro',
 'gemini-1.5-pro-002',
 'gemini-1.5-flash-8b',
 'gemini-2.0-flash-thinking-exp-01-21']

With the impressive speed at which new models are shipping lately, it's not easy to keep track of all the capabilities, model names and versions that can be accessed with Gemini. The all_model_types dictionary is a curated list of the possibilities that can be used. All the other models are still accessible, but they are mostly legacy models.
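
Each tag encodes the model's capabilities together with the canonical name used later for pricing lookups; splitting on "#" recovers both. A quick sketch:

caps, canonical = all_model_types["gemini-2.5-flash-preview-04-17"].split("#")
caps.split("-"), canonical
(['llm', 'thinking'], 'gemini-2.5-flash')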

Token Usage

r.usage_metadata.model_fields_set
{'candidates_token_count', 'prompt_token_count', 'total_token_count'}

source

usage

 usage (inp=0, out=0, cached=0)

A quicker and simpler constructor for the Usage Metadata model

        Type  Default  Details
inp     int   0        Number of input tokens (excluding cached)
out     int   0        Number of output tokens
cached  int   0        Number of cached tokens
Exported source
def usage(inp=0,     # Number of input tokens (excluding cached)
          out=0,     # Number of output tokens
          cached=0): # Number of cached tokens
    """A quicker and simpler constructor for the Usage Metadata model"""
    return types.GenerateContentResponseUsageMetadata(cached_content_token_count=cached, 
                                                      candidates_token_count=out, 
                                                      prompt_token_count=inp + cached, 
                                                      total_token_count=inp + out + cached)

Unusually, prompt_token_count includes both cached and uncached prompt tokens.

usage(0, 32, 12)
GenerateContentResponseUsageMetadata(cached_content_token_count=12, candidates_token_count=32, prompt_token_count=12, total_token_count=44)

As usual, constructors for these models are very verbose, so we build a simpler version.
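
For comparison, this is the equivalent (and much more verbose) direct construction with the genai types:

types.GenerateContentResponseUsageMetadata(cached_content_token_count=12, candidates_token_count=32,
                                           prompt_token_count=12, total_token_count=44)
GenerateContentResponseUsageMetadata(cached_content_token_count=12, candidates_token_count=32, prompt_token_count=12, total_token_count=44)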


GenerateContentResponseUsageMetadata.total

 GenerateContentResponseUsageMetadata.total ()
Exported source
@patch(as_prop=True)
def cached(self: types.GenerateContentResponseUsageMetadata): 
    return self.cached_content_token_count or 0

@patch(as_prop=True)
def inp(self: types.GenerateContentResponseUsageMetadata): 
    return (self.prompt_token_count - self.cached) or 0

@patch(as_prop=True)
def out(self: types.GenerateContentResponseUsageMetadata): 
    return self.candidates_token_count or 0

@patch(as_prop=True)
def total(self: types.GenerateContentResponseUsageMetadata): 
    return self.total_token_count or self.prompt_token_count + self.candidates_token_count

GenerateContentResponseUsageMetadata.out

 GenerateContentResponseUsageMetadata.out ()

GenerateContentResponseUsageMetadata.inp

 GenerateContentResponseUsageMetadata.inp ()

GenerateContentResponseUsageMetadata.cached

 GenerateContentResponseUsageMetadata.cached ()
u = usage(1, 2, 3)
u.inp, u.out, u.cached, u.total
(1, 2, 3, 6)

We patch a few properties to make dealing with the usage object a bit less verbose.


GenerateContentResponseUsageMetadata.__repr__

 GenerateContentResponseUsageMetadata.__repr__ ()
Exported source
@patch
def __repr__(self: types.GenerateContentResponseUsageMetadata):
    return f"Cached: {self.cached}; In: {self.inp}; Out: {self.out}; Total: {self.total}"

@patch
def _repr_markdown_(self: types.GenerateContentResponseUsageMetadata):
    return self.__repr__()
u

Cached: 3; In: 1; Out: 2; Total: 6

Finally, we make the string and markdown representations a bit more readable and consistent with the ones in claudette.


GenerateContentResponseUsageMetadata.__add__

 GenerateContentResponseUsageMetadata.__add__ (other)
Exported source
@patch
def __add__(self: types.GenerateContentResponseUsageMetadata, other):
    cached = getattr(self, "cached", 0) + getattr(other, "cached", 0)
    return usage(self.inp + other.inp, self.out + other.out, cached)
usage(5, 1) + usage(32, 32, 32)

Cached: 32; In: 37; Out: 33; Total: 102
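
Summing usage objects makes it easy to keep a running total across several responses, e.g. when tracking a whole conversation. A small sketch using the responses r and mmr from the setup above:

total = usage()
for resp in [r, mmr]: total += resp.usage_metadata
total

Cached: 0; In: 19; Out: 29; Total: 48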

Pricings


source

get_pricing

 get_pricing (model, prompt_tokens, thinking_mode=True)
Exported source
# $/1M input (non cached) tokens, $/1M output tokens, $/1M cached input tokens, 

pricings = {
    'gemini-2.5-pro_short': [1.25, 10., 0.3125],
    'gemini-2.5-pro_long': [2.5, 15., 0.625],
    'gemini-2.5-flash': [0.15, .6, 0.0375], # No cache for this yet
    'gemini-2.5-flash_thinking': [0.15, 3.5, 0.0375],
    'gemini-2.0-flash': [0.1, 0.4, 0.025],
    'gemini-2.0-flash-lite': [0.075, 0.3, 0.01875],
    'gemini-1.5-flash_short': [0.075, 0.3, 0.01875],
    'gemini-1.5-flash_long': [0.15, 0.6, 0.0375], 
    'gemini-1.5-flash-8b_short': [0.0375, 0.15, 0.01],
    'gemini-1.5-flash-8b_long': [0.075, 0.3, 0.02],
    'gemini-1.5-pro_short': [1.25, 5., 0.3125],   
    'gemini-1.5-pro_long': [2.5, 10., 0.625],
 }


audio_token_pricings = {
    'gemini-2.0-flash': [0.7, 0.4, 0.175],
}

def get_pricing(model, prompt_tokens, thinking_mode=True):
    if "exp" in model: return [0, 0, 0]
    limit = 200_000 if '2.5-pro' in model else 128_000
    suff = "_long" if prompt_tokens > limit else "_short"
    m = all_model_types.get(model, "#").split("#")[-1]
    m += suff if "1.5" in m or "2.5-pro" in m else ""
    m += "_thinking" if "2.5-flash" in m and thinking_mode else ""
    return pricings.get(m, [0, 0, 0])

The pricing of Gemini model queries is quite byzantine: for Gemini 1.5 the price of a query depends on the prompt length, for Gemini 2.0 models it depends on the input type, and for Gemini 2.5 it again depends on the prompt length (except for 2.5 Flash, whose pricing depends on whether thinking is enabled or not)…

A few things to notice:

  • The differential pricing for audio tokens on Gemini 2.0 Flash is not implemented (and the cost for audio token caching will be active starting Feb 24, 2025)
  • Caching costs are not only per query. There is an added cost of storing the cache, which is computed separately and depends on the cached content storage time, as well as the number of tokens

TODO: for the moment we are ignoring these nuances in the cost calculations, but we might want to do more precise computations at a later date (and maybe include the cost of multimedia generation with Gemini 2, when it becomes available).
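
Despite these caveats, a rough per-query estimate can already be sketched by combining the pricing triple ($ per 1M input, output and cached tokens) with a usage object for a hypothetical query; the cost property defined below performs the same arithmetic:

uq = usage(inp=2_000, out=500, cached=1_000)  # hypothetical query
ip, op, cp = get_pricing('gemini-2.0-flash', uq.prompt_token_count)
(uq.inp * ip + uq.out * op + uq.cached * cp) / 1e6
0.000425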

for m in models:
    print(m, "SHORT PROMPT", get_pricing(m , 1_000))
    print(m, "LONG PROMPT", get_pricing(m , 1_000_000))
gemini-2.0-flash SHORT PROMPT [0.1, 0.4, 0.025]
gemini-2.0-flash LONG PROMPT [0.1, 0.4, 0.025]
gemini-2.0-flash-lite SHORT PROMPT [0.075, 0.3, 0.01875]
gemini-2.0-flash-lite LONG PROMPT [0.075, 0.3, 0.01875]
gemini-2.5-pro-exp-03-25 SHORT PROMPT [0, 0, 0]
gemini-2.5-pro-exp-03-25 LONG PROMPT [0, 0, 0]
gemini-2.0-flash-exp SHORT PROMPT [0, 0, 0]
gemini-2.0-flash-exp LONG PROMPT [0, 0, 0]
gemini-2.0-flash-exp-image-generation SHORT PROMPT [0, 0, 0]
gemini-2.0-flash-exp-image-generation LONG PROMPT [0, 0, 0]
gemini-2.0-flash-001 SHORT PROMPT [0.1, 0.4, 0.025]
gemini-2.0-flash-001 LONG PROMPT [0.1, 0.4, 0.025]
gemini-2.0-pro-exp-02-05 SHORT PROMPT [0, 0, 0]
gemini-2.0-pro-exp-02-05 LONG PROMPT [0, 0, 0]
gemini-1.5-flash SHORT PROMPT [0.075, 0.3, 0.01875]
gemini-1.5-flash LONG PROMPT [0.15, 0.6, 0.0375]
gemini-1.5-pro SHORT PROMPT [1.25, 5.0, 0.3125]
gemini-1.5-pro LONG PROMPT [2.5, 10.0, 0.625]
gemini-1.5-pro-002 SHORT PROMPT [1.25, 5.0, 0.3125]
gemini-1.5-pro-002 LONG PROMPT [2.5, 10.0, 0.625]
gemini-1.5-flash-8b SHORT PROMPT [0.0375, 0.15, 0.01]
gemini-1.5-flash-8b LONG PROMPT [0.075, 0.3, 0.02]
gemini-2.0-flash-thinking-exp-01-21 SHORT PROMPT [0, 0, 0]
gemini-2.0-flash-thinking-exp-01-21 LONG PROMPT [0, 0, 0]

GenerateContentResponse.cost

 GenerateContentResponse.cost ()
Exported source
@patch(as_prop=True)
def cost(self: types.GenerateContentResponse):
    thinking_mode = getattr(self, "_thinking", True)
    ip, op, cp = get_pricing(self.model_version, self.usage_metadata.prompt_token_count, thinking_mode)
    return ((self.usage_metadata.inp * ip) + (self.usage_metadata.out * op) + (self.usage_metadata.cached * cp)) / 1e6
r.cost
4.7e-06

GenerateImagesResponse.cost

 GenerateImagesResponse.cost ()
Exported source
@patch(as_prop=True)
def cost(self: types.GenerateImagesResponse): return 0.03 * len(self.generated_images)
mr.cost
0.06

There is some inconsistency in the pricing of Imagen models: according to this page it's $0.03 per image generated, while according to the pricing page it's $0.03 per million tokens (but there is no way of counting tokens on the generated image). Until this is clarified, we'll stick with the former for simplicity.

Formatting output

r.text
'Hi there! How can I help you today?\n'
r.to_json_dict()
{'candidates': [{'content': {'parts': [{'text': 'Hi there! How can I help you today?\n'}],
    'role': 'model'},
   'avg_logprobs': -0.006894644688476215,
   'finish_reason': 'STOP'}],
 'model_version': 'gemini-2.0-flash',
 'usage_metadata': {'candidates_token_count': 11,
  'prompt_token_count': 3,
  'total_token_count': 14},
 'automatic_function_calling_history': []}
r.model_fields_set
{'automatic_function_calling_history',
 'candidates',
 'model_version',
 'usage_metadata'}
type(r.usage_metadata), type(r.candidates), type(r.model_version)
(google.genai.types.GenerateContentResponseUsageMetadata, list, str)

We want to recursively navigate the nested tree of submodels and attributes. Each model in genai.types has three types of attributes:

  1. another genai.types model (like usage_metadata)
  2. a list (like candidates)
  3. a primitive type (like model_version)

We could extract the attributes of a model from the class itself, but to avoid cluttering the output we use the model_fields_set property, which only includes the attributes that are actually set on the model instance.
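
A quick check of the three cases on our simple response r:

for f in sorted(r.model_fields_set): print(f, '->', type(getattr(r, f)).__name__)
automatic_function_calling_history -> list
candidates -> list
model_version -> str
usage_metadata -> GenerateContentResponseUsageMetadata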


source

get_repr

 get_repr (m, lab='')

Recursively fetch the markdown representation of genai.types fields, wrapping lists into <details> blocks

Exported source
def get_repr(m, lab=""):
    """Recurisvely fetch the markdown representation of genai.types fields, wrapping lists into `<details>` blocks"""
    if hasattr(m, '_repr_markdown_'): return m._repr_markdown_()
    if is_listy(m): return "\n".join([f"<details open='true'><summary>{lab}[{i}]</summary>{get_repr(li)}</details>" for i, li in enumerate(m)])
    if isinstance(m, dict): return "<ul>" + "\n".join([f"<li><b>{i}</b>: {get_repr(li, i)}</li>" for i, li in m.items()]) + "</ul>"
    if isinstance(m, bytes): return m[:10] + b'...'
    return str(m)
from IPython.display import Markdown
Markdown(get_repr([["A", "B", "C"], "b", "c", {"x": 2, "y": {"as": "sa"}}], "ex"))
ex[0]
[0] A
[1] B
[2] C
ex[1] b
ex[2] c
ex[3]
  • x: 2
  • y:
    • as: sa

The basic recursive loop is in place: we can handle genai.types models (via their _repr_markdown_ methods), strings, and lists. The handling of bytes is there to avoid polluting (or crashing) the representation with a huge list of characters in case of a multimodal response.
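
For instance, a long byte string is cut short instead of being dumped in full:

get_repr(b"\x89PNG\r\n\x1a\n" + b"\x00" * 10_000)
b'\x89PNG\r\n\x1a\n\x00\x00...'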


source

det_repr

 det_repr (m)
Exported source
def det_repr(m): return "<ul>" + "".join(f"<li><code>{d}</code>: {get_repr(getattr(m, d), d)}</li>" for d in m.model_fields_set) + "</ul>"
Markdown(det_repr(r.usage_metadata))
  • total_token_count: 14
  • candidates_token_count: 11
  • prompt_token_count: 3
Exported source
@patch
def _repr_markdown_(self: genai._common.BaseModel):
    return det_repr(self)
r
  • usage_metadata: Cached: 0; In: 3; Out: 11; Total: 14
  • model_version: gemini-2.0-flash
  • automatic_function_calling_history:
  • candidates:
    candidates[0]
    • finish_reason: FinishReason.STOP
    • content:
      • parts:
        parts[0]
        • text: Hi there! How can I help you today?
      • role: model
    • avg_logprobs: -0.006894644688476215

By using fastcore’s patch on the _common.BaseModel we have made sure that all the models in genai.types have a nice consistent markdown representation. We can now refine the representation for some of the types.

Response representation

To simplify the handling of multimodal outputs, we need utility functions to extract the actual content from the generation response.


source

contents

 contents (r:google.genai.types.GenerateContentResponse)

Helper function to extract the text content from a Gemini Response

Exported source
def all_contents(r: genai.types.GenerateContentResponse | genai.types.GenerateImagesResponse):
    """Returns a dictionary with the contents of a Gemini model response"""
    cts = {'text': '', 'images': [], 'calls': []}
    for part in nested_idx(r, 'candidates', 0, 'content', 'parts') or []:
        if part.text is not None: cts['text'] += part.text
        if part.inline_data is not None:
            cts['images'].append(types.Image(image_bytes=part.inline_data.data, mime_type=part.inline_data.mime_type))
    for im in nested_idx(r, 'generated_images') or []:
        cts['images'].append(im.image)
    for fc in nested_idx(r, 'function_calls') or []:
        cts['calls'].append(fc.to_json_dict())
    return cts

def contents(r: genai.types.GenerateContentResponse):
    """Helper functions to extract the text content from a Gemini Respons"""
    return all_contents(r).get('text', '')

source

all_contents

 all_contents (r:google.genai.types.GenerateContentResponse|google.genai.types.GenerateImagesResponse)

Returns a dictionary with the contents of a Gemini model response

def rcheck(cts):
    print("\n---")
    print(f"Text: {cts['text']}")
    print(f"N. Images: {len(cts['images'])}")
    print([type(i) for i in cts['images']])

for resp in [r, mr, mmr]: rcheck(contents(resp))

---
Text: Hi there! How can I help you today?

N. Images: 0
[]

---
Text: 
N. Images: 2
[<class 'google.genai.types.Image'>, <class 'google.genai.types.Image'>]

---
Text: 

Still water so calm,
Lily pads like floating dreams,
Peace the only sound.
N. Images: 1
[<class 'google.genai.types.Image'>]

source

response_md_repr

 response_md_repr (resp:google.genai.types.GenerateContentResponse|google.genai.types.GenerateImagesResponse)
Exported source
def response_md_repr(resp: types.GenerateContentResponse | types.GenerateImagesResponse):
    c = ''
    cts = all_contents(resp)
    if cts['images'] or cts['text']:
        for img in cts['images']:
            b64 = base64.b64encode(img.image_bytes).decode("utf-8")
            c += f'<div style="width: 200px; height: auto;"><img src="data:{img.mime_type};base64,{b64}" /></div>'
        c += cts['text'].replace("\n", "\n\n")
    if cts['calls']:
        calls = (f"<code>{call['name']}({', '.join([f'{a}={v}' for a, v in call['args'].items()])})</code>" for call in cts['calls'])
        calls_repr = '\n'.join(f'<li>{c}</li>' for c in calls)
        c += f"<details><summary>**Function Calls**</summary><ul>{calls_repr}</ul></details>"
    dets = det_repr(resp)
    return f"""{c}\n<details>{dets}</details>"""
    

@patch
def _repr_markdown_(self: types.GenerateContentResponse):
    return response_md_repr(self)

@patch
def _repr_markdown_(self: types.GenerateImagesResponse):
    return response_md_repr(self)
r
Hi there! How can I help you today?
  • usage_metadata: Cached: 0; In: 3; Out: 11; Total: 14
  • model_version: gemini-2.0-flash
  • automatic_function_calling_history:
  • candidates:
    candidates[0]
    • finish_reason: FinishReason.STOP
    • content:
      • parts:
        parts[0]
        • text: Hi there! How can I help you today?
      • role: model
    • avg_logprobs: -0.006894644688476215
mr
  • generated_images:
    generated_images[0]
    • image:
      • image_bytes: b’89PNG1a…’
      • mime_type: image/png
    generated_images[1]
    • image:
      • image_bytes: b’89PNG1a…’
      • mime_type: image/png
mmr


Still water so calm,
Lily pads like floating dreams,
Peace the only sound.
  • usage_metadata: Cached: 0; In: 16; Out: 18; Total: 34
  • model_version: gemini-2.0-flash-exp
  • automatic_function_calling_history:
  • candidates:
    candidates[0]
    • finish_reason: FinishReason.STOP
    • content:
      • parts:
        parts[0]
        • inline_data:
          • data: b’89PNG1a…’
          • mime_type: image/png
        parts[1]
        • text:

          Still water so calm, Lily pads like floating dreams, Peace the only sound.
      • role: model
    • index: 0