# Source code for tau_eval.metrics.perplexity

import evaluate


# Load the perplexity metric once at import time; compute_perplexity reuses
# this module-level object on every call.
perplexity = evaluate.load(
    "perplexity",
    module_type="metric",
)


def compute_perplexity(
    output_texts: str | list[str], model_id: str = "gpt2"
) -> dict[str, list[float] | float]:
    """Compute perplexity scores for one or more output texts.

    Args:
        output_texts: A single text or a list of texts to score. A bare
            string is treated as a one-element batch; any other non-list
            iterable of strings (e.g. a tuple) is converted to a list.
        model_id: Identifier of the Hugging Face model passed to the metric
            for scoring.

    Returns:
        The dictionary produced by the perplexity metric's ``compute`` call.
        Per the Hugging Face metric documentation this is expected to hold
        ``"perplexities"`` (one score per input text) and
        ``"mean_perplexity"`` (their average) -- confirm against the
        installed ``evaluate`` version.
    """
    if isinstance(output_texts, str):
        # A lone string must be wrapped so the metric receives a batch of
        # texts rather than a single string.
        output_texts = [output_texts]
    elif not isinstance(output_texts, list):
        # Tuples, generators, etc.: materialise the items themselves instead
        # of nesting the whole container as one bogus "text".
        output_texts = list(output_texts)

    return perplexity.compute(predictions=output_texts, model_id=model_id)