Source code for tau_eval.metrics.rouge

import evaluate


# Load the ROUGE metric once, as a module-level statement, so that
# `compute_rouge` reuses the same object instead of reloading it on every call.
rouge = evaluate.load("rouge")


[docs] def compute_rouge( input_texts: str | list[str], output_texts: str | list[str], ) -> dict[str, list[float]]: """ Computes ROUGE scores for a list of input and output text pairs. Args: input_texts: A list of input text strings. output_texts: A list of output text strings. Returns: A dictionary containing ROUGE scores for each input-output pair. """ if not isinstance(input_texts, list): input_texts = [input_texts] # type: ignore if not isinstance(output_texts, list): output_texts = [output_texts] # type: ignore assert len(input_texts) == len(output_texts), "inputs are different lengths" return rouge.compute(predictions=output_texts, references=input_texts, use_aggregator=False)