Quentin Mace committed on
Commit ·
856deb1
1
Parent(s): 2b95769
final_v0
Browse files
- app.py +17 -10
- app/utils.py +45 -18
- data/pipeline_handler.py +34 -8
app.py
CHANGED
|
@@ -52,7 +52,7 @@ def main():
|
|
| 52 |
data_pipeline = pipeline_handler.render_df(initial_metric_v3, "english")
|
| 53 |
data_pipeline = add_rank_and_format(data_pipeline, benchmark_version=3, is_pipeline=True)
|
| 54 |
|
| 55 |
-
num_datasets_pipeline = len(data_pipeline.columns) -
|
| 56 |
num_scores_pipeline = len(data_pipeline) * num_datasets_pipeline
|
| 57 |
num_pipelines = len(data_pipeline)
|
| 58 |
|
|
@@ -115,6 +115,13 @@ def main():
|
|
| 115 |
min-width: 100px !important; /* Just wide enough to fit long words like "Pharmaceuticals" */
|
| 116 |
max-width: 120px !important;
|
| 117 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
"""
|
| 119 |
|
| 120 |
with gr.Blocks(css=css) as block:
|
|
@@ -187,7 +194,7 @@ def main():
|
|
| 187 |
|
| 188 |
if len(data_pipeline) > 0:
|
| 189 |
datasets_columns_pipeline = [
|
| 190 |
-
col for col in data_pipeline.columns[4:] if col not in ["Indexing latency (s/doc)", "Search latency (s/query)", "Average"]
|
| 191 |
]
|
| 192 |
|
| 193 |
with gr.Row():
|
|
@@ -212,7 +219,7 @@ def main():
|
|
| 212 |
)
|
| 213 |
|
| 214 |
with gr.Row():
|
| 215 |
-
# Datatype: Rank,
|
| 216 |
datatype_pipeline = ["number", "markdown", "number", "number", "number"] + ["number"] * len(datasets_columns_pipeline)
|
| 217 |
dataframe_pipeline = gr.Dataframe(data_pipeline, datatype=datatype_pipeline, type="pandas", elem_id="pipeline-table")
|
| 218 |
|
|
@@ -223,9 +230,9 @@ def main():
|
|
| 223 |
data = filter_models(data, search_term)
|
| 224 |
if selected_columns:
|
| 225 |
# Include core columns plus selected dataset columns
|
| 226 |
-
core_cols = ["Rank", "
|
| 227 |
-
if "Average" in data.columns:
|
| 228 |
-
core_cols.insert(4, "Average")
|
| 229 |
data = data[core_cols + selected_columns]
|
| 230 |
return data
|
| 231 |
|
|
@@ -338,7 +345,7 @@ def main():
|
|
| 338 |
data = filter_models(data, search_term)
|
| 339 |
# data = remove_duplicates(data) # Add this line
|
| 340 |
if selected_columns:
|
| 341 |
-
data = data[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + selected_columns]
|
| 342 |
return data
|
| 343 |
|
| 344 |
with gr.Row():
|
|
@@ -440,7 +447,7 @@ def main():
|
|
| 440 |
data = add_rank_and_format(data, benchmark_version=1, selected_columns=selected_columns)
|
| 441 |
data = filter_models(data, search_term)
|
| 442 |
if selected_columns:
|
| 443 |
-
data = data[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + selected_columns]
|
| 444 |
return data
|
| 445 |
|
| 446 |
with gr.Row():
|
|
@@ -562,7 +569,7 @@ def main():
|
|
| 562 |
data = filter_models(data, search_term)
|
| 563 |
# data = remove_duplicates(data) # Add this line
|
| 564 |
if selected_columns:
|
| 565 |
-
data = data[["Rank", "Model", "Average"] + selected_columns]
|
| 566 |
return data
|
| 567 |
|
| 568 |
with gr.Row():
|
|
@@ -665,7 +672,7 @@ def main():
|
|
| 665 |
data = filter_models(data, search_term)
|
| 666 |
# data = remove_duplicates(data) # Add this line
|
| 667 |
if selected_columns:
|
| 668 |
-
data = data[["Rank", "Model", "Average"] + selected_columns]
|
| 669 |
return data
|
| 670 |
|
| 671 |
with gr.Row():
|
|
|
|
| 52 |
data_pipeline = pipeline_handler.render_df(initial_metric_v3, "english")
|
| 53 |
data_pipeline = add_rank_and_format(data_pipeline, benchmark_version=3, is_pipeline=True)
|
| 54 |
|
| 55 |
+
num_datasets_pipeline = len(data_pipeline.columns) - 5 # Excluding Rank, Model, Indexing time, search time, Average
|
| 56 |
num_scores_pipeline = len(data_pipeline) * num_datasets_pipeline
|
| 57 |
num_pipelines = len(data_pipeline)
|
| 58 |
|
|
|
|
| 115 |
min-width: 100px !important; /* Just wide enough to fit long words like "Pharmaceuticals" */
|
| 116 |
max-width: 120px !important;
|
| 117 |
}
|
| 118 |
+
|
| 119 |
+
/* 3. Make the Model column (2nd column) wider for pipeline table */
|
| 120 |
+
#pipeline-table table th:nth-child(2),
|
| 121 |
+
#pipeline-table table td:nth-child(2) {
|
| 122 |
+
min-width: 400px !important;
|
| 123 |
+
max-width: 500px !important;
|
| 124 |
+
}
|
| 125 |
"""
|
| 126 |
|
| 127 |
with gr.Blocks(css=css) as block:
|
|
|
|
| 194 |
|
| 195 |
if len(data_pipeline) > 0:
|
| 196 |
datasets_columns_pipeline = [
|
| 197 |
+
col for col in data_pipeline.columns[4:] if col not in ["Indexing latency (s/doc)", "Search latency (s/query)", "Average Score"]
|
| 198 |
]
|
| 199 |
|
| 200 |
with gr.Row():
|
|
|
|
| 219 |
)
|
| 220 |
|
| 221 |
with gr.Row():
|
| 222 |
+
# Datatype: Rank, Pipeline, Indexing latency (s/doc), Search latency (s/query), Average + datasets
|
| 223 |
datatype_pipeline = ["number", "markdown", "number", "number", "number"] + ["number"] * len(datasets_columns_pipeline)
|
| 224 |
dataframe_pipeline = gr.Dataframe(data_pipeline, datatype=datatype_pipeline, type="pandas", elem_id="pipeline-table")
|
| 225 |
|
|
|
|
| 230 |
data = filter_models(data, search_term)
|
| 231 |
if selected_columns:
|
| 232 |
# Include core columns plus selected dataset columns
|
| 233 |
+
core_cols = ["Rank", "Pipeline", "Indexing latency (s/doc)", "Search latency (s/query)"]
|
| 234 |
+
if "Average Score" in data.columns:
|
| 235 |
+
core_cols.insert(4, "Average Score")
|
| 236 |
data = data[core_cols + selected_columns]
|
| 237 |
return data
|
| 238 |
|
|
|
|
| 345 |
data = filter_models(data, search_term)
|
| 346 |
# data = remove_duplicates(data) # Add this line
|
| 347 |
if selected_columns:
|
| 348 |
+
data = data[["Rank", "Model", "Model Size (Million Parameters)", "Average Score"] + selected_columns]
|
| 349 |
return data
|
| 350 |
|
| 351 |
with gr.Row():
|
|
|
|
| 447 |
data = add_rank_and_format(data, benchmark_version=1, selected_columns=selected_columns)
|
| 448 |
data = filter_models(data, search_term)
|
| 449 |
if selected_columns:
|
| 450 |
+
data = data[["Rank", "Model", "Model Size (Million Parameters)", "Average Score"] + selected_columns]
|
| 451 |
return data
|
| 452 |
|
| 453 |
with gr.Row():
|
|
|
|
| 569 |
data = filter_models(data, search_term)
|
| 570 |
# data = remove_duplicates(data) # Add this line
|
| 571 |
if selected_columns:
|
| 572 |
+
data = data[["Rank", "Model", "Average Score"] + selected_columns]
|
| 573 |
return data
|
| 574 |
|
| 575 |
with gr.Row():
|
|
|
|
| 672 |
data = filter_models(data, search_term)
|
| 673 |
# data = remove_duplicates(data) # Add this line
|
| 674 |
if selected_columns:
|
| 675 |
+
data = data[["Rank", "Model", "Average Score"] + selected_columns]
|
| 676 |
return data
|
| 677 |
|
| 678 |
with gr.Row():
|
app/utils.py
CHANGED
|
@@ -1,12 +1,17 @@
|
|
|
|
|
|
|
|
| 1 |
|
| 2 |
-
|
| 3 |
-
def make_clickable_model(model_name, link=None, is_pipeline=False):
|
| 4 |
if is_pipeline:
|
| 5 |
-
# For pipelines:
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
| 8 |
if link is None:
|
| 9 |
-
link = f"https://github.com/illuin-tech/vidore-benchmark/blob/vidore_v3_pipeline/results/pipeline_descriptions/{
|
|
|
|
|
|
|
| 10 |
else:
|
| 11 |
# For regular models: replace __ and _ with /, and -thisisapoint- with .
|
| 12 |
desanitized_model_name = model_name.replace("__", "/")
|
|
@@ -25,7 +30,15 @@ def make_clickable_model(model_name, link=None, is_pipeline=False):
|
|
| 25 |
|
| 26 |
|
| 27 |
def add_rank(df, benchmark_version=1, selected_columns=None):
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
if selected_columns is None:
|
| 30 |
cols_to_rank = [
|
| 31 |
col
|
|
@@ -33,12 +46,14 @@ def add_rank(df, benchmark_version=1, selected_columns=None):
|
|
| 33 |
if col
|
| 34 |
not in [
|
| 35 |
"Model",
|
|
|
|
| 36 |
"Model Size (Million Parameters)",
|
| 37 |
"Memory Usage (GB, fp32)",
|
| 38 |
"Embedding Dimensions",
|
| 39 |
"Max Tokens",
|
| 40 |
"Compute Cost ($)",
|
| 41 |
"Queries per Second",
|
|
|
|
| 42 |
]
|
| 43 |
]
|
| 44 |
else:
|
|
@@ -48,24 +63,34 @@ def add_rank(df, benchmark_version=1, selected_columns=None):
|
|
| 48 |
df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
|
| 49 |
else:
|
| 50 |
# Only add Average column if it doesn't already exist
|
| 51 |
-
if "Average" not in df.columns:
|
| 52 |
-
df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
|
| 53 |
-
df.sort_values("Average", ascending=False, inplace=True)
|
| 54 |
df.insert(0, "Rank", list(range(1, len(df) + 1)))
|
| 55 |
-
# multiply values by 100 if they are floats and round to
|
| 56 |
for col in df.columns:
|
| 57 |
-
if df[col]
|
| 58 |
-
df[col] = df[col].apply(lambda x: round(x
|
| 59 |
-
elif df[col]
|
| 60 |
-
df[col] = df[col].apply(lambda x: round(x,
|
| 61 |
return df
|
| 62 |
|
| 63 |
|
| 64 |
def add_rank_and_format(df, benchmark_version=1, selected_columns=None, is_pipeline=False):
|
| 65 |
df = df.reset_index()
|
| 66 |
-
|
|
|
|
| 67 |
df = add_rank(df, benchmark_version, selected_columns)
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
# df = remove_duplicates(df)
|
| 70 |
return df
|
| 71 |
|
|
@@ -110,5 +135,7 @@ def get_pipeline_refresh_function(pipeline_handler):
|
|
| 110 |
|
| 111 |
def filter_models(data, search_term):
|
| 112 |
if search_term:
|
| 113 |
-
|
|
|
|
|
|
|
| 114 |
return data
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import math
|
| 3 |
|
| 4 |
+
def make_clickable_model(model_name, link=None, is_pipeline=False, folder_name=None):
|
|
|
|
| 5 |
if is_pipeline:
|
| 6 |
+
# For pipelines: use folder_name for link, model_name (alias) for display
|
| 7 |
+
link_folder = folder_name if folder_name else model_name
|
| 8 |
+
# Process folder name for link: only handle __ and -thisisapoint-
|
| 9 |
+
desanitized_folder = link_folder.replace("__", "/")
|
| 10 |
+
desanitized_folder = desanitized_folder.replace("-thisisapoint-", ".")
|
| 11 |
if link is None:
|
| 12 |
+
link = f"https://github.com/illuin-tech/vidore-benchmark/blob/vidore_v3_pipeline/results/pipeline_descriptions/{desanitized_folder}/description.json"
|
| 13 |
+
# Use word-wrap styling for potentially long pipeline aliases
|
| 14 |
+
return f'<a target="_blank" style="text-decoration: underline; word-wrap: break-word; white-space: normal; display: inline-block; max-width: 450px;" href="{link}">{model_name}</a>'
|
| 15 |
else:
|
| 16 |
# For regular models: replace __ and _ with /, and -thisisapoint- with .
|
| 17 |
desanitized_model_name = model_name.replace("__", "/")
|
|
|
|
| 30 |
|
| 31 |
|
| 32 |
def add_rank(df, benchmark_version=1, selected_columns=None):
|
| 33 |
+
# Convert numeric columns to proper float type (they may be 'object' dtype due to mixed data)
|
| 34 |
+
for col in df.columns:
|
| 35 |
+
if col not in ["Model", "Pipeline", "_folder_name"]:
|
| 36 |
+
df[col] = pd.to_numeric(df[col], errors="coerce")
|
| 37 |
+
|
| 38 |
+
# Only fill NaN for numeric columns to avoid issues with string columns like _folder_name
|
| 39 |
+
numeric_cols = df.select_dtypes(include=["float64", "int64", "float32", "int32"]).columns
|
| 40 |
+
df[numeric_cols] = df[numeric_cols].fillna(0.0)
|
| 41 |
+
|
| 42 |
if selected_columns is None:
|
| 43 |
cols_to_rank = [
|
| 44 |
col
|
|
|
|
| 46 |
if col
|
| 47 |
not in [
|
| 48 |
"Model",
|
| 49 |
+
"Pipeline", # For pipeline tables
|
| 50 |
"Model Size (Million Parameters)",
|
| 51 |
"Memory Usage (GB, fp32)",
|
| 52 |
"Embedding Dimensions",
|
| 53 |
"Max Tokens",
|
| 54 |
"Compute Cost ($)",
|
| 55 |
"Queries per Second",
|
| 56 |
+
"_folder_name", # Hidden column for pipeline link generation
|
| 57 |
]
|
| 58 |
]
|
| 59 |
else:
|
|
|
|
| 63 |
df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
|
| 64 |
else:
|
| 65 |
# Only add Average column if it doesn't already exist
|
| 66 |
+
if "Average Score" not in df.columns:
|
| 67 |
+
df.insert(len(df.columns) - len(cols_to_rank), "Average Score", df[cols_to_rank].mean(axis=1, skipna=False))
|
| 68 |
+
df.sort_values("Average Score", ascending=False, inplace=True)
|
| 69 |
df.insert(0, "Rank", list(range(1, len(df) + 1)))
|
| 70 |
+
# multiply values by 100 if they are floats and round to 2 decimal places
|
| 71 |
for col in df.columns:
|
| 72 |
+
if pd.api.types.is_numeric_dtype(df[col]) and col not in ["Model Size (Million Parameters)", "Compute Cost ($)", "Queries per Second", "Indexing latency (s/doc)", "Search latency (s/query)", "Rank"]:
|
| 73 |
+
df[col] = df[col].apply(lambda x: round(x*100, 3 - int(math.floor(math.log10(abs(x*100)))) - 1))
|
| 74 |
+
elif pd.api.types.is_numeric_dtype(df[col]) and col in ["Indexing latency (s/doc)", "Search latency (s/query)"]:
|
| 75 |
+
df[col] = df[col].apply(lambda x: round(x, 3 - int(math.floor(math.log10(abs(x)))) - 1))
|
| 76 |
return df
|
| 77 |
|
| 78 |
|
| 79 |
def add_rank_and_format(df, benchmark_version=1, selected_columns=None, is_pipeline=False):
|
| 80 |
df = df.reset_index()
|
| 81 |
+
column_name = "Pipeline" if is_pipeline else "Model"
|
| 82 |
+
df = df.rename(columns={"index": column_name})
|
| 83 |
df = add_rank(df, benchmark_version, selected_columns)
|
| 84 |
+
|
| 85 |
+
if is_pipeline and "_folder_name" in df.columns:
|
| 86 |
+
# For pipelines, use folder_name for link generation
|
| 87 |
+
df[column_name] = df.apply(
|
| 88 |
+
lambda row: make_clickable_model(row[column_name], is_pipeline=True, folder_name=row["_folder_name"]),
|
| 89 |
+
axis=1
|
| 90 |
+
)
|
| 91 |
+
df = df.drop(columns=["_folder_name"])
|
| 92 |
+
else:
|
| 93 |
+
df[column_name] = df[column_name].apply(lambda x: make_clickable_model(x, is_pipeline=is_pipeline))
|
| 94 |
# df = remove_duplicates(df)
|
| 95 |
return df
|
| 96 |
|
|
|
|
| 135 |
|
| 136 |
def filter_models(data, search_term):
|
| 137 |
if search_term:
|
| 138 |
+
# Use "Pipeline" column for pipeline tables, "Model" for others
|
| 139 |
+
col_name = "Pipeline" if "Pipeline" in data.columns else "Model"
|
| 140 |
+
data = data[data[col_name].str.contains(search_term, case=False, na=False)]
|
| 141 |
return data
|
data/pipeline_handler.py
CHANGED
|
@@ -10,10 +10,12 @@ class PipelineHandler:
|
|
| 10 |
|
| 11 |
def __init__(self):
|
| 12 |
self.pipeline_infos = {}
|
|
|
|
| 13 |
self.github_base_url = "https://raw.githubusercontent.com/illuin-tech/vidore-benchmark/vidore_v3_pipeline/results/metrics"
|
|
|
|
| 14 |
self.available_datasets = []
|
| 15 |
self.available_languages = ["english"] # Default languages available
|
| 16 |
-
|
| 17 |
# Setup GitHub authentication if token is available
|
| 18 |
self.github_token = os.environ.get("GITHUB_TOKEN")
|
| 19 |
self.headers = {}
|
|
@@ -62,7 +64,7 @@ class PipelineHandler:
|
|
| 62 |
def fetch_json_from_github(self, pipeline_name: str, filename: str) -> Optional[Dict]:
|
| 63 |
"""Fetch a JSON file from GitHub raw content."""
|
| 64 |
url = f"{self.github_base_url}/{pipeline_name}/{filename}"
|
| 65 |
-
|
| 66 |
try:
|
| 67 |
response = requests.get(url, headers=self.headers)
|
| 68 |
response.raise_for_status()
|
|
@@ -71,6 +73,22 @@ class PipelineHandler:
|
|
| 71 |
print(f"Error fetching {filename} from {pipeline_name}: {e}")
|
| 72 |
return None
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
def get_pipeline_data(self):
|
| 75 |
"""Fetch all pipeline data from GitHub."""
|
| 76 |
pipeline_folders = self.get_pipeline_folders_from_github()
|
|
@@ -99,6 +117,10 @@ class PipelineHandler:
|
|
| 99 |
|
| 100 |
if pipeline_data:
|
| 101 |
self.pipeline_infos[pipeline_name] = pipeline_data
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
self.available_datasets = sorted(list(datasets_set))
|
| 104 |
self.available_languages = sorted(list(languages_set))
|
|
@@ -226,21 +248,25 @@ class PipelineHandler:
|
|
| 226 |
|
| 227 |
# Calculate average across datasets if there are multiple
|
| 228 |
if dataset_metrics:
|
| 229 |
-
row_data["Average"] = round(sum(dataset_metrics.values()) / len(dataset_metrics), 4)
|
| 230 |
-
|
| 231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
if pipeline_res:
|
| 234 |
df = pd.DataFrame(pipeline_res).T
|
| 235 |
# Reorder columns to have Average right after timing metrics
|
| 236 |
cols = list(df.columns)
|
| 237 |
-
if "Average" in cols:
|
| 238 |
-
cols.remove("Average")
|
| 239 |
# Insert Average after Search latency (s/query)
|
| 240 |
insert_pos = (
|
| 241 |
cols.index("Search latency (s/query)") + 1 if "Search latency (s/query)" in cols else 2
|
| 242 |
)
|
| 243 |
-
cols.insert(insert_pos, "Average")
|
| 244 |
df = df[cols]
|
| 245 |
return df
|
| 246 |
|
|
|
|
| 10 |
|
| 11 |
def __init__(self):
|
| 12 |
self.pipeline_infos = {}
|
| 13 |
+
self.pipeline_aliases = {} # Maps folder_name -> pipeline_alias for display
|
| 14 |
self.github_base_url = "https://raw.githubusercontent.com/illuin-tech/vidore-benchmark/vidore_v3_pipeline/results/metrics"
|
| 15 |
+
self.github_descriptions_base_url = "https://raw.githubusercontent.com/illuin-tech/vidore-benchmark/vidore_v3_pipeline/results/pipeline_descriptions"
|
| 16 |
self.available_datasets = []
|
| 17 |
self.available_languages = ["english"] # Default languages available
|
| 18 |
+
|
| 19 |
# Setup GitHub authentication if token is available
|
| 20 |
self.github_token = os.environ.get("GITHUB_TOKEN")
|
| 21 |
self.headers = {}
|
|
|
|
| 64 |
def fetch_json_from_github(self, pipeline_name: str, filename: str) -> Optional[Dict]:
|
| 65 |
"""Fetch a JSON file from GitHub raw content."""
|
| 66 |
url = f"{self.github_base_url}/{pipeline_name}/{filename}"
|
| 67 |
+
|
| 68 |
try:
|
| 69 |
response = requests.get(url, headers=self.headers)
|
| 70 |
response.raise_for_status()
|
|
|
|
| 73 |
print(f"Error fetching {filename} from {pipeline_name}: {e}")
|
| 74 |
return None
|
| 75 |
|
| 76 |
+
def fetch_pipeline_alias(self, pipeline_name: str) -> Optional[str]:
|
| 77 |
+
"""Fetch the pipeline_alias from description.json for a pipeline.
|
| 78 |
+
|
| 79 |
+
Uses raw.githubusercontent.com to avoid API rate limits.
|
| 80 |
+
"""
|
| 81 |
+
url = f"{self.github_descriptions_base_url}/{pipeline_name}/description.json"
|
| 82 |
+
|
| 83 |
+
try:
|
| 84 |
+
response = requests.get(url, headers=self.headers)
|
| 85 |
+
response.raise_for_status()
|
| 86 |
+
description = response.json()
|
| 87 |
+
return description.get("pipeline_alias")
|
| 88 |
+
except Exception as e:
|
| 89 |
+
print(f"Error fetching description for {pipeline_name}: {e}")
|
| 90 |
+
return None
|
| 91 |
+
|
| 92 |
def get_pipeline_data(self):
|
| 93 |
"""Fetch all pipeline data from GitHub."""
|
| 94 |
pipeline_folders = self.get_pipeline_folders_from_github()
|
|
|
|
| 117 |
|
| 118 |
if pipeline_data:
|
| 119 |
self.pipeline_infos[pipeline_name] = pipeline_data
|
| 120 |
+
# Fetch the pipeline alias for display (uses raw URL, not API)
|
| 121 |
+
alias = self.fetch_pipeline_alias(pipeline_name)
|
| 122 |
+
if alias:
|
| 123 |
+
self.pipeline_aliases[pipeline_name] = alias
|
| 124 |
|
| 125 |
self.available_datasets = sorted(list(datasets_set))
|
| 126 |
self.available_languages = sorted(list(languages_set))
|
|
|
|
| 248 |
|
| 249 |
# Calculate average across datasets if there are multiple
|
| 250 |
if dataset_metrics:
|
| 251 |
+
row_data["Average Score"] = round(sum(dataset_metrics.values()) / len(dataset_metrics), 4)
|
| 252 |
+
|
| 253 |
+
# Use pipeline_alias for display if available, otherwise fallback to folder name
|
| 254 |
+
display_name = self.pipeline_aliases.get(pipeline_name, pipeline_name)
|
| 255 |
+
# Store folder name for link generation (will be used in utils.py)
|
| 256 |
+
row_data["_folder_name"] = pipeline_name
|
| 257 |
+
pipeline_res[display_name] = row_data
|
| 258 |
|
| 259 |
if pipeline_res:
|
| 260 |
df = pd.DataFrame(pipeline_res).T
|
| 261 |
# Reorder columns to have Average right after timing metrics
|
| 262 |
cols = list(df.columns)
|
| 263 |
+
if "Average Score" in cols:
|
| 264 |
+
cols.remove("Average Score")
|
| 265 |
# Insert Average after Search latency (s/query)
|
| 266 |
insert_pos = (
|
| 267 |
cols.index("Search latency (s/query)") + 1 if "Search latency (s/query)" in cols else 2
|
| 268 |
)
|
| 269 |
+
cols.insert(insert_pos, "Average Score")
|
| 270 |
df = df[cols]
|
| 271 |
return df
|
| 272 |
|