"""
===============================================================================
DATA SPLITTING AT THE TOPOLOGICAL BREAKPOINT
Experimental study on the Elliptic Bitcoin dataset
===============================================================================

HYPOTHESIS 1: If the data is split not at random but at the "Topological
Breakpoint" where the network goes through a crisis, the model becomes much
more successful and stable at catching real-world illicit transactions.

HYPOTHESIS 2: If the split ignores shocks in the network, the model shows an
artificial, exaggerated success (performance inflation) in calm periods but
fails completely at the moment of crisis.

MODELS: GraphSAGE (GNN) + Random Forest + XGBoost + LightGBM
METRIC: Illicit F1-Score (minority-class focused)
DATA:   Elliptic Bitcoin Dataset (203K nodes, 234K edges, 49 timesteps)
===============================================================================
"""

import os
import json
import warnings
import time

import numpy as np
import pandas as pd
import networkx as nx
from collections import defaultdict
import matplotlib
matplotlib.use('Agg')  # must be selected before pyplot is imported
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score, average_precision_score
import xgboost as xgb
import lightgbm as lgb
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from torch_geometric.data import Data
from torch_geometric.utils import subgraph

warnings.filterwarnings('ignore')
np.random.seed(42)
torch.manual_seed(42)

OUTDIR = '/app/results_topo'
FIGDIR = '/app/figures_topo'
os.makedirs(OUTDIR, exist_ok=True)
os.makedirs(FIGDIR, exist_ok=True)

# Timesteps whose labelled illicit share exceeds this value are treated as
# "crisis" periods (adversarial split and the Figure 1 bar colours).
CRISIS_ILLICIT_RATE = 0.18

# ============================================================================
# STEP 1: DATA LOADING
# ============================================================================
print("=" * 80)
print("ADIM 1: VERİ YÜKLEME")
print("=" * 80)

feat_df = pd.read_csv('/app/data/elliptic_txs_features.csv', header=None)
edge_df = pd.read_csv('/app/data/elliptic_txs_edgelist.csv')
class_df = pd.read_csv('/app/data/elliptic_txs_classes.csv')

# Column 0 = transaction id, column 1 = timestep, remaining columns = features.
txids = feat_df.iloc[:, 0].values
timesteps_raw = feat_df.iloc[:, 1].values.astype(int)
features_np = feat_df.iloc[:, 2:].values.astype(np.float32)
N = len(txids)

id_map = {tid: i for i, tid in enumerate(txids)}
# 1 = illicit, 0 = licit, -1 = unlabelled.
label_map = {'1': 1, '2': 0, 'unknown': -1}

if 'txId' in class_df.columns:
    # Align labels to feature rows by transaction id instead of trusting that
    # both CSV files happen to share the same row order.
    labels_np = np.full(N, label_map['unknown'], dtype=np.int64)
    for tid, cls in zip(class_df['txId'].values, class_df['class'].values):
        row = id_map.get(tid)
        if row is not None:
            labels_np[row] = label_map[str(cls)]
else:
    # No id column available: fall back to positional alignment, but refuse to
    # continue if the two files cannot possibly line up.
    if len(class_df) != N:
        raise ValueError(
            f"Class file has {len(class_df)} rows but feature file has {N}; "
            "cannot align labels positionally."
        )
    labels_np = np.array([label_map[str(c)] for c in class_df['class'].values])

# Keep an edge only when BOTH endpoints are known. Filtering the two endpoint
# columns independently (and truncating to the shorter one) would silently
# pair sources with the wrong destinations.
edge_pairs = np.array(
    [(id_map[a], id_map[b])
     for a, b in zip(edge_df['txId1'].values, edge_df['txId2'].values)
     if a in id_map and b in id_map],
    dtype=np.int64,
).reshape(-1, 2)
src, dst = edge_pairs[:, 0], edge_pairs[:, 1]

labeled_mask = labels_np >= 0
print(f"Toplam düğüm: {N}, Kenar: {len(src)}")
print(f"Etiketli: {labeled_mask.sum()} (illicit={(labels_np == 1).sum()}, licit={(labels_np == 0).sum()})")

# ============================================================================
# STEP 2: TOPOLOGICAL METRICS — MONITOR THE HEALTH OF THE NETWORK
# ============================================================================
print("\n" + "=" * 80)
print("ADIM 2: HER TIMESTEP İÇİN AĞIN TOPOLOJİK SAĞLIĞINI HESAPLA")
print("=" * 80)

topo_metrics = {}
# Timestep of each edge endpoint, computed once instead of per timestep.
src_ts = timesteps_raw[src]
dst_ts = timesteps_raw[dst]

for ts in sorted(np.unique(timesteps_raw)):
    ts_idx = np.where(timesteps_raw == ts)[0]
    ts_nodes = set(ts_idx)
    # Edges whose two endpoints both belong to this timestep.
    ts_mask = (src_ts == ts) & (dst_ts == ts)
    ts_src, ts_dst = src[ts_mask], dst[ts_mask]

    G = nx.DiGraph()
    G.add_nodes_from(ts_nodes)
    G.add_edges_from(zip(ts_src, ts_dst))

    n_nodes = G.number_of_nodes()
    n_edges = G.number_of_edges()
    density = nx.density(G) if n_nodes > 1 else 0

    in_degs = [d for _, d in G.in_degree()]
    out_degs = [d for _, d in G.out_degree()]
    avg_in = np.mean(in_degs) if in_degs else 0
    avg_out = np.mean(out_degs) if out_degs else 0
    std_in = np.std(in_degs) if in_degs else 0

    # Connectivity is measured on the undirected view of the graph.
    G_undir = G.to_undirected()
    n_components = nx.number_connected_components(G_undir)
    largest_cc = max(nx.connected_components(G_undir), key=len)
    largest_cc_ratio = len(largest_cc) / max(n_nodes, 1)

    ts_labels = labels_np[ts_idx]
    n_labeled = int((ts_labels >= 0).sum())
    n_illicit = int((ts_labels == 1).sum())
    illicit_rate = n_illicit / max(n_labeled, 1)

    topo_metrics[ts] = {
        'n_nodes': n_nodes,
        'n_edges': n_edges,
        'density': density,
        'avg_in_degree': avg_in,
        'avg_out_degree': avg_out,
        'std_in_degree': std_in,
        'n_components': n_components,
        'largest_cc_ratio': largest_cc_ratio,
        'illicit_rate': illicit_rate,
        'n_labeled': n_labeled,
        'n_illicit': n_illicit,
    }
    print(f" TS {ts:2d}: nodes={n_nodes:5d} edges={n_edges:5d} density={density:.4f} "
          f"cc_ratio={largest_cc_ratio:.3f} components={n_components:3d} illicit={illicit_rate:.3f}")

# One row per timestep, one column per metric.
topo_df = pd.DataFrame(topo_metrics).T
topo_df.index.name = 'timestep'

# ============================================================================
# STEP 3: TOPOLOGICAL BREAKPOINT DETECTION ALGORITHM
# ============================================================================
print("\n" + "=" * 80)
print("ADIM 3: TOPOLOJİK KIRILMA NOKTASI TESPİT ALGORİTMASI")
print("=" * 80)
print("""
 Algoritma: Ağın "sağlık skoru"nu her timestep için hesapla.
 Sağlık skorunda en büyük düşüş = Topolojik Kırılma Noktası.

 Sağlık Skoru = normalize(density) + normalize(largest_cc_ratio) - normalize(n_components_ratio)

 Mantık:
 - Sağlıklı ağ = yoğun, bağlantılı, az parçalı
 - Kriz anı = yoğunluk düşer, bağlantılılık azalır, parçalanma artar
""")


def compute_health_score(topo_df):
    """Return a per-timestep network health score in the range 0-1.

    Each of ``density``, ``largest_cc_ratio`` and ``n_components`` is min-max
    normalised over all timesteps; the score is the mean of normalised
    density, normalised largest-component ratio and (1 - normalised component
    count).

    Args:
        topo_df: DataFrame indexed by timestep that contains the three
            columns named above.

    Returns:
        A pandas Series of health scores with the same index as ``topo_df``.
    """
    df = topo_df.copy()
    for col in ['density', 'largest_cc_ratio', 'n_components']:
        mi, ma = df[col].min(), df[col].max()
        # The epsilon avoids division by zero when a metric is constant.
        df[f'{col}_norm'] = (df[col] - mi) / (ma - mi + 1e-8)
    # Health = density + connectivity - fragmentation.
    df['health_score'] = (df['density_norm'] +
                          df['largest_cc_ratio_norm'] +
                          (1 - df['n_components_norm'])) / 3
    return df['health_score']


topo_df['health_score'] = compute_health_score(topo_df)

# Breakpoint = largest negative jump of the health score between consecutive
# timesteps.
health_diff = topo_df['health_score'].diff()
if health_diff.dropna().empty:
    raise ValueError("At least two timesteps are required to detect a breakpoint.")
breakpoint_ts = health_diff.idxmin()
breakpoint_score = health_diff.min()

# Secondary breakpoints: drops more than two standard deviations below the
# mean change.
threshold = health_diff.mean() - 2 * health_diff.std()
secondary_breaks = health_diff[health_diff < threshold].index.tolist()

print(f"\n ═══ ANA KIRILMA NOKTASI: Timestep {breakpoint_ts} ═══")
print(f" Sağlık skoru düşüşü: {breakpoint_score:.4f}")
print(f" İkincil kırılma noktaları: {secondary_breaks}")

# ============================================================================
# STEP 4: SPLITTING STRATEGIES
# ============================================================================
print("\n" + "=" * 80)
print("ADIM 4: 5 BÖLME STRATEJİSİ")
print("=" * 80)


def make_masks(train_ts_set, test_ts_set):
    """Build boolean train/test node masks from two sets of timesteps.

    Only labelled nodes are selected. If a timestep appears in both sets the
    node goes to the training mask only.

    Args:
        train_ts_set: Timesteps whose labelled nodes form the training set.
        test_ts_set: Timesteps whose labelled nodes form the test set.

    Returns:
        ``(train_mask, test_mask)``, two boolean arrays of length ``N``.
    """
    in_train = np.isin(timesteps_raw, list(train_ts_set))
    in_test = np.isin(timesteps_raw, list(test_ts_set))
    train_mask = labeled_mask & in_train
    test_mask = labeled_mask & in_test & ~in_train
    return train_mask, test_mask


all_ts = sorted(np.unique(timesteps_raw))


# Strategy 1: random split
def split_random():
    """Randomly assign 80% of labelled nodes to train and 20% to test."""
    labeled_idx = np.where(labeled_mask)[0]
    # A local generator gives the same permutation as reseeding the global
    # one with 42, without disturbing global RNG state.
    perm = np.random.RandomState(42).permutation(labeled_idx)
    split_pt = int(0.8 * len(perm))
    tr_mask = np.zeros(N, dtype=bool)
    te_mask = np.zeros(N, dtype=bool)
    tr_mask[perm[:split_pt]] = True
    te_mask[perm[split_pt:]] = True
    return tr_mask, te_mask


# Strategy 2: chronological (fixed 80%)
def split_chronological():
    """Train on the first ~80% of timesteps and test on the rest."""
    cutoff = all_ts[int(len(all_ts) * 0.8) - 1]
    train_ts = set(ts for ts in all_ts if ts <= cutoff)
    test_ts = set(ts for ts in all_ts if ts > cutoff)
    return make_masks(train_ts, test_ts)


# Strategy 3: TOPOLOGICAL BREAKPOINT (our contribution)
def split_topological_breakpoint():
    """Train on timesteps before the breakpoint, test from it onwards."""
    bp = breakpoint_ts
    train_ts = set(ts for ts in all_ts if ts < bp)
    test_ts = set(ts for ts in all_ts if ts >= bp)
    return make_masks(train_ts, test_ts)


# Strategy 4: sliding window (last 10 timesteps are the test set)
def split_sliding_window():
    """Test on the last 10 timesteps and train on everything earlier."""
    test_ts = set(all_ts[-10:])
    train_ts = set(all_ts[:-10])
    return make_masks(train_ts, test_ts)


# Strategy 5: adversarial (train on calm periods, test on crisis periods)
def split_adversarial():
    """Train on calm timesteps and test on crisis timesteps.

    A timestep counts as crisis when its illicit rate exceeds
    ``CRISIS_ILLICIT_RATE``; the main breakpoint timestep is always included.
    """
    crisis_ts = set(ts for ts in all_ts
                    if topo_df.loc[ts, 'illicit_rate'] > CRISIS_ILLICIT_RATE)
    crisis_ts.update({breakpoint_ts})
    calm_ts = set(all_ts) - crisis_ts
    return make_masks(calm_ts, crisis_ts)


strategies = {
    'Rastgele': split_random,
    'Kronolojik': split_chronological,
    'Topolojik Kırılma (Bizim)': split_topological_breakpoint,
    'Kayan Pencere': split_sliding_window,
    'Düşmanca-Kriz': split_adversarial,
}

for name, fn in strategies.items():
    tr, te = fn()
    n_tr_ill = labels_np[tr].sum()
    n_te_ill = labels_np[te].sum()
    print(f" {name:30s}: train={tr.sum():5d} (ill={n_tr_ill:4d}), "
          f"test={te.sum():5d} (ill={n_te_ill:4d})")

# ============================================================================
# STEP 5: GraphSAGE MODEL
# ============================================================================
print("\n" + "=" * 80)
print("ADIM 5: GraphSAGE + KLASİK MODELLER")
print("=" * 80)


class GraphSAGE(torch.nn.Module):
    """Stack of SAGEConv layers with ReLU and dropout between layers.

    The stack always has an input and an output convolution; ``n_layers - 2``
    hidden convolutions are inserted between them.
    """

    def __init__(self, in_ch, hid_ch=128, out_ch=2, n_layers=2, dropout=0.3):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        self.convs.append(SAGEConv(in_ch, hid_ch))
        for _ in range(n_layers - 2):
            self.convs.append(SAGEConv(hid_ch, hid_ch))
        self.convs.append(SAGEConv(hid_ch, out_ch))
        self.dropout = dropout

    def forward(self, x, edge_index):
        """Return the raw output of the last convolution for every node."""
        for i, conv in enumerate(self.convs):
            x = conv(x, edge_index)
            # No activation/dropout after the final layer.
            if i < len(self.convs) - 1:
                x = x.relu()
                x = F.dropout(x, p=self.dropout, training=self.training)
        return x


# Graph tensors shared by all GraphSAGE runs.
x_tensor = torch.from_numpy(features_np).float()
edge_index = torch.tensor(np.stack([src, dst]), dtype=torch.long)
# Unlabelled nodes get a placeholder class 0; they are never selected by the
# train/test masks.
y_tensor = torch.from_numpy(np.where(labels_np < 0, 0, labels_np)).long()


def _binary_metrics(y_true, y_pred, y_prob):
    """Compute the metric dict shared by all models.

    AUROC falls back to 0.5 and AUPRC to 0 when ``y_true`` contains a single
    class, because neither score is defined in that case.
    """
    has_both_classes = len(np.unique(y_true)) > 1
    return {
        'f1': f1_score(y_true, y_pred, zero_division=0),
        'precision': precision_score(y_true, y_pred, zero_division=0),
        'recall': recall_score(y_true, y_pred, zero_division=0),
        'auroc': roc_auc_score(y_true, y_prob) if has_both_classes else 0.5,
        'auprc': average_precision_score(y_true, y_prob) if has_both_classes else 0,
    }


def train_graphsage(train_mask, test_mask, epochs=100, lr=1e-3):
    """Train GraphSAGE on the train subgraph and evaluate on the test nodes.

    Training only sees the subgraph induced by the training nodes. Evaluation
    runs the trained model on the full graph and scores the test nodes.

    Args:
        train_mask: Boolean array of length ``N`` selecting training nodes.
        test_mask: Boolean array of length ``N`` selecting test nodes.
        epochs: Number of full-batch training epochs.
        lr: Adam learning rate.

    Returns:
        Dict with keys ``f1``, ``precision``, ``recall``, ``auroc``, ``auprc``.
    """
    tr_mask_t = torch.from_numpy(train_mask)

    # Fit the scaler on training nodes only, then apply it to EVERY node.
    # Full-graph inference aggregates neighbour features, so leaving nodes
    # outside both masks unscaled would mix raw and standardised features.
    scaler = StandardScaler()
    scaler.fit(features_np[train_mask])
    x_np = scaler.transform(features_np).astype(np.float32)
    x_t = torch.from_numpy(x_np).float()

    # Strict inductive training: subgraph induced by the training nodes only.
    train_idx = tr_mask_t.nonzero(as_tuple=True)[0]
    train_edge, _ = subgraph(train_idx, edge_index, relabel_nodes=True, num_nodes=N)

    # Class weight for the illicit class, capped at 20.
    n_ill = labels_np[train_mask].sum()
    n_lic = train_mask.sum() - n_ill
    pos_w = n_lic / max(n_ill, 1)
    weights = torch.tensor([1.0, min(float(pos_w), 20.0)], dtype=torch.float32)

    model = GraphSAGE(features_np.shape[1], hid_ch=128, out_ch=2, n_layers=2, dropout=0.3)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)

    train_x = x_t[train_idx].float()
    train_y = y_tensor[train_idx]

    # The test set is deliberately not touched during training.
    for _ in range(epochs):
        model.train()
        optimizer.zero_grad()
        out = model(train_x, train_edge)
        loss = F.cross_entropy(out, train_y, weight=weights)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

    # Final evaluation on the full graph.
    model.eval()
    with torch.no_grad():
        logits = model(x_t.float(), edge_index)
        proba = torch.softmax(logits, dim=1)[:, 1].numpy()
        pred = logits.argmax(dim=-1).numpy()

    return _binary_metrics(labels_np[test_mask], pred[test_mask], proba[test_mask])


def train_classic(model_name, model_obj, train_mask, test_mask):
    """Fit a classic (non-graph) classifier and evaluate it on the test nodes.

    Args:
        model_name: Display name of the model (not used in the computation).
        model_obj: Estimator exposing ``fit``, ``predict`` and
            ``predict_proba``.
        train_mask: Boolean array of length ``N`` selecting training nodes.
        test_mask: Boolean array of length ``N`` selecting test nodes.

    Returns:
        Dict with keys ``f1``, ``precision``, ``recall``, ``auroc``, ``auprc``.
    """
    scaler = StandardScaler()
    X_tr = scaler.fit_transform(features_np[train_mask])
    X_te = scaler.transform(features_np[test_mask])
    y_tr = labels_np[train_mask]
    y_te = labels_np[test_mask]

    model_obj.fit(X_tr, y_tr)
    y_pred = model_obj.predict(X_te)
    y_prob = model_obj.predict_proba(X_te)[:, 1]
    return _binary_metrics(y_te, y_pred, y_prob)


def get_classic_models():
    """Return freshly constructed classic models keyed by display name."""
    return {
        'Random Forest': RandomForestClassifier(n_estimators=300, max_depth=15,
                                                class_weight='balanced',
                                                random_state=42, n_jobs=-1),
        'XGBoost': xgb.XGBClassifier(n_estimators=300, max_depth=8, learning_rate=0.1,
                                     scale_pos_weight=10, random_state=42, n_jobs=-1,
                                     verbosity=0, eval_metric='aucpr'),
        'LightGBM': lgb.LGBMClassifier(n_estimators=300, max_depth=10, learning_rate=0.1,
                                       scale_pos_weight=10, random_state=42, n_jobs=-1,
                                       verbose=-1),
    }


# ============================================================================
# STEP 6: RUN ALL EXPERIMENTS
# ============================================================================
print("\n" + "=" * 80)
print("ADIM 6: TÜM DENEYLER")
print("=" * 80)

all_results = []

for strat_name, strat_fn in strategies.items():
    print(f"\n{'─' * 60}")
    print(f"Strateji: {strat_name}")
    print(f"{'─' * 60}")

    tr_mask, te_mask = strat_fn()
    print(f" Train: {tr_mask.sum()} | Test: {te_mask.sum()}")

    # Skip strategies that leave too little data on either side.
    if te_mask.sum() < 10 or tr_mask.sum() < 10:
        print(" ⚠️ Yetersiz veri, atlanıyor.")
        continue

    # GraphSAGE
    print(f" GraphSAGE eğitiliyor...", end=" ", flush=True)
    t0 = time.time()
    sage_res = train_graphsage(tr_mask, te_mask, epochs=100)
    t1 = time.time()
    sage_res['model'] = 'GraphSAGE'
    sage_res['strategy'] = strat_name
    all_results.append(sage_res)
    print(f"F1={sage_res['f1']:.4f} AUROC={sage_res['auroc']:.4f} ({t1-t0:.0f}s)")

    # Classic models
    for m_name, m_obj in get_classic_models().items():
        print(f" {m_name} eğitiliyor...", end=" ", flush=True)
        t0 = time.time()
        res = train_classic(m_name, m_obj, tr_mask, te_mask)
        t1 = time.time()
        res['model'] = m_name
        res['strategy'] = strat_name
        all_results.append(res)
        print(f"F1={res['f1']:.4f} AUROC={res['auroc']:.4f} ({t1-t0:.0f}s)")

results_df = pd.DataFrame(all_results)
results_df = results_df.round(4)


def _f1_for(model_name, strat_name):
    """Return the F1 of a (model, strategy) pair, or None if it was not run."""
    rows = results_df[(results_df['model'] == model_name) &
                      (results_df['strategy'] == strat_name)]
    return rows['f1'].values[0] if len(rows) > 0 else None


def _inflation_pct(rand_f1, other_f1):
    """Return by how many percent the random-split F1 exceeds ``other_f1``.

    The denominator is floored at 0.001 to avoid dividing by zero.
    """
    return ((rand_f1 - other_f1) / max(other_f1, 0.001)) * 100


MODEL_ORDER = ['GraphSAGE', 'Random Forest', 'XGBoost', 'LightGBM']
REALISTIC_STRATEGIES = ['Kronolojik', 'Topolojik Kırılma (Bizim)',
                        'Kayan Pencere', 'Düşmanca-Kriz']

# ============================================================================
# STEP 7: HYPOTHESIS TESTS
# ============================================================================
print("\n" + "=" * 80)
print("ADIM 7: HİPOTEZ TESTLERİ")
print("=" * 80)

# ── HYPOTHESIS 1: is splitting at the topological breakpoint better? ──
print("\n═══ HİPOTEZ 1 TESTİ ═══")
print("Topolojik kırılmadan bölme vs Kronolojik bölme")
for model_name in MODEL_ORDER:
    t_f1 = _f1_for(model_name, 'Topolojik Kırılma (Bizim)')
    c_f1 = _f1_for(model_name, 'Kronolojik')
    if t_f1 is not None and c_f1 is not None:
        diff = t_f1 - c_f1
        print(f" {model_name:15s}: Topolojik F1={t_f1:.4f} | Kronolojik F1={c_f1:.4f} | "
              f"Fark={diff:+.4f} {'✓ DAHA İYİ' if diff > 0 else '✗'}")

# ── HYPOTHESIS 2: does the random split artificially inflate performance? ──
print("\n═══ HİPOTEZ 2 TESTİ ═══")
print("Performans şişmesi: Rastgele vs gerçekçi bölme stratejileri")
for model_name in MODEL_ORDER:
    rand_f1 = _f1_for(model_name, 'Rastgele')
    if rand_f1 is None:
        continue
    print(f"\n {model_name}:")
    print(f" Rastgele F1 = {rand_f1:.4f}")
    for strat in REALISTIC_STRATEGIES:
        o_f1 = _f1_for(model_name, strat)
        if o_f1 is not None:
            inflation = _inflation_pct(rand_f1, o_f1)
            print(f" {strat:30s}: F1={o_f1:.4f} → Şişme = %{inflation:.1f}")

# ============================================================================
# STEP 8: TABLES
# ============================================================================
print("\n" + "=" * 80)
print("ADIM 8: SONUÇ TABLOLARI")
print("=" * 80)

print("\n── TABLO: Illicit F1 Score ──")
pivot_f1 = results_df.pivot_table(values='f1', index='model', columns='strategy')
print(pivot_f1.to_string())

print("\n── TABLO: AUROC ──")
pivot_auroc = results_df.pivot_table(values='auroc', index='model', columns='strategy')
print(pivot_auroc.to_string())

print("\n── TABLO: AUPRC ──")
pivot_auprc = results_df.pivot_table(values='auprc', index='model', columns='strategy')
print(pivot_auprc.to_string())

# ============================================================================
# STEP 9: FIGURES
# ============================================================================
print("\n" + "=" * 80)
print("ADIM 9: FİGÜRLER")
print("=" * 80)

sns.set_theme(style='whitegrid', font_scale=1.1)

# ── FIGURE 1: topological health score + breakpoint ──
fig, axes = plt.subplots(3, 1, figsize=(18, 14), gridspec_kw={'height_ratios': [2, 1, 1]})

# Labels are attached to the artists so the legend only lists lines that
# were actually drawn.
axes[0].plot(topo_df.index, topo_df['health_score'], 'o-', color='steelblue',
             linewidth=2, markersize=5, label='Sağlık Skoru')
axes[0].axvline(x=breakpoint_ts, color='red', linewidth=3, linestyle='--',
                label='Ana Kırılma')
axes[0].annotate(f'KIRILMA\nNOKTASI\nTS={breakpoint_ts}',
                 xy=(breakpoint_ts, topo_df.loc[breakpoint_ts, 'health_score']),
                 fontsize=12, fontweight='bold', color='red', ha='center',
                 xytext=(breakpoint_ts+3, topo_df['health_score'].max()-0.05),
                 arrowprops=dict(arrowstyle='->', color='red', lw=2))
secondary_labeled = False
for sb in secondary_breaks:
    if sb != breakpoint_ts:
        # Only the first secondary line gets a legend entry.
        axes[0].axvline(x=sb, color='orange', linewidth=1.5, linestyle=':', alpha=0.7,
                        label=None if secondary_labeled else 'İkincil Kırılmalar')
        secondary_labeled = True
axes[0].set_ylabel('Ağ Sağlık Skoru', fontsize=13)
axes[0].set_title('Topolojik Kırılma Noktası Tespiti: Ağın Sağlığının Zamansal Değişimi',
                  fontsize=14, fontweight='bold')
axes[0].legend(fontsize=10)

# Illicit rate
colors_bar = ['#FF4444' if topo_df.loc[ts, 'illicit_rate'] > CRISIS_ILLICIT_RATE else '#44BB44'
              for ts in topo_df.index]
axes[1].bar(topo_df.index, topo_df['illicit_rate']*100, color=colors_bar)
axes[1].axvline(x=breakpoint_ts, color='red', linewidth=3, linestyle='--')
axes[1].set_ylabel('İllicit Oranı (%)', fontsize=12)
axes[1].set_title('Suç Yoğunluğu', fontsize=12)

# Connectivity
axes[2].plot(topo_df.index, topo_df['largest_cc_ratio'], 'o-', color='purple',
             linewidth=2, markersize=4)
axes[2].axvline(x=breakpoint_ts, color='red', linewidth=3, linestyle='--')
axes[2].set_ylabel('En Büyük Bileşen Oranı', fontsize=12)
axes[2].set_xlabel('Timestep', fontsize=12)
axes[2].set_title('Ağ Bağlantılılığı', fontsize=12)

plt.tight_layout()
plt.savefig(f'{FIGDIR}/fig1_topological_breakpoint.png', dpi=150, bbox_inches='tight')
plt.close()
print(" ✓ Figür 1: Topolojik Kırılma Noktası")

# ── FIGURE 2: F1 comparison (main result) ──
fig, ax = plt.subplots(figsize=(16, 8))
model_names = results_df['model'].unique()
strat_names = list(strategies.keys())
colors5 = sns.color_palette('Set2', len(strat_names))

x = np.arange(len(model_names))
width = 0.15
for i, strat in enumerate(strat_names):
    # Combinations that were skipped are drawn as 0.
    vals = [_f1_for(m, strat) for m in model_names]
    vals = [0 if v is None else v for v in vals]
    ax.bar(x + i*width, vals, width, label=strat, color=colors5[i],
           edgecolor='white' if 'Topolojik' not in strat else 'black',
           linewidth=2 if 'Topolojik' in strat else 0.5)

ax.set_xlabel('Model', fontsize=13)
ax.set_ylabel('Illicit F1 Score', fontsize=13)
ax.set_title('Bölme Stratejisine Göre Illicit F1 Karşılaştırması\n'
             '(Siyah kenarlı = Bizim topolojik kırılma metodumuz)',
             fontsize=14, fontweight='bold')
# Centre each tick under its group of bars.
ax.set_xticks(x + width * (len(strat_names) - 1) / 2)
ax.set_xticklabels(model_names, rotation=15, fontsize=11)
ax.legend(fontsize=9, loc='upper right')
ax.set_ylim(0, 1.05)
ax.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(f'{FIGDIR}/fig2_f1_comparison.png', dpi=150, bbox_inches='tight')
plt.close()
print(" ✓ Figür 2: F1 Karşılaştırma")

# ── FIGURE 3: performance inflation heat map ──
fig, ax = plt.subplots(figsize=(14, 7))
inflation_data = []
for model_name in model_names:
    rand_f1 = _f1_for(model_name, 'Rastgele')
    if rand_f1 is None:
        continue
    for strat in REALISTIC_STRATEGIES:
        o_f1 = _f1_for(model_name, strat)
        if o_f1 is not None:
            inflation_data.append({'Model': model_name, 'Strateji': strat,
                                   'F1 Şişme (%)': _inflation_pct(rand_f1, o_f1)})

inf_df = pd.DataFrame(inflation_data)
if len(inf_df) > 0:
    inf_pivot = inf_df.pivot_table(values='F1 Şişme (%)', index='Model', columns='Strateji')
    sns.heatmap(inf_pivot, annot=True, fmt='.1f', cmap='Reds', ax=ax, center=0,
                linewidths=0.5)
ax.set_title('PERFORMANS ŞİŞMESİ: Rastgele Bölmenin F1 Skorunu Ne Kadar Şişirdiği (%)\n'
             'Yüksek değer = Rastgele bölme o kadar yapay başarı gösteriyor',
             fontsize=13, fontweight='bold')
plt.tight_layout()
plt.savefig(f'{FIGDIR}/fig3_inflation_heatmap.png', dpi=150, bbox_inches='tight')
plt.close()
print(" ✓ Figür 3: Performans Şişmesi")

# ── FIGURE 4: performance heat map (3 metrics) ──
fig, axes = plt.subplots(1, 3, figsize=(24, 6))
for idx, (metric, title) in enumerate([('f1', 'Illicit F1'), ('auroc', 'AUROC'),
                                       ('auprc', 'AUPRC')]):
    pivot = results_df.pivot_table(values=metric, index='model', columns='strategy')
    sns.heatmap(pivot, annot=True, fmt='.3f', cmap='RdYlGn', ax=axes[idx], linewidths=0.5)
    axes[idx].set_title(title, fontsize=14, fontweight='bold')
    axes[idx].set_ylabel('')
plt.suptitle('Tüm Metrikler × Tüm Stratejiler × Tüm Modeller', fontsize=15,
             fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig(f'{FIGDIR}/fig4_full_heatmap.png', dpi=150, bbox_inches='tight')
plt.close()
print(" ✓ Figür 4: Tam Isı Haritası")

# ── FIGURE 5: model complexity vs splitting strategy ──
fig, ax = plt.subplots(figsize=(14, 8))
complexity = {'GraphSAGE': 4, 'LightGBM': 3, 'XGBoost': 2, 'Random Forest': 1}
markers = {'Rastgele': 'o', 'Kronolojik': 's', 'Topolojik Kırılma (Bizim)': '^',
           'Kayan Pencere': 'D', 'Düşmanca-Kriz': 'v'}

# Plot models in increasing complexity so each line runs left to right
# instead of doubling back on itself.
models_by_complexity = sorted(model_names, key=lambda m: complexity.get(m, 0))
for i, strat in enumerate(strat_names):
    cx = [complexity.get(m, 0) for m in models_by_complexity]
    f1s = [_f1_for(m, strat) for m in models_by_complexity]
    f1s = [0 if v is None else v for v in f1s]
    lw = 3.5 if 'Topolojik' in strat else 2
    ax.plot(cx, f1s, f'-{markers.get(strat, "o")}', label=strat, linewidth=lw,
            markersize=10, color=colors5[i])

ax.set_xticks(sorted(complexity.values()))
ax.set_xticklabels(sorted(complexity, key=complexity.get), fontsize=11)
ax.set_xlabel('Model Karmaşıklığı →', fontsize=13)
ax.set_ylabel('Illicit F1 Score', fontsize=13)
ax.set_title('ANA İÇGÖRÜ: Doğru Bölme + Basit Model > Yanlış Bölme + Karmaşık Model',
             fontsize=13, fontweight='bold')
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig(f'{FIGDIR}/fig5_complexity_vs_split.png', dpi=150, bbox_inches='tight')
plt.close()
print(" ✓ Figür 5: Karmaşıklık vs Bölme")

# ── FIGURE 6: network metrics time series ──
fig, axes = plt.subplots(2, 2, figsize=(16, 10))
metrics_plot = [('density', 'Ağ Yoğunluğu'), ('n_components', 'Bileşen Sayısı'),
                ('avg_in_degree', 'Ort. Giriş Derecesi'),
                ('std_in_degree', 'Derece Std. Sapması')]
for idx, (col, title) in enumerate(metrics_plot):
    ax = axes[idx // 2][idx % 2]
    ax.plot(topo_df.index, topo_df[col], 'o-', color='steelblue', markersize=4)
    ax.axvline(x=breakpoint_ts, color='red', linewidth=2, linestyle='--')
    ax.set_title(title, fontsize=12, fontweight='bold')
    ax.set_xlabel('Timestep')
    ax.grid(alpha=0.3)
plt.suptitle('Ağ Topolojik Metriklerinin Zamansal Değişimi', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig(f'{FIGDIR}/fig6_network_metrics.png', dpi=150, bbox_inches='tight')
plt.close()
print(" ✓ Figür 6: Ağ Metrikleri")

# ============================================================================
# SAVE
# ============================================================================
print("\n" + "=" * 80)
print("SONUÇLAR KAYDEDİLİYOR")
print("=" * 80)

results_df.to_csv(f'{OUTDIR}/all_results.csv', index=False)
topo_df.to_csv(f'{OUTDIR}/topological_metrics.csv')

summary = {
    'breakpoint_timestep': int(breakpoint_ts),
    'breakpoint_health_drop': float(breakpoint_score),
    'secondary_breakpoints': [int(x) for x in secondary_breaks],
    'n_strategies': len(strategies),
    'n_models': int(results_df['model'].nunique()),
    'results': results_df.to_dict(orient='records'),
}
# ensure_ascii=False writes the non-ASCII strategy names verbatim, so the
# file encoding is pinned instead of depending on the platform default.
with open(f'{OUTDIR}/summary.json', 'w', encoding='utf-8') as f:
    json.dump(summary, f, indent=2, ensure_ascii=False, default=str)

print("✓ Tüm sonuçlar kaydedildi!")
print("\n" + "=" * 80)
print("DENEY TAMAMLANDI!")
print("=" * 80)