def evaluate_models_unsupervised(model_endpoints, test_data):
    """Run every text sample through each endpoint and collect label-free stats.

    Each text is sent to the endpoint with content type ``application/x-text``.
    The response body is parsed as JSON and must contain a ``'probabilities'``
    sequence with at least two entries; index 1 beating index 0 yields
    ``'LABEL_1'``, otherwise ``'LABEL_0'``.

    Args:
        model_endpoints: Mapping of model name -> SageMaker endpoint name.
        test_data: Object supporting ``test_data['text']`` that yields the
            input texts when iterated.

    Returns:
        Dict mapping each model name to a dict with three lists, one entry per
        input text and in input order: ``'predictions'``, ``'confidences'``
        and ``'inference_times'`` (milliseconds). A failed request is recorded
        as ``'UNKNOWN'`` / ``0.5`` / ``0`` so the lists stay index-aligned
        across models.

    The printed per-model summary is computed from successful requests only,
    so the failure placeholders do not distort the averages.
    """
    from collections import Counter

    client = boto3.client('runtime.sagemaker')
    model_results = {}

    for model_name, endpoint_name in model_endpoints.items():
        print(f"\nEvaluating {model_name}...")
        predictions = []
        confidences = []
        inference_times = []
        # Values from successful calls only; used for the printed summary.
        ok_confidences = []
        ok_times = []

        for text in test_data['text']:
            try:
                # perf_counter is monotonic, unlike time.time(), so the
                # measured interval cannot be skewed by wall-clock changes.
                start_time = time.perf_counter()
                response = client.invoke_endpoint(
                    EndpointName=endpoint_name,
                    ContentType='application/x-text',
                    Body=text
                )
                end_time = time.perf_counter()

                result = json.loads(response['Body'].read())
                probs = result['probabilities']
                predicted_label = 'LABEL_1' if probs[1] > probs[0] else 'LABEL_0'
                confidence = max(probs)
                elapsed_ms = (end_time - start_time) * 1000
            except Exception as e:
                # Best effort: log and keep going, recording a placeholder so
                # this sample still occupies its slot in every list.
                print(f"    Error: {e}")
                predictions.append('UNKNOWN')
                confidences.append(0.5)
                inference_times.append(0)
            else:
                # Append only once everything has been computed, so a late
                # failure can never leave the three lists with unequal length.
                predictions.append(predicted_label)
                confidences.append(confidence)
                inference_times.append(elapsed_ms)
                ok_confidences.append(confidence)
                ok_times.append(elapsed_ms)

        model_results[model_name] = {
            'predictions': predictions,
            'confidences': confidences,
            'inference_times': inference_times
        }

        avg_confidence = sum(ok_confidences) / len(ok_confidences) if ok_confidences else 0
        avg_speed = sum(ok_times) / len(ok_times) if ok_times else 0

        # Calculate additional metrics
        pred_dist = dict(Counter(predictions))
        # np.var / np.std of an empty sequence give nan plus a RuntimeWarning.
        conf_variance = np.var(ok_confidences) if ok_confidences else 0.0
        speed_std = np.std(ok_times) if ok_times else 0.0
        failed = len(predictions) - len(ok_confidences)

        print(f"    Avg Confidence: {avg_confidence:.3f}")
        print(f"    Avg Speed: {avg_speed:.1f}ms")
        print(f"    Prediction Distribution: {pred_dist}")
        print(f"    Confidence Variance: {conf_variance:.6f}")
        print(f"    Speed Consistency: {speed_std:.1f}ms std")
        if failed:
            print(f"    Failed Requests: {failed}/{len(predictions)}")

    return model_results

def calculate_unsupervised_metrics(model_results):
    """Summarise per-model results and print a cross-model comparison.

    Args:
        model_results: Mapping of model name -> dict holding the lists
            ``'predictions'``, ``'confidences'`` and ``'inference_times'``.

    Returns:
        Dict mapping each model name to:
            ``'avg_confidence'``: mean of the confidences.
            ``'avg_inference_time'``: mean of the inference times.
            ``'prediction_distribution'``: label -> occurrence count.
            ``'confidence_stats'``: dict with ``'mean'``, ``'variance'``,
            ``'min'`` and ``'max'`` of the confidences.
        For a model with no samples the means and variance are ``0.0`` and
        ``'min'`` / ``'max'`` are ``None`` instead of raising.

    When more than one model is present, the agreement rate (share of samples
    on which all models predict the same label), the model with the lowest
    average inference time and the model with the highest average confidence
    are printed. Agreement is measured over the positions every model has a
    prediction for.
    """
    from collections import Counter

    metrics = {}
    model_names = list(model_results.keys())

    print(f"\nRunning Unsupervised Evaluation...")
    print("=" * 50)

    for model_name in model_names:
        model_data = model_results[model_name]
        confidences = model_data['confidences']
        inference_times = model_data['inference_times']

        # Guard every aggregate: an empty list would otherwise raise
        # ZeroDivisionError (means) or ValueError (min/max).
        mean_confidence = sum(confidences) / len(confidences) if confidences else 0.0
        mean_time = sum(inference_times) / len(inference_times) if inference_times else 0.0

        metrics[model_name] = {
            'avg_confidence': mean_confidence,
            'avg_inference_time': mean_time,
            'prediction_distribution': dict(Counter(model_data['predictions'])),
            'confidence_stats': {
                'mean': mean_confidence,
                'variance': np.var(confidences) if confidences else 0.0,
                'min': min(confidences) if confidences else None,
                'max': max(confidences) if confidences else None
            }
        }

    # Cross-model analysis
    if len(model_names) > 1:
        print(f"\nCross-Model Analysis:")
        print("-" * 30)

        # Model agreement: zip stops at the shortest list, so models with
        # differing sample counts are compared on their common prefix rather
        # than raising IndexError.
        all_preds = [model_results[name]['predictions'] for name in model_names]
        compared = min(len(preds) for preds in all_preds)
        agreements = sum(1 for labels in zip(*all_preds) if len(set(labels)) == 1)
        agreement_rate = agreements / compared if compared else 0.0
        print(f"Model Agreement: {agreement_rate:.1%}")

        # Speed comparison (first model wins ties, as min() returns the first
        # minimal element)
        fastest = min(model_names, key=lambda name: metrics[name]['avg_inference_time'])
        print(f"Fastest Model: {fastest} ({metrics[fastest]['avg_inference_time']:.1f}ms)")

        # Confidence comparison (first model wins ties)
        most_confident = max(model_names, key=lambda name: metrics[name]['avg_confidence'])
        print(f"Most Confident: {most_confident} ({metrics[most_confident]['avg_confidence']:.3f})")

    return metrics
