Text Generation
Transformers
Safetensors
llama
research
code
mathematics
reasoning
multilingual
long-context
custom_code
text-generation-inference
Instructions to use DeepXR/Helion-V2.5-Rnd with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use DeepXR/Helion-V2.5-Rnd with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="DeepXR/Helion-V2.5-Rnd", trust_remote_code=True)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("DeepXR/Helion-V2.5-Rnd", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("DeepXR/Helion-V2.5-Rnd", trust_remote_code=True)
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use DeepXR/Helion-V2.5-Rnd with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "DeepXR/Helion-V2.5-Rnd"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DeepXR/Helion-V2.5-Rnd",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
```
Use Docker
docker model run hf.co/DeepXR/Helion-V2.5-Rnd
- SGLang
How to use DeepXR/Helion-V2.5-Rnd with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "DeepXR/Helion-V2.5-Rnd" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DeepXR/Helion-V2.5-Rnd",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "DeepXR/Helion-V2.5-Rnd" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DeepXR/Helion-V2.5-Rnd",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
```
- Docker Model Runner
How to use DeepXR/Helion-V2.5-Rnd with Docker Model Runner:
docker model run hf.co/DeepXR/Helion-V2.5-Rnd
| #!/usr/bin/env python3 | |
| """ | |
| Helion-2.5-Rnd Evaluation Script | |
| Comprehensive benchmark evaluation across multiple datasets | |
| """ | |
| import argparse | |
| import json | |
| import logging | |
| import os | |
| from collections import defaultdict | |
| from pathlib import Path | |
| from typing import Dict, List, Optional | |
| import torch | |
| from datasets import load_dataset | |
| from tqdm import tqdm | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
# Configure root logging once at import time: timestamp, logger name,
# severity and message on every record, INFO and above.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger shared by the evaluator class and main().
logger = logging.getLogger(__name__)
class HelionEvaluator:
    """Evaluation framework for Helion model.

    Loads a causal language model and its tokenizer once, then runs
    benchmark-specific prompt/score loops (MMLU, GSM8K, HumanEval,
    TruthfulQA) that all go through :meth:`generate`.
    """

    # Reported as the model name when no model path was recorded on the
    # instance (e.g. the instance was created without running __init__).
    DEFAULT_MODEL_NAME = "DeepXR/Helion-2.5-Rnd"

    # (results key, label used in log messages), in execution order.
    _BENCHMARKS = (
        ("mmlu", "MMLU"),
        ("gsm8k", "GSM8K"),
        ("humaneval", "HumanEval"),
        ("truthfulqa", "TruthfulQA"),
    )

    # A signed number with optional thousands separators and decimals.
    _NUMBER = r"-?\d+(?:,\d+)*(?:\.\d+)?"
    # GSM8K reference solutions end with "#### <number>".
    _GSM8K_REFERENCE = r"####\s*(" + _NUMBER + r")"
    # Candidate answers in model output: "answer is 18", "= 18", "= $18".
    _GSM8K_PREDICTION = r"(?:answer is|=)\s*\$?\s*(" + _NUMBER + r")"

    def __init__(
        self,
        model_path: str,
        device: str = "cuda",
        batch_size: int = 1,
        max_length: int = 2048
    ):
        """
        Initialize evaluator

        Args:
            model_path: Path to model or HuggingFace model ID
            device: Fallback device for inputs; the model itself is placed
                by ``device_map="auto"``
            batch_size: Batch size for evaluation (stored; the evaluation
                loops in this class generate one prompt at a time)
            max_length: Maximum prompt length in tokens; longer prompts are
                truncated by the tokenizer
        """
        logger.info(f"Loading model from {model_path}")
        self.model_path = model_path
        # The model is loaded with trust_remote_code=True, so the tokenizer
        # must be allowed to use repository code as well.
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            trust_remote_code=True
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True
        )
        self.device = device
        self.batch_size = batch_size
        self.max_length = max_length
        logger.info("Model loaded successfully")

    # ------------------------------------------------------------------
    # Pure helpers (no model access)
    # ------------------------------------------------------------------

    @staticmethod
    def _limit_samples(dataset, num_samples: Optional[int]):
        """Return the first ``num_samples`` rows, or everything if falsy."""
        if num_samples:
            return dataset.select(range(min(num_samples, len(dataset))))
        return dataset

    @staticmethod
    def _format_mmlu_prompt(question: str, choices) -> str:
        """Build the lettered multiple-choice prompt used for MMLU."""
        lines = [f"Question: {question}\n\nChoices:\n"]
        lines.extend(
            f"{chr(65 + i)}. {choice}\n" for i, choice in enumerate(choices)
        )
        lines.append("\nAnswer: ")
        return "".join(lines)

    @classmethod
    def _extract_gsm8k_reference(cls, answer: str) -> Optional[str]:
        """Return the number after ``####`` (commas removed), or None."""
        import re

        match = re.search(cls._GSM8K_REFERENCE, answer)
        return match.group(1).replace(",", "") if match else None

    @classmethod
    def _extract_gsm8k_prediction(cls, response: str) -> Optional[str]:
        """Return the model's final numeric answer (commas removed), or None.

        Step-by-step output contains many intermediate ``a + b = c`` lines,
        so the LAST match is used rather than the first.
        """
        import re

        candidates = re.findall(cls._GSM8K_PREDICTION, response.lower())
        return candidates[-1].replace(",", "") if candidates else None

    @staticmethod
    def _numbers_match(predicted: str, reference: str) -> bool:
        """Compare two numeric strings by value, so "18.0" equals "18"."""
        try:
            return float(predicted) == float(reference)
        except (TypeError, ValueError):
            # Not parseable as numbers: fall back to exact text comparison.
            return predicted == reference

    # ------------------------------------------------------------------
    # Generation
    # ------------------------------------------------------------------

    def generate(
        self,
        prompt: str,
        max_new_tokens: int = 512,
        temperature: float = 0.0,
        **kwargs
    ) -> str:
        """Generate text from prompt

        Args:
            prompt: Text to condition on; truncated to ``self.max_length``
                tokens.
            max_new_tokens: Upper bound on generated tokens.
            temperature: ``0`` (or less) selects greedy decoding; any
                positive value enables sampling at that temperature.
            **kwargs: Forwarded unchanged to ``self.model.generate``.

        Returns:
            The decoded continuation only (prompt tokens removed), stripped
            of surrounding whitespace.
        """
        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=self.max_length
        )
        # The model was placed by device_map="auto", so follow the model's
        # own device; self.device is only a fallback.
        model_device = getattr(self.model, "device", None)
        inputs = inputs.to(
            model_device if model_device is not None else self.device
        )

        # Tokenizers without a pad token report None; fall back to EOS so
        # generate() receives an explicit padding id.
        pad_token_id = self.tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = self.tokenizer.eos_token_id

        sampling = temperature > 0
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                # 1.0 is the neutral value when decoding greedily.
                temperature=temperature if sampling else 1.0,
                do_sample=sampling,
                pad_token_id=pad_token_id,
                **kwargs
            )

        # Drop the prompt tokens so only the continuation is decoded.
        prompt_length = inputs['input_ids'].shape[1]
        response = self.tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True
        )
        return response.strip()

    # ------------------------------------------------------------------
    # Benchmarks
    # ------------------------------------------------------------------

    def evaluate_mmlu(self, num_samples: Optional[int] = None) -> Dict:
        """Evaluate on MMLU benchmark

        The first character of the model's reply is compared with the
        letter of the reference choice.

        Args:
            num_samples: Evaluate only the first N test rows when given.

        Returns:
            Dict with ``benchmark``, ``accuracy``, ``correct`` and ``total``.
        """
        logger.info("Evaluating on MMLU...")
        dataset = load_dataset("cais/mmlu", "all", split="test")
        dataset = self._limit_samples(dataset, num_samples)

        correct = 0
        total = 0
        for example in tqdm(dataset, desc="MMLU"):
            prompt = self._format_mmlu_prompt(
                example["question"], example["choices"]
            )
            response = self.generate(prompt, max_new_tokens=10, temperature=0.0)

            # Slicing (not indexing) keeps an empty reply from raising.
            pred = response.strip()[:1].upper()
            correct_answer = chr(65 + example["answer"])
            if pred == correct_answer:
                correct += 1
            total += 1

        accuracy = correct / total if total > 0 else 0
        return {
            "benchmark": "MMLU",
            "accuracy": accuracy,
            "correct": correct,
            "total": total
        }

    def evaluate_gsm8k(self, num_samples: Optional[int] = None) -> Dict:
        """Evaluate on GSM8K mathematical reasoning

        Rows whose reference answer has no ``#### <number>`` marker are
        skipped and do not count towards ``total``.

        Args:
            num_samples: Evaluate only the first N test rows when given.

        Returns:
            Dict with ``benchmark``, ``accuracy``, ``correct`` and ``total``.
        """
        logger.info("Evaluating on GSM8K...")
        dataset = load_dataset("gsm8k", "main", split="test")
        dataset = self._limit_samples(dataset, num_samples)

        correct = 0
        total = 0
        for example in tqdm(dataset, desc="GSM8K"):
            correct_answer = self._extract_gsm8k_reference(example["answer"])
            if correct_answer is None:
                continue

            question = example["question"]
            prompt = f"Question: {question}\n\nLet's solve this step by step:\n"
            response = self.generate(prompt, max_new_tokens=512, temperature=0.0)

            pred_answer = self._extract_gsm8k_prediction(response)
            if pred_answer is not None and self._numbers_match(
                pred_answer, correct_answer
            ):
                correct += 1
            total += 1

        accuracy = correct / total if total > 0 else 0
        return {
            "benchmark": "GSM8K",
            "accuracy": accuracy,
            "correct": correct,
            "total": total
        }

    def evaluate_humaneval(self, num_samples: Optional[int] = None) -> Dict:
        """Evaluate on HumanEval code generation

        Only generates completions; nothing is executed or scored here.

        Args:
            num_samples: Generate for only the first N problems when given.

        Returns:
            Dict with ``benchmark``, ``samples_generated``, ``note`` and the
            generated ``samples`` (task id, completion, test code), or a
            dict with ``error`` when the dataset cannot be loaded.
        """
        logger.info("Evaluating on HumanEval...")
        try:
            dataset = load_dataset("openai_humaneval", split="test")
        except Exception as exc:
            # Best-effort benchmark: report and carry on, but do not
            # swallow KeyboardInterrupt/SystemExit as a bare except would.
            logger.warning("HumanEval dataset not available")
            logger.debug(f"HumanEval load failure: {exc}")
            return {"benchmark": "HumanEval", "error": "Dataset not available"}
        dataset = self._limit_samples(dataset, num_samples)

        results = []
        for example in tqdm(dataset, desc="HumanEval"):
            prompt = example["prompt"]
            full_prompt = f"Complete the following Python function:\n\n{prompt}"
            response = self.generate(
                full_prompt,
                max_new_tokens=512,
                temperature=0.0
            )
            results.append({
                "task_id": example["task_id"],
                # Function header from the dataset followed by the model text.
                "completion": prompt + response,
                "test": example["test"]
            })

        # Note: Full evaluation requires executing code
        # This is a simplified version; the samples are returned so a
        # separate execution harness can score them.
        return {
            "benchmark": "HumanEval",
            "samples_generated": len(results),
            "note": "Full evaluation requires code execution framework",
            "samples": results
        }

    def evaluate_truthfulqa(self, num_samples: Optional[int] = None) -> Dict:
        """Evaluate on TruthfulQA

        Only collects model answers next to the dataset's reference
        answers; no automatic truthfulness score is computed.

        Args:
            num_samples: Answer only the first N questions when given.

        Returns:
            Dict with ``benchmark``, ``samples_evaluated``, ``note`` and the
            collected ``responses`` for manual review.
        """
        logger.info("Evaluating on TruthfulQA...")
        dataset = load_dataset("truthful_qa", "generation", split="validation")
        dataset = self._limit_samples(dataset, num_samples)

        responses = []
        for example in tqdm(dataset, desc="TruthfulQA"):
            question = example["question"]
            prompt = f"Question: {question}\n\nProvide a truthful and accurate answer:\nAnswer: "
            response = self.generate(prompt, max_new_tokens=256, temperature=0.0)
            responses.append({
                "question": question,
                "response": response,
                "best_answer": example["best_answer"],
                "correct_answers": example["correct_answers"],
                "incorrect_answers": example["incorrect_answers"]
            })

        return {
            "benchmark": "TruthfulQA",
            "samples_evaluated": len(responses),
            "note": "Manual review required for truthfulness assessment",
            "responses": responses
        }

    def evaluate_all(
        self,
        output_file: Optional[str] = None,
        num_samples: Optional[int] = None
    ) -> Dict:
        """Run all evaluations

        Each benchmark runs in isolation: a failure is logged and recorded
        as ``{"error": ...}`` under that benchmark's key, and the remaining
        benchmarks still run.

        Args:
            output_file: When given, results are written there as JSON
                (parent directories are created).
            num_samples: Per-benchmark sample limit passed to each run.

        Returns:
            Dict with ``model`` and a ``benchmarks`` mapping of results.
        """
        logger.info("Starting comprehensive evaluation...")
        results = {
            # Report the model that was actually loaded.
            "model": str(getattr(self, "model_path", self.DEFAULT_MODEL_NAME)),
            "benchmarks": {}
        }

        # Run evaluations
        for key, label in self._BENCHMARKS:
            run_benchmark = getattr(self, f"evaluate_{key}")
            try:
                results["benchmarks"][key] = run_benchmark(num_samples)
            except Exception as e:
                logger.exception(f"{label} evaluation failed: {e}")
                results["benchmarks"][key] = {"error": str(e)}

        # Save results
        if output_file:
            output_path = Path(output_file)
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(results, f, indent=2)
            logger.info(f"Results saved to {output_path}")

        # Print summary
        logger.info("\n" + "="*50)
        logger.info("EVALUATION SUMMARY")
        logger.info("="*50)
        for benchmark, result in results["benchmarks"].items():
            if "accuracy" in result:
                logger.info(f"{benchmark.upper()}: {result['accuracy']:.2%}")
            elif "error" in result:
                logger.info(f"{benchmark.upper()}: ERROR - {result['error']}")
            else:
                logger.info(f"{benchmark.upper()}: {result.get('note', 'Completed')}")
        return results
def main():
    """Main evaluation entry point

    Parses command-line arguments, loads the model once, then runs either
    every benchmark (``--benchmarks all``, the default) or only the named
    ones, and writes the results as JSON to ``--output``.
    """
    parser = argparse.ArgumentParser(description="Evaluate Helion model")
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Model path or HuggingFace ID"
    )
    parser.add_argument(
        "--benchmarks",
        type=str,
        nargs="+",
        default=["all"],
        choices=["all", "mmlu", "gsm8k", "humaneval", "truthfulqa"],
        help="Benchmarks to run"
    )
    parser.add_argument(
        "--output",
        type=str,
        default="evaluation_results.json",
        help="Output file for results"
    )
    parser.add_argument(
        "--num-samples",
        type=int,
        default=None,
        help="Number of samples to evaluate (for quick testing)"
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="Device to use"
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=1,
        help="Batch size"
    )
    args = parser.parse_args()

    # Initialize evaluator
    evaluator = HelionEvaluator(
        model_path=args.model,
        device=args.device,
        batch_size=args.batch_size
    )

    # "all" delegates to evaluate_all, which also saves and summarizes.
    if "all" in args.benchmarks:
        evaluator.evaluate_all(
            output_file=args.output,
            num_samples=args.num_samples
        )
        return

    # Run only the requested benchmarks, in a fixed order. Each one is
    # isolated so a late failure does not discard earlier results.
    results = {"model": args.model, "benchmarks": {}}
    for name in ("mmlu", "gsm8k", "humaneval", "truthfulqa"):
        if name not in args.benchmarks:
            continue
        run_benchmark = getattr(evaluator, f"evaluate_{name}")
        try:
            results["benchmarks"][name] = run_benchmark(args.num_samples)
        except Exception as e:
            logger.exception(f"{name.upper()} evaluation failed: {e}")
            results["benchmarks"][name] = {"error": str(e)}

    # Save results (create the target directory if needed)
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)
    logger.info(f"Results saved to {args.output}")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()