csv-handler — Handle CSV files from construction software exports. Auto-detects delimiters and encodings, and cleans messy data.
Install via ClawdBot CLI:
clawdbot install datadrivenconstruction/csv-handler

CSV is the universal exchange format in construction - from scheduling exports to cost databases. This skill handles encoding issues, delimiter detection, and data cleaning.
```python
import pandas as pd
import csv
from typing import Dict, Any, List, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass
import chardet
@dataclass
class CSVProfile:
    """Detected format and basic dimensions of a CSV file."""
    encoding: str        # detected text encoding (e.g. 'utf-8', 'cp1252')
    delimiter: str       # detected field separator (e.g. ',', ';', '\t')
    has_header: bool     # True when the first row looks like column names
    row_count: int       # number of data rows (header line excluded)
    column_count: int    # number of columns in the sampled header
    columns: List[str]   # column names as read from the file
class ConstructionCSVHandler:
    """Handle CSV files exported from construction software.

    Detects encoding and delimiter, profiles files, reads them into
    pandas DataFrames, and cleans common export artefacts (messy column
    names, stray whitespace, fully-empty rows/columns).
    """

    # Delimiters commonly produced by construction-software exports.
    COMMON_DELIMITERS = [',', ';', '\t', '|']
    # Encodings frequently seen in such exports, most likely first.
    COMMON_ENCODINGS = ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1']

    def __init__(self) -> None:
        # Fix: was `def init`, which Python never calls, so `last_profile`
        # did not exist and reading it before profile_csv() raised
        # AttributeError.
        self.last_profile: Optional["CSVProfile"] = None

    def detect_encoding(self, file_path: str) -> str:
        """Detect the text encoding from the first 10 KB of the file.

        Falls back to UTF-8 when chardet is inconclusive (chardet can
        return ``{'encoding': None}``).
        """
        with open(file_path, 'rb') as f:
            raw = f.read(10000)
        result = chardet.detect(raw)
        return result.get('encoding') or 'utf-8'

    def detect_delimiter(self, file_path: str, encoding: str) -> str:
        """Guess the delimiter by counting candidates in a 5 KB sample.

        Returns the most frequent of COMMON_DELIMITERS; comma wins ties
        and is the fallback when no candidate appears at all.
        """
        with open(file_path, 'r', encoding=encoding, errors='replace') as f:
            sample = f.read(5000)
        counts = {d: sample.count(d) for d in self.COMMON_DELIMITERS}
        best = max(counts, key=counts.get)
        # All-zero counts mean no candidate appeared; default to comma.
        return best if counts[best] > 0 else ','

    def profile_csv(self, file_path: str) -> "CSVProfile":
        """Profile a CSV file: encoding, delimiter, header, and size.

        Only the first 10 rows are parsed to inspect columns; the row
        count comes from counting physical lines. The result is cached
        on ``self.last_profile``.
        """
        encoding = self.detect_encoding(file_path)
        delimiter = self.detect_delimiter(file_path, encoding)
        # Small sample is enough to learn the column layout.
        df = pd.read_csv(file_path, encoding=encoding, delimiter=delimiter,
                         nrows=10, on_bad_lines='skip')
        # Heuristic: a purely numeric first column name suggests the file
        # has no header row. Cast to str in case pandas inferred a number.
        first_col = str(df.columns[0])
        has_header = not first_col.replace('.', '').replace('-', '').isdigit()
        # Count physical lines; subtract the header line when present.
        with open(file_path, 'r', encoding=encoding, errors='replace') as f:
            row_count = sum(1 for _ in f) - (1 if has_header else 0)
        profile = CSVProfile(
            encoding=encoding,
            delimiter=delimiter,
            has_header=has_header,
            row_count=row_count,
            column_count=len(df.columns),
            columns=list(df.columns),
        )
        self.last_profile = profile
        return profile

    def read_csv(self, file_path: str,
                 encoding: Optional[str] = None,
                 delimiter: Optional[str] = None,
                 clean: bool = True) -> pd.DataFrame:
        """Read a CSV, auto-detecting encoding/delimiter when not given.

        Malformed lines are skipped rather than raising. When ``clean``
        is True the result is passed through :meth:`clean_dataframe`.
        """
        if encoding is None:
            encoding = self.detect_encoding(file_path)
        if delimiter is None:
            delimiter = self.detect_delimiter(file_path, encoding)
        df = pd.read_csv(
            file_path,
            encoding=encoding,
            delimiter=delimiter,
            on_bad_lines='skip',
            low_memory=False,
        )
        if clean:
            df = self.clean_dataframe(df)
        return df

    def clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """Clean a construction CSV DataFrame.

        Normalises column names, drops fully-empty rows and columns, and
        strips whitespace from string cells. Works on a copy so the
        caller's DataFrame (including its column index) is not mutated.
        """
        df = df.copy()
        df.columns = [self._clean_column_name(c) for c in df.columns]
        df = df.dropna(how='all')
        df = df.dropna(axis=1, how='all')
        for col in df.select_dtypes(include=['object']).columns:
            # Fix: `.str.strip()` replaces non-string cells in a mixed
            # object column (e.g. raw numbers) with NaN; strip only the
            # values that are actually strings.
            df[col] = df[col].map(lambda v: v.strip() if isinstance(v, str) else v)
        return df

    def _clean_column_name(self, name: str) -> str:
        """Normalise a column name to lowercase snake_case ASCII-ish form.

        Non-string names are stringified; spaces and hyphens become
        underscores; all other non-alphanumeric characters are dropped.
        """
        if not isinstance(name, str):
            return str(name)
        clean = name.strip().lower()
        clean = clean.replace(' ', '_').replace('-', '_')
        clean = ''.join(c for c in clean if c.isalnum() or c == '_')
        return clean

    def merge_csvs(self, file_paths: List[str],
                   on_column: Optional[str] = None) -> pd.DataFrame:
        """Merge multiple CSV files into one DataFrame.

        Each frame gets a ``_source_file`` column with the source file
        name. When ``on_column`` is given (and present in the first
        file) the frames are outer-joined on it — NOTE that pandas then
        suffixes colliding columns (including ``_source_file``) with
        ``_x``/``_y``. Otherwise frames are concatenated row-wise.
        Returns an empty DataFrame for an empty input list.
        """
        dfs = []
        for path in file_paths:
            df = self.read_csv(path)
            df['_source_file'] = Path(path).name
            dfs.append(df)
        if not dfs:
            return pd.DataFrame()
        if on_column and on_column in dfs[0].columns:
            result = dfs[0]
            for df in dfs[1:]:
                result = pd.merge(result, df, on=on_column, how='outer')
            return result
        return pd.concat(dfs, ignore_index=True)

    def split_csv(self, df: pd.DataFrame,
                  group_column: str,
                  output_dir: str) -> List[str]:
        """Split a DataFrame into one CSV per value of ``group_column``.

        Returns the paths written. Rows whose group value is NaN are
        written to their own file — the previous ``df[df[col] == value]``
        filter silently lost them (NaN never compares equal) while still
        creating an empty file. NOTE: group values are embedded in the
        filename verbatim; values containing path separators will fail.
        """
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)
        files: List[str] = []
        # dropna=False keeps the NaN group; sort=False preserves the
        # original appearance order, matching `.unique()` semantics.
        for value, subset in df.groupby(group_column, dropna=False, sort=False):
            filepath = output_path / f"{group_column}_{value}.csv"
            subset.to_csv(filepath, index=False)
            files.append(str(filepath))
        return files

    def convert_types(self, df: pd.DataFrame,
                      type_map: Optional[Dict[str, str]] = None) -> pd.DataFrame:
        """Convert column dtypes, explicitly or by inference.

        With ``type_map`` (column name -> dtype string) each listed
        column is cast; failed casts leave the column unchanged. Without
        it, every column is tried as numeric first, then datetime.
        Always returns a copy.
        """
        df = df.copy()
        if type_map:
            for col, dtype in type_map.items():
                if col in df.columns:
                    try:
                        df[col] = df[col].astype(dtype)
                    except (ValueError, TypeError):
                        # Incompatible data; keep the original column.
                        pass
        else:
            for col in df.columns:
                # Numeric takes priority over datetime.
                try:
                    df[col] = pd.to_numeric(df[col])
                    continue
                except (ValueError, TypeError):
                    pass
                try:
                    df[col] = pd.to_datetime(df[col])
                except (ValueError, TypeError, OverflowError):
                    # Not date-like either; leave as-is.
                    pass
        return df

    def export_csv(self, df: pd.DataFrame,
                   file_path: str,
                   encoding: str = 'utf-8-sig',
                   delimiter: str = ',') -> str:
        """Write ``df`` to ``file_path`` and return the path.

        Defaults to UTF-8 with BOM so Excel opens the file correctly.
        """
        df.to_csv(file_path, encoding=encoding, sep=delimiter, index=False)
        return file_path
class ScheduleCSVHandler(ConstructionCSVHandler):
    """Handler for project-schedule CSVs (e.g. Primavera/MS Project exports)."""

    # Column names typically present in schedule exports (informational).
    SCHEDULE_COLUMNS = ['task_id', 'task_name', 'start_date', 'end_date',
                        'duration', 'predecessors', 'resources']

    def parse_schedule(self, file_path: str) -> pd.DataFrame:
        """Read a schedule CSV and parse date-like columns.

        Columns whose name contains 'date', 'start' or 'end'
        (case-insensitive) are converted to datetimes; columns that
        fail to parse are left unchanged. Fix: the bare ``except:``
        here previously swallowed everything, including KeyboardInterrupt.
        """
        df = self.read_csv(file_path)
        for col in df.columns:
            lowered = col.lower()
            if any(key in lowered for key in ('date', 'start', 'end')):
                try:
                    df[col] = pd.to_datetime(df[col])
                except (ValueError, TypeError, OverflowError):
                    # Not parseable as dates; keep original values.
                    pass
        return df
class CostCSVHandler(ConstructionCSVHandler):
    """Handler for cost/estimate CSVs."""

    def parse_costs(self, file_path: str) -> pd.DataFrame:
        """Read a cost CSV and coerce money/quantity columns to numbers.

        Columns whose name contains a cost-related keyword have '$' and
        thousands separators stripped before numeric conversion; values
        that still cannot be parsed become NaN.
        """
        df = self.read_csv(file_path)
        keywords = ('cost', 'price', 'amount', 'total', 'qty', 'quantity')
        for col in df.columns:
            lowered = col.lower()
            if any(word in lowered for word in keywords):
                stripped = df[col].replace(r'[\$,]', '', regex=True)
                df[col] = pd.to_numeric(stripped, errors='coerce')
        return df
```
```python
# Profile a CSV export to see the detected encoding and delimiter.
handler = ConstructionCSVHandler()
profile = handler.profile_csv("export.csv")
print(f"Encoding: {profile.encoding}, Delimiter: '{profile.delimiter}'")
# Read the file with auto-detection and the default cleaning pass.
df = handler.read_csv("export.csv")
print(f"Loaded {len(df)} rows, {len(df.columns)} columns")
```
```python
# Concatenate monthly exports into a single DataFrame
# (each row is tagged with its source file name).
files = ["jan_export.csv", "feb_export.csv", "mar_export.csv"]
merged = handler.merge_csvs(files)
```
```python
# Write one CSV per distinct value of the 'category' column.
handler.split_csv(df, group_column='category', output_dir='./split_files')
```
```python
# Parse a schedule export; date-like columns become datetimes.
schedule_handler = ScheduleCSVHandler()
schedule = schedule_handler.parse_schedule("p6_export.csv")
```
Generated Mar 1, 2026
Import CSV exports from scheduling software like Primavera or Microsoft Project to analyze project timelines. The skill auto-detects delimiters and cleans messy data, ensuring accurate loading into analysis tools for tracking delays and resource allocation.
Process CSV files from cost estimation software to merge material and labor cost data. The skill handles encoding issues and merges multiple files, enabling consolidated reporting for budget tracking and forecasting in construction projects.
Clean and profile CSV logs from equipment management systems to monitor maintenance schedules and downtime. The skill detects delimiters and removes empty rows, facilitating analysis for preventive maintenance planning and operational efficiency.
Handle CSV invoices from subcontractors by auto-detecting encodings and cleaning column names. This ensures consistent data formatting for payment processing and compliance checks, reducing manual errors in financial workflows.
Merge multiple CSV files from safety inspection reports across construction sites. The skill profiles each file and splits data by site or date, enabling centralized analysis for compliance monitoring and risk assessment.
Offer the skill as part of a cloud-based data processing platform for construction firms. Charge a monthly fee per user or project, providing automated CSV handling with updates for new software exports and compliance standards.
Provide custom integration and training services using the skill for large construction companies. Revenue comes from one-time setup fees and ongoing support contracts to optimize data workflows and reporting.
Release a free version with basic CSV profiling and cleaning, then upsell to a premium tier with advanced features like batch merging and API access. Target small to mid-sized construction businesses looking to automate data tasks.
💬 Integration Tip
Ensure Python 3 is installed, and test with sample CSV files from common construction software to verify delimiter and encoding detection before full deployment.
Guide any property decision for buyers, sellers, landlords, investors, or agents in any jurisdiction.
Use when designing new system architecture, reviewing existing designs, or making architectural decisions. Invoke for system design, architecture review, design patterns, ADRs, scalability planning.
Document significant technical decisions with context, rationale, and consequences to maintain clear, lightweight architectural records for future reference.
Predict construction project costs using Machine Learning. Use Linear Regression, K-Nearest Neighbors, and Random Forest models on historical project data. Train, evaluate, and deploy cost prediction models.
Generate photorealistic architectural renders and visualizations using each::sense AI. Create exterior views, interior renders, sketch-to-render conversions,...
Dual-stream event publishing combining Kafka for durability with Redis Pub/Sub for real-time delivery. Use when building event-driven systems needing both guaranteed delivery and low-latency updates. Triggers on dual stream, event publishing, Kafka Redis, real-time events, pub/sub, streaming architecture.