from sklearn.datasets import load_iris, make_regression
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error, r2_score
# Classification on the Iris dataset: stratified 70/30 hold-out split
# so every class keeps its proportion in both partitions.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)
# Compare an unconstrained tree against one capped at depth 3:
# report hold-out accuracy, macro-F1, and the fitted tree's actual size.
for max_d in (None, 3):
    model = DecisionTreeClassifier(max_depth=max_d, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(
        f"depth={max_d} | acc={accuracy_score(y_test, y_pred):.3f} | "
        f"f1_macro={f1_score(y_test, y_pred, average='macro'):.3f} | "
        f"depth_real={model.get_depth()} | leaves={model.get_n_leaves()}"
    )
# Cross-validation: 5-fold accuracy for several depth caps on the full
# dataset, printing mean and standard deviation per setting.
for max_d in (2, 3, 5, None):
    cv_model = DecisionTreeClassifier(max_depth=max_d, random_state=42)
    cv_scores = cross_val_score(cv_model, X, y, cv=5, scoring="accuracy")
    print(f"depth={max_d} | média={cv_scores.mean():.3f} | desvio={cv_scores.std():.3f}")
# Grid search: exhaustive 5-fold CV over tree hyperparameters, fit on the
# training split only (the test split stays untouched for final evaluation).
param_grid = {
    "max_depth": [2, 3, 4, 5, None],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
    "criterion": ["gini", "entropy"],
}
grid = GridSearchCV(
    DecisionTreeClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring="accuracy",
    n_jobs=-1,  # parallelize fits across all available cores
)
grid.fit(X_train, y_train)
print(grid.best_params_)
# Fix: the search previously discarded its achieved CV score and was never
# evaluated on the hold-out set, so the chosen params couldn't be judged.
# `grid.predict` uses the refit best estimator (refit=True is the default).
print(f"best_cv_acc={grid.best_score_:.3f}")
print(f"test_acc={accuracy_score(y_test, grid.predict(X_test)):.3f}")
# Regression: synthetic noisy data (500 samples, 5 features), 80/20 split.
X_reg, y_reg = make_regression(n_samples=500, n_features=5, noise=20, random_state=42)
split = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = split
# Sweep regressor depth and watch hold-out MAE / R2 — deeper trees tend
# to overfit the noise in the training data.
for max_d in (3, 6, 10, None):
    reg_model = DecisionTreeRegressor(max_depth=max_d, random_state=42)
    reg_model.fit(X_train_reg, y_train_reg)
    y_hat = reg_model.predict(X_test_reg)
    print(
        f"depth={max_d} | MAE={mean_absolute_error(y_test_reg, y_hat):.2f} | "
        f"R2={r2_score(y_test_reg, y_hat):.3f}"
    )