# Source code for modeling.models

""" Wrappers around various models"""
import numpy as np
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.neighbors.kde import KernelDensity


class KMeansModel(object):
    """Wrapper class for Scikit Learn's KMeans clustering.

    Attributes
    ----------
    n_clusters : int
        Number of clusters for KMeans.
    model : KMeans
        Wrapped class model.
    """

    def __init__(self, n_clusters=5):
        """Create the wrapped KMeans estimator.

        Parameters
        ----------
        n_clusters : int
            Number of clusters for KMeans.
        """
        self.n_clusters = n_clusters
        self.model = KMeans(n_clusters=self.n_clusters)

    def fit(self, train_X=None):
        """Wrapper method for fit() method of kmeans model.

        Parameters
        ----------
        train_X : {array-like, sparse matrix}, optional
            Training data, shape = [n_samples, n_features].  When omitted,
            the method falls back to ``self.observations``.  This class
            never assigns that attribute itself, so callers using the
            no-argument form must set it beforehand; otherwise an
            AttributeError is raised.
        """
        if train_X is None:
            # Legacy call style: fit() with the data pre-assigned on the
            # instance.  Kept so existing callers continue to work.
            train_X = self.observations
        self.model.fit(train_X)

    def get_clusters(self, train_X):
        """Generates the raw samples associated with each cluster.

        Rows of ``train_X`` are selected using ``self.model.labels_``, so
        the model must already be fitted.
        NOTE(review): presumably ``train_X`` is the same data, in the same
        order, that the model was fitted on -- confirm against callers.

        Parameters
        ----------
        train_X : {array-like, sparse matrix}, shape = [n_samples, n_features]

        Returns
        -------
        cluster_data : dict
            Dictionary of clusters.  Keys are 1-based cluster numbers
            (label ``i`` is stored under ``i + 1``); each value is a
            one-element list holding the rows of ``train_X`` whose label
            is that cluster.
        """
        labels = self.model.labels_
        return {
            label + 1: [train_X[np.where(labels == label)]]
            for label in range(self.n_clusters)
        }

    def score_samples(self, X):
        """Predicts which cluster each of the samples in X belongs to.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]

        Returns
        -------
        The result of ``self.model.predict(X)``.
        """
        return self.model.predict(X)
class GaussianMixtureModel(object):
    """Wrapper class for Scikit Learn's Gaussian Mixture Model.

    Attributes
    ----------
    n_components : int
        Number of mixture components.
    model : GaussianMixture
        Wrapped class model, built with ``covariance_type='full'``.
    """

    def __init__(self, n_components=5, means_init=None):
        """Create the wrapped GaussianMixture estimator.

        Parameters
        ----------
        n_components : int
            Number of GMM components.
        means_init : list
            Initial means for the GMM, one entry per component; forwarded
            unchanged to ``GaussianMixture(means_init=...)``.
        """
        self.n_components = n_components
        gmm_options = dict(
            n_components=self.n_components,
            covariance_type='full',
            means_init=means_init,
        )
        self.model = GaussianMixture(**gmm_options)

    def fit(self, train_X):
        """Wrapper method for fit() method of GMM model.

        Parameters
        ----------
        train_X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        """
        estimator = self.model
        estimator.fit(train_X)

    def generate_samples(self, n_samples):
        """Generates random samples according to the fitted distribution.

        Parameters
        ----------
        n_samples : int
            Number of samples to draw.

        Returns
        -------
        The first element of the ``(samples, component_labels)`` pair
        returned by ``self.model.sample``; the labels are discarded.
        """
        drawn, _component_ids = self.model.sample(n_samples)
        return drawn

    def score_samples(self, X):
        """Returns ``self.model.predict_proba(X)`` for the samples in X.

        NOTE(review): despite the method name (and unlike
        ``KDEModel.score_samples``), this does not call the estimator's
        ``score_samples``; it returns per-component membership
        probabilities rather than a log-likelihood -- confirm which one
        callers expect.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        """
        estimator = self.model
        return estimator.predict_proba(X)
class KDEModel(object):
    """Wrapper class for Scikit Learn's Kernel Density Estimation model.

    Attributes
    ----------
    model : KernelDensity
        Wrapped class model.
    """

    def __init__(self, kernel='gaussian', bandwidth=.001):
        """Create the wrapped KernelDensity estimator.

        Parameters
        ----------
        kernel : str
            Kernel name forwarded to ``KernelDensity``.  Previously this
            argument was accepted but ignored in favour of a hard-coded
            ``'gaussian'``.
            NOTE(review): per scikit-learn's documentation, sampling is
            only implemented for some kernels, so ``generate_samples`` may
            fail for other choices -- confirm for the installed version.
        bandwidth : float
            Kernel bandwidth forwarded to ``KernelDensity``.
        """
        # Forward the caller's kernel instead of silently forcing 'gaussian'.
        self.model = KernelDensity(kernel=kernel, bandwidth=bandwidth)

    def fit(self, train_X):
        """Wrapper method for fit() method of Kernel Density model.

        Parameters
        ----------
        train_X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        """
        self.model.fit(train_X)

    def generate_samples(self, n_samples):
        """Generates random samples according to the fitted distribution.

        Parameters
        ----------
        n_samples : int
            Number of samples to draw.

        Returns
        -------
        The result of ``self.model.sample(n_samples)``.
        """
        return self.model.sample(n_samples)

    def score_samples(self, X):
        """Predicts the log likelihood score of the samples in X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]

        Returns
        -------
        The result of ``self.model.score_samples(X)``.
        """
        return self.model.score_samples(X)