Skip to content

Commit

Permalink
make release-tag: Merge branch 'master' into stable
Browse files Browse the repository at this point in the history
  • Loading branch information
pvk-developer committed Apr 26, 2023
2 parents fc2adeb + c8ca384 commit 55660fb
Show file tree
Hide file tree
Showing 9 changed files with 206 additions and 137 deletions.
13 changes: 12 additions & 1 deletion HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# History

## v0.9.0 - 2023-04-26

This release adds support for pandas 2.0 and above. Additionally, it adds functionality to find
version add-ons and renames ``covariance`` to ``correlation``.

### Maintenance

* Remove upper bound for pandas - Issue[#349](https://github.com/sdv-dev/Copulas/issues/349) by @pvk-developer
* Rename covariance to correlation - PR[#346](https://github.com/sdv-dev/Copulas/pull/346) by @frances-h
* Add functionality to find version add-on - Issue[#349](https://github.com/sdv-dev/Copulas/issues/349) by @frances-h

## v0.8.0 - 2023-01-06

This release adds support for python 3.10 and 3.11. Additionally, it drops support for python 3.6.
Expand All @@ -12,7 +23,7 @@ This release adds support for python 3.10 and 3.11. Additionally, it drops suppo

## v0.7.0 - 2022-05-10

This release adds `gaussian` as a fallback distribution in case the user specified one fails. It also improves the `fit` of the `beta` distribution by properly estimatig the `loc` and `scale` parameters.
This release adds `gaussian` as a fallback distribution in case the user specified one fails. It also improves the `fit` of the `beta` distribution by properly estimating the `loc` and `scale` parameters.

### General Improvements

Expand Down
6 changes: 5 additions & 1 deletion copulas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

__author__ = 'DataCebo, Inc.'
__email__ = '[email protected]'
__version__ = '0.8.0'
__version__ = '0.9.0.dev1'

import contextlib
import importlib
Expand All @@ -13,6 +13,10 @@
import numpy as np
import pandas as pd

from copulas._addons import _find_addons

_find_addons(group='copulas_modules', parent_globals=globals())

EPSILON = np.finfo(np.float32).eps


Expand Down
26 changes: 26 additions & 0 deletions copulas/_addons.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Copulas add-ons functionality."""
import warnings

from pkg_resources import iter_entry_points


def _find_addons(group, parent_globals):
    """Find and load add-ons based on the given group.

    Every entry point registered under ``group`` is loaded and stored in
    ``parent_globals`` under the entry point's name. An entry point that fails
    to load is reported with a warning and skipped. A name that is already
    present in ``parent_globals`` is never overwritten.

    Args:
        group (str):
            The name of the entry points group to load.
        parent_globals (dict):
            The caller's global scope. Modules will be added
            to the parent's global scope through their name.
    """
    for entry_point in iter_entry_points(group=group):
        try:
            module = entry_point.load()
        except Exception:
            # Best effort: a broken add-on must not break the caller's import.
            # ``pkg_resources`` entry points expose their target as ``module_name``
            # and have no ``module`` attribute, so reading only ``module`` would
            # raise AttributeError from inside this handler instead of warning.
            source = getattr(entry_point, 'module', None)
            if source is None:
                source = getattr(entry_point, 'module_name', '<unknown>')

            msg = f'Failed to load "{entry_point.name}" from "{source}".'
            warnings.warn(msg)
            continue

        if entry_point.name not in parent_globals:
            parent_globals[entry_point.name] = module
52 changes: 24 additions & 28 deletions copulas/multivariate/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class GaussianMultivariate(Multivariate):
distribution names.
"""

covariance = None
correlation = None
columns = None
univariates = None

Expand Down Expand Up @@ -65,29 +65,29 @@ def _transform_to_normal(self, X):

return stats.norm.ppf(np.column_stack(U))

def _get_covariance(self, X):
"""Compute covariance matrix with transformed data.
def _get_correlation(self, X):
"""Compute correlation matrix with transformed data.
Args:
X (numpy.ndarray):
Data for which the covariance needs to be computed.
Data for which the correlation needs to be computed.
Returns:
numpy.ndarray:
computed covariance matrix.
computed correlation matrix.
"""
result = self._transform_to_normal(X)
covariance = pd.DataFrame(data=result).corr().to_numpy()
covariance = np.nan_to_num(covariance, nan=0.0)
correlation = pd.DataFrame(data=result).corr().to_numpy()
correlation = np.nan_to_num(correlation, nan=0.0)
# If singular, add some noise to the diagonal
if np.linalg.cond(covariance) > 1.0 / sys.float_info.epsilon:
covariance = covariance + np.identity(covariance.shape[0]) * EPSILON
if np.linalg.cond(correlation) > 1.0 / sys.float_info.epsilon:
correlation = correlation + np.identity(correlation.shape[0]) * EPSILON

return pd.DataFrame(covariance, index=self.columns, columns=self.columns)
return pd.DataFrame(correlation, index=self.columns, columns=self.columns)

@check_valid_values
def fit(self, X):
"""Compute the distribution for each variable and then its covariance matrix.
"""Compute the distribution for each variable and then its correlation matrix.
Arguments:
X (pandas.DataFrame):
Expand Down Expand Up @@ -126,8 +126,8 @@ def fit(self, X):
self.columns = columns
self.univariates = univariates

LOGGER.debug('Computing covariance')
self.covariance = self._get_covariance(X)
LOGGER.debug('Computing correlation')
self.correlation = self._get_correlation(X)
self.fitted = True

LOGGER.debug('GaussianMultivariate fitted successfully')
Expand All @@ -149,7 +149,7 @@ def probability_density(self, X):
"""
self.check_fit()
transformed = self._transform_to_normal(X)
return stats.multivariate_normal.pdf(transformed, cov=self.covariance)
return stats.multivariate_normal.pdf(transformed, cov=self.correlation)

def cumulative_distribution(self, X):
"""Compute the cumulative distribution value for each point in X.
Expand All @@ -168,7 +168,7 @@ def cumulative_distribution(self, X):
"""
self.check_fit()
transformed = self._transform_to_normal(X)
return stats.multivariate_normal.cdf(transformed, cov=self.covariance)
return stats.multivariate_normal.cdf(transformed, cov=self.correlation)

def _get_conditional_distribution(self, conditions):
"""Compute the parameters of a conditional multivariate normal distribution.
Expand All @@ -192,12 +192,12 @@ def _get_conditional_distribution(self, conditions):
names of the columns that will be sampled conditionally.
"""
columns2 = conditions.index
columns1 = self.covariance.columns.difference(columns2)
columns1 = self.correlation.columns.difference(columns2)

sigma11 = self.covariance.loc[columns1, columns1].to_numpy()
sigma12 = self.covariance.loc[columns1, columns2].to_numpy()
sigma21 = self.covariance.loc[columns2, columns1].to_numpy()
sigma22 = self.covariance.loc[columns2, columns2].to_numpy()
sigma11 = self.correlation.loc[columns1, columns1].to_numpy()
sigma12 = self.correlation.loc[columns1, columns2].to_numpy()
sigma21 = self.correlation.loc[columns2, columns1].to_numpy()
sigma22 = self.correlation.loc[columns2, columns2].to_numpy()

mu1 = np.zeros(len(columns1))
mu2 = np.zeros(len(columns2))
Expand All @@ -220,7 +220,7 @@ def _get_normal_samples(self, num_rows, conditions):
a standard normal multivariate conditioned on the given condition values.
"""
if conditions is None:
covariance = self.covariance
covariance = self.correlation
columns = self.columns
means = np.zeros(len(columns))
else:
Expand Down Expand Up @@ -277,11 +277,9 @@ def to_dict(self):
"""
self.check_fit()
univariates = [univariate.to_dict() for univariate in self.univariates]
warnings.warn('`covariance` will be renamed to `correlation` in v0.4.0',
DeprecationWarning)

return {
'covariance': self.covariance.to_numpy().tolist(),
'correlation': self.correlation.to_numpy().tolist(),
'univariates': univariates,
'columns': self.columns,
'type': get_qualified_name(self),
Expand All @@ -308,10 +306,8 @@ def from_dict(cls, copula_dict):
for parameters in copula_dict['univariates']:
instance.univariates.append(Univariate.from_dict(parameters))

covariance = copula_dict['covariance']
instance.covariance = pd.DataFrame(covariance, index=columns, columns=columns)
correlation = copula_dict['correlation']
instance.correlation = pd.DataFrame(correlation, index=columns, columns=columns)
instance.fitted = True
warnings.warn('`covariance` will be renamed to `correlation` in v0.4.0',
DeprecationWarning)

return instance
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.8.0
current_version = 0.9.0.dev1
commit = True
tag = True
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
"matplotlib>=3.6.0,<4;python_version>='3.10'",
"numpy>=1.20.0,<2;python_version<'3.10'",
"numpy>=1.23.3,<2;python_version>='3.10'",
"pandas>=1.1.3,<2;python_version<'3.10'",
"pandas>=1.3.4,<2;python_version>='3.10' and python_version<'3.11'",
"pandas>=1.5.0,<2;python_version>='3.11'",
"pandas>=1.1.3;python_version<'3.10'",
"pandas>=1.3.4;python_version>='3.10' and python_version<'3.11'",
"pandas>=1.5.0;python_version>='3.11'",
"scipy>=1.5.4,<2;python_version<'3.10'",
"scipy>=1.9.2,<2;python_version>='3.10'",
]
Expand Down Expand Up @@ -137,6 +137,6 @@
test_suite='tests',
tests_require=tests_require,
url='https://github.com/sdv-dev/Copulas',
version='0.8.0',
version='0.9.0.dev1',
zip_safe=False,
)
42 changes: 21 additions & 21 deletions tests/unit/multivariate/test_gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,26 +185,26 @@ def test__transform_to_normal_dataframe(self):
passed = dist_b.cdf.call_args[0][0]
np.testing.assert_allclose(expected, passed)

def test__get_covariance(self):
"""_get_covariance computes the covariance matrix of normalized values."""
def test__get_correlation(self):
"""_get_correlation computes the correlation matrix of normalized values."""
# Setup
copula = GaussianMultivariate(GaussianUnivariate)
copula.fit(self.data)

expected_covariance = np.array([
expected_correlation = np.array([
[1., -0.01261819, -0.19821644],
[-0.01261819, 1., -0.16896087],
[-0.19821644, -0.16896087, 1.]
])

# Run
covariance = copula._get_covariance(self.data)
correlation = copula._get_correlation(self.data)

# Check
assert np.isclose(covariance, expected_covariance).all().all()
assert np.isclose(correlation, expected_correlation).all().all()

def test_fit_default_distribution(self):
"""On fit, a distribution is created for each column along the covariance and means"""
"""On fit, a distribution is created for each column along the correlation and means"""

copula = GaussianMultivariate(GaussianUnivariate)
copula.fit(self.data)
Expand All @@ -215,8 +215,8 @@ def test_fit_default_distribution(self):
assert copula.univariates[i]._params['loc'] == self.data[key].mean()
assert copula.univariates[i]._params['scale'] == np.std(self.data[key])

expected_covariance = copula._get_covariance(self.data)
assert (copula.covariance == expected_covariance).all().all()
expected_correlation = copula._get_correlation(self.data)
assert (copula.correlation == expected_correlation).all().all()

def test_fit_distribution_arg(self):
"""On fit, the distributions for each column use instances of copula.distribution."""
Expand All @@ -234,8 +234,8 @@ def test_fit_distribution_arg(self):
assert copula.columns[i] == key
assert get_qualified_name(copula.univariates[i].__class__) == copula.distribution

expected_covariance = copula._get_covariance(self.data)
assert (copula.covariance == expected_covariance).all().all()
expected_correlation = copula._get_correlation(self.data)
assert (copula.correlation == expected_correlation).all().all()

def test_fit_distribution_selector(self):
"""
Expand Down Expand Up @@ -269,8 +269,8 @@ def test_fit_numpy_array(self):
assert univariate._params['loc'] == np.mean(self.data[column])
assert univariate._params['scale'] == np.std(self.data[column])

expected_covariance = copula._get_covariance(pd.DataFrame(self.data.to_numpy()))
assert (copula.covariance == expected_covariance).all().all()
expected_correlation = copula._get_correlation(pd.DataFrame(self.data.to_numpy()))
assert (copula.correlation == expected_correlation).all().all()

@patch('copulas.univariate.truncated_gaussian.TruncatedGaussian._fit')
@patch('copulas.multivariate.gaussian.warnings')
Expand Down Expand Up @@ -390,8 +390,8 @@ def test_sample(self, normal_mock):
assert result.equals(expected_result)

assert normal_mock.called_once_with(
np.zeros(instance.covariance.shape[0]),
instance.covariance,
np.zeros(instance.correlation.shape[0]),
instance.correlation,
5
)

Expand Down Expand Up @@ -423,7 +423,7 @@ def test_sample_random_state(self):
result = instance.sample(5)

# Check
pd.testing.assert_frame_equal(result, expected_result, check_less_precise=True)
pd.testing.assert_frame_equal(result, expected_result)

def test_to_dict(self):
"""To_dict returns the parameters to replicate the copula."""
Expand All @@ -439,8 +439,8 @@ def test_to_dict(self):
assert result['columns'] == ['column1', 'column2', 'column3']
assert len(result['univariates']) == 3

expected_cov = copula._get_covariance(self.data).to_numpy().tolist()
np.testing.assert_equal(result['covariance'], expected_cov)
expected_cov = copula._get_correlation(self.data).to_numpy().tolist()
np.testing.assert_equal(result['correlation'], expected_cov)

for univariate, result_univariate in zip(copula.univariates, result['univariates']):
assert univariate.to_dict() == result_univariate
Expand All @@ -466,7 +466,7 @@ def test_from_dict(self):
def test_sample_constant_column(self):
"""Gaussian copula can sample after being fit with a constant column.
This process will raise warnings when computing the covariance matrix
This process will raise warnings when computing the correlation matrix
"""
# Setup
instance = GaussianMultivariate()
Expand All @@ -490,12 +490,12 @@ def test_sample_constant_column(self):
# This is to check that the samples on the non constant column are not constant too.
assert len(result.loc[:, 1].unique()) > 1

covariance = instance.covariance
assert (~pd.isna(covariance)).all().all()
correlation = instance.correlation
assert (~pd.isna(correlation)).all().all()

def test__get_conditional_distribution(self):
gm = GaussianMultivariate()
gm.covariance = pd.DataFrame({
gm.correlation = pd.DataFrame({
'a': [1, 0.2, 0.3],
'b': [0.2, 1, 0.4],
'c': [0.3, 0.4, 1],
Expand Down
45 changes: 45 additions & 0 deletions tests/unit/test__addons.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from unittest.mock import Mock, patch

from copulas._addons import _find_addons


@patch('copulas._addons.iter_entry_points')
def test__find_versions(entry_points_mock):
    """A loadable add-on is stored in the target dict under its entry point name."""
    # Setup
    addon = Mock()
    addon.name = 'entry_name'
    addon.load.return_value = 'entry_point'
    entry_points_mock.return_value = [addon]
    namespace = {}

    # Run
    _find_addons(group='group', parent_globals=namespace)

    # Assert
    entry_points_mock.assert_called_once_with(group='group')
    assert namespace['entry_name'] == 'entry_point'


@patch('copulas._addons.warnings.warn')
@patch('copulas._addons.iter_entry_points')
def test__find_versions_bad_addon(entry_points_mock, warning_mock):
    """An add-on whose ``load`` raises is reported through exactly one warning."""
    # Setup
    failing_addon = Mock()
    failing_addon.name = 'bad_entry_point'
    failing_addon.module = 'bad_module'
    # An exception instance as ``side_effect`` is raised when ``load`` is called.
    failing_addon.load.side_effect = ValueError()
    entry_points_mock.return_value = [failing_addon]
    expected_warning = 'Failed to load "bad_entry_point" from "bad_module".'

    # Run
    _find_addons(group='group', parent_globals={})

    # Assert
    entry_points_mock.assert_called_once_with(group='group')
    warning_mock.assert_called_once_with(expected_warning)
Loading

0 comments on commit 55660fb

Please sign in to comment.