-
-
Notifications
You must be signed in to change notification settings - Fork 168
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Closes issue #65 * added describe feature (pandas.describe) * Update README.md --------- Co-authored-by: Montana Flynn <montana949@gmail.com>
- Loading branch information
1 parent
a145605
commit b9dad85
Showing
5 changed files
with
184 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package stats | ||
|
||
import (
	"fmt"
	"strings"
)
|
||
type (
	// Description holds the summary statistics that Describe and
	// DescribePercentileFunc compute for a dataset.
	Description struct {
		// Count is the number of values in the dataset (input.Len()).
		Count int

		// Mean, Std, Max and Min are the results of Mean, StandardDeviation,
		// Max and Min applied to the dataset.
		Mean, Std, Max, Min float64

		// DescriptionPercentiles lists the computed percentiles in the order
		// they were requested.
		DescriptionPercentiles []descriptionPercentile

		// AllowedNaN records the allowNaN flag the description was built with.
		AllowedNaN bool
	}

	// descriptionPercentile pairs a requested percentile with the value
	// computed for it.
	descriptionPercentile struct {
		Percentile, Value float64
	}
)
|
||
// Describe generates descriptive statistics about a provided dataset, similar to python's pandas.describe() | ||
func Describe(input Float64Data, allowNaN bool, percentiles *[]float64) (*Description, error) { | ||
return DescribePercentileFunc(input, allowNaN, percentiles, Percentile) | ||
} | ||
|
||
// Describe generates descriptive statistics about a provided dataset, similar to python's pandas.describe() | ||
// Takes in a function to use for percentile calculation | ||
func DescribePercentileFunc(input Float64Data, allowNaN bool, percentiles *[]float64, percentileFunc func(Float64Data, float64) (float64, error)) (*Description, error) { | ||
var description Description | ||
description.AllowedNaN = allowNaN | ||
description.Count = input.Len() | ||
|
||
if description.Count == 0 && !allowNaN { | ||
return &description, ErrEmptyInput | ||
} | ||
|
||
// Disregard error, since it cannot be thrown if Count is > 0 and allowNaN is false, else NaN is accepted | ||
description.Std, _ = StandardDeviation(input) | ||
description.Max, _ = Max(input) | ||
description.Min, _ = Min(input) | ||
description.Mean, _ = Mean(input) | ||
|
||
if percentiles != nil { | ||
for _, percentile := range *percentiles { | ||
if value, err := percentileFunc(input, percentile); err == nil || allowNaN { | ||
description.DescriptionPercentiles = append(description.DescriptionPercentiles, descriptionPercentile{Percentile: percentile, Value: value}) | ||
} | ||
} | ||
} | ||
|
||
return &description, nil | ||
} | ||
|
||
/* | ||
Represents the Description instance in a string format with specified number of decimals | ||
count 3 | ||
mean 2.00 | ||
std 0.82 | ||
max 3.00 | ||
min 1.00 | ||
25.00% NaN | ||
50.00% 1.50 | ||
75.00% 2.50 | ||
NaN OK true | ||
*/ | ||
func (d *Description) String(decimals int) string { | ||
var str string | ||
|
||
str += fmt.Sprintf("count\t%d\n", d.Count) | ||
str += fmt.Sprintf("mean\t%.*f\n", decimals, d.Mean) | ||
str += fmt.Sprintf("std\t%.*f\n", decimals, d.Std) | ||
str += fmt.Sprintf("max\t%.*f\n", decimals, d.Max) | ||
str += fmt.Sprintf("min\t%.*f\n", decimals, d.Min) | ||
for _, percentile := range d.DescriptionPercentiles { | ||
str += fmt.Sprintf("%.2f%%\t%.*f\n", percentile.Percentile, decimals, percentile.Value) | ||
} | ||
str += fmt.Sprintf("NaN OK\t%t", d.AllowedNaN) | ||
return str | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
package stats_test | ||
|
||
import ( | ||
"math" | ||
"testing" | ||
|
||
"github.com/montanaflynn/stats" | ||
) | ||
|
||
func TestDescribeValidDataset(t *testing.T) { | ||
_, err := stats.Describe([]float64{1.0, 2.0, 3.0}, false, &[]float64{25.0, 50.0, 75.0}) | ||
if err != nil { | ||
t.Errorf("Returned an error") | ||
} | ||
} | ||
|
||
func TestDescribeEmptyDataset(t *testing.T) { | ||
_, err := stats.Describe([]float64{}, false, nil) | ||
if err != stats.ErrEmptyInput { | ||
t.Errorf("Did not return empty input error") | ||
} | ||
} | ||
|
||
func TestDescribeEmptyDatasetNaN(t *testing.T) { | ||
describe, err := stats.Describe([]float64{}, true, nil) | ||
if err != nil { | ||
t.Errorf("Returned an error") | ||
} | ||
|
||
if !math.IsNaN(describe.Max) || !math.IsNaN(describe.Mean) || !math.IsNaN(describe.Min) || !math.IsNaN(describe.Std) { | ||
t.Errorf("Was not NaN") | ||
} | ||
} | ||
|
||
func TestDescribeValidDatasetNaN(t *testing.T) { | ||
describe, err := stats.Describe([]float64{1.0, 2.0, 3.0}, true, &[]float64{25.0, 50.0, 75.0}) | ||
if err != nil { | ||
t.Errorf("Returned an error") | ||
} | ||
|
||
if math.IsNaN(describe.Max) { | ||
t.Errorf("Was NaN") | ||
} | ||
} | ||
|
||
func TestDescribeValues(t *testing.T) { | ||
dataSet := []float64{1.0, 2.0, 3.0} | ||
percentiles := []float64{25.0, 50.0, 75.0} | ||
describe, _ := stats.Describe(dataSet, true, &percentiles) | ||
if describe.Count != len(dataSet) { | ||
t.Errorf("Count was not == length of dataset") | ||
} | ||
if len(describe.DescriptionPercentiles) != len(percentiles) { | ||
t.Errorf("Percentiles length was not == length of input percentiles") | ||
} | ||
|
||
max, _ := stats.Max(dataSet) | ||
if max != describe.Max { | ||
t.Errorf("Max was not equal to Max(dataset)") | ||
} | ||
|
||
min, _ := stats.Min(dataSet) | ||
if min != describe.Min { | ||
t.Errorf("Min was not equal to Min(dataset)") | ||
} | ||
|
||
mean, _ := stats.Mean(dataSet) | ||
if mean != describe.Mean { | ||
t.Errorf("Mean was not equal to Mean(dataset)") | ||
} | ||
|
||
std, _ := stats.StandardDeviation(dataSet) | ||
if std != describe.Std { | ||
t.Errorf("Std was not equal to StandardDeviation(dataset)") | ||
} | ||
} | ||
|
||
func TestDescribeString(t *testing.T) { | ||
describe, _ := stats.Describe([]float64{1.0, 2.0, 3.0}, true, &[]float64{25.0, 50.0, 75.0}) | ||
if describe.String(2) != "count\t3\nmean\t2.00\nstd\t0.82\nmax\t3.00\nmin\t1.00\n25.00%\tNaN\n50.00%\t1.50\n75.00%\t2.50\nNaN OK\ttrue" { | ||
t.Errorf("String output is not correct") | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters