Compare commits
10 Commits
3d421a8534
...
a39e7c73a5
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a39e7c73a5 | ||
|
|
3a134d8f1e | ||
|
|
c37dfd4918 | ||
|
|
7293844a1f | ||
|
|
20d698d435 | ||
|
|
607561b9fe | ||
|
|
f837a1d217 | ||
|
|
9f35c4eeeb | ||
|
|
209c1b93c6 | ||
|
|
3bd76e879b |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1 +1,2 @@
|
||||
benchmark
|
||||
.vscode
|
||||
12
README.md
12
README.md
@@ -13,5 +13,17 @@ The above command will generate two files that will be used for profiling the pr
|
||||
To use the generated files for profiling you have to run:
|
||||
`go tool pprof cpu00.pprof`
|
||||
|
||||
For memory profiling:
|
||||
`go test -bench . -benchtime=10x -run ^$ -memprofile mem00.pprof`
|
||||
|
||||
`go tool pprof -alloc_space mem00.pprof`
|
||||
|
||||
This will start an interactive CLI session where you can view more details about the functions running in the program.
|
||||
Inside that interactive CLI session you can use the `top` and `list` commands to see details of the functions that are running.
|
||||
|
||||
|
||||
For tracing:
|
||||
|
||||
`go test -bench . -benchtime=10x -run ^$ -trace trace01.out`
|
||||
|
||||
`go tool trace trace01.out`
|
||||
6
benchmem02.txt
Normal file
6
benchmem02.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
goos: linux
|
||||
goarch: amd64
|
||||
pkg: github.com/Serares/coolStats
|
||||
BenchmarkRun-16 10 266165288 ns/op 214887704 B/op 2527035 allocs/op
|
||||
PASS
|
||||
ok github.com/Serares/coolStats 2.935s
|
||||
6
benchmem03.txt
Normal file
6
benchmem03.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
goos: linux
|
||||
goarch: amd64
|
||||
pkg: github.com/Serares/coolStats
|
||||
BenchmarkRun-16 10 92313063 ns/op 215030855 B/op 2529386 allocs/op
|
||||
PASS
|
||||
ok github.com/Serares/coolStats 1.187s
|
||||
7
benchresultsMinMax_Improvement.txt
Normal file
7
benchresultsMinMax_Improvement.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
goos: linux
|
||||
goarch: amd64
|
||||
pkg: github.com/Serares/coolStats
|
||||
cpu: 11th Gen Intel(R) Core(TM) i7-11850H @ 2.50GHz
|
||||
BenchmarkRun-16 10 134744628 ns/op
|
||||
PASS
|
||||
ok github.com/Serares/coolStats 1.487s
|
||||
6
benchresultsMinMax_add_min_max.txt
Normal file
6
benchresultsMinMax_add_min_max.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
goos: linux
|
||||
goarch: amd64
|
||||
pkg: github.com/Serares/coolStats
|
||||
BenchmarkRun-16 10 374300073 ns/op
|
||||
PASS
|
||||
ok github.com/Serares/coolStats 4.114s
|
||||
6
benchresultsMinMax_using_func_on_partial_dataset.txt
Normal file
6
benchresultsMinMax_using_func_on_partial_dataset.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
goos: linux
|
||||
goarch: amd64
|
||||
pkg: github.com/Serares/coolStats
|
||||
BenchmarkRun-16 10 226407658 ns/op
|
||||
PASS
|
||||
ok github.com/Serares/coolStats 2.491s
|
||||
BIN
coolStats.test
Executable file
BIN
coolStats.test
Executable file
Binary file not shown.
BIN
cpu00.pprof
Normal file
BIN
cpu00.pprof
Normal file
Binary file not shown.
BIN
cpu00max.pprof
Normal file
BIN
cpu00max.pprof
Normal file
Binary file not shown.
BIN
cpu01.pprof
Normal file
BIN
cpu01.pprof
Normal file
Binary file not shown.
BIN
cpu01max.pprof
Normal file
BIN
cpu01max.pprof
Normal file
Binary file not shown.
BIN
cpu02max.pprof
Normal file
BIN
cpu02max.pprof
Normal file
Binary file not shown.
27
csv.go
27
csv.go
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
@@ -21,18 +22,34 @@ func avg(data []float64) float64 {
|
||||
return sum(data) / float64(len(data))
|
||||
}
|
||||
|
||||
// min returns the smallest value in data.
//
// It makes a single linear pass and never reorders or otherwise modifies
// data, so it is safe to call on a slice (or sub-slice) that other code
// still reads afterwards. If any element is NaN, the result is NaN.
//
// data must contain at least one element; passing an empty slice causes an
// index-out-of-range panic.
func min(data []float64) float64 {
	lowest := data[0]
	for _, v := range data[1:] {
		// v != v holds only for NaN. Once lowest is NaN it sticks, because
		// no later "v < lowest" comparison against NaN can succeed.
		if v < lowest || v != v {
			lowest = v
		}
	}
	return lowest
}
|
||||
|
||||
// max returns the largest value in data.
//
// It makes a single linear pass and never reorders or otherwise modifies
// data, so it is safe to call on a slice (or sub-slice) that other code
// still reads afterwards. NaN elements are ignored; the result is NaN only
// when every element is NaN.
//
// data must contain at least one element; passing an empty slice causes an
// index-out-of-range panic.
func max(data []float64) float64 {
	highest := data[0]
	for _, v := range data[1:] {
		// highest != highest holds only while highest is NaN; in that case
		// take the next value so a leading NaN does not mask real numbers.
		if v > highest || highest != highest {
			highest = v
		}
	}
	return highest
}
|
||||
|
||||
func csv2float(r io.Reader, column int) ([]float64, error) {
|
||||
cr := csv.NewReader(r)
|
||||
// set this to reuse the same slice for each read
|
||||
// operation to reduce the memory allocation
|
||||
cr.ReuseRecord = true
|
||||
if column < 1 {
|
||||
return nil, fmt.Errorf("%w: please provide a valid column number", ErrInvalidColumn)
|
||||
}
|
||||
column--
|
||||
allData, err := cr.ReadAll()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read data from file: %w", err)
|
||||
}
|
||||
var data []float64
|
||||
for i, row := range allData {
|
||||
for i := 0; ; i++ {
|
||||
row, err := cr.Read()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read data from file: %w", err)
|
||||
}
|
||||
if i == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -23,11 +23,13 @@ func TestOperations(t *testing.T) {
|
||||
}{
|
||||
{"Sum", sum, []float64{300, 85.927, -30, 436}},
|
||||
{"Avg", avg, []float64{37.5, 6.609769230769231, -15, 72.666666666666666}},
|
||||
{"Min", min, []float64{10, 2.2, -20, 37}},
|
||||
{"Max", max, []float64{100, 12.287, -10, 129}},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
for k, exp := range tc.exp {
|
||||
name := fmt.Sprintf("%sData%d", tc.name, k)
|
||||
name := fmt.Sprintf("%s Data %d", tc.name, k)
|
||||
t.Run(name, func(t *testing.T) {
|
||||
res := tc.op(data[k])
|
||||
// comparing floats might not be the best solution
|
||||
|
||||
100
main.go
100
main.go
@@ -5,6 +5,8 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"runtime"
|
||||
"sync"
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -32,30 +34,100 @@ func run(filenames []string, op string, column int, out io.Writer) error {
|
||||
opFunc = sum
|
||||
case "avg":
|
||||
opFunc = avg
|
||||
case "min":
|
||||
opFunc = min
|
||||
case "max":
|
||||
opFunc = max
|
||||
default:
|
||||
return fmt.Errorf("%w: %s", ErrInvalidOperation, op)
|
||||
}
|
||||
|
||||
consolidate := make([]float64, 0)
|
||||
resCh := make(chan []float64)
|
||||
errCh := make(chan error)
|
||||
doneCh := make(chan struct{})
|
||||
filesCh := make(chan string)
|
||||
wg := sync.WaitGroup{}
|
||||
|
||||
for _, fname := range filenames {
|
||||
f, err := os.Open(fname)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot open file: %w", err)
|
||||
go func() {
|
||||
defer close(filesCh)
|
||||
for _, fname := range filenames {
|
||||
filesCh <- fname
|
||||
}
|
||||
}()
|
||||
|
||||
data, err := csv2float(f, column)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for i := 0; i < runtime.NumCPU(); i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for fname := range filesCh {
|
||||
f, err := os.Open(fname)
|
||||
if err != nil {
|
||||
errCh <- fmt.Errorf("cannot open file: %w", err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := f.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
data, err := csv2float(f, column)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
}
|
||||
|
||||
if err := f.Close(); err != nil {
|
||||
errCh <- err
|
||||
}
|
||||
|
||||
resCh <- data
|
||||
}
|
||||
}()
|
||||
|
||||
consolidate = append(consolidate, data...)
|
||||
}
|
||||
|
||||
_, err := fmt.Fprintln(out, opFunc(consolidate))
|
||||
return err
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(doneCh)
|
||||
}()
|
||||
// TODO try to improve the performance of min and max
|
||||
// by running the functions in multiple goroutines
|
||||
// or by trying to run the functions for each file that is read
|
||||
for {
|
||||
select {
|
||||
case err := <-errCh:
|
||||
return err
|
||||
case data := <-resCh:
|
||||
if op == "min" || op == "max" {
|
||||
// todo see if using goroutines here can improve the performance
|
||||
// spawn like 4 goroutines and divide the data between them to be processed
|
||||
minWg := sync.WaitGroup{}
|
||||
theData := make(chan float64)
|
||||
quarterLength := len(data) / 4
|
||||
start := 0
|
||||
endQuarter := quarterLength
|
||||
incrementQuarter := 2
|
||||
for i := 0; i < 4; i++ {
|
||||
minWg.Add(1)
|
||||
go func(start, end int) {
|
||||
defer minWg.Done()
|
||||
theData <- opFunc(data[start:end])
|
||||
}(start, endQuarter)
|
||||
start = endQuarter
|
||||
endQuarter = quarterLength * incrementQuarter
|
||||
incrementQuarter++
|
||||
}
|
||||
go func() {
|
||||
minWg.Wait()
|
||||
close(theData)
|
||||
}()
|
||||
|
||||
for quarterData := range theData {
|
||||
consolidate = append(consolidate, quarterData)
|
||||
}
|
||||
|
||||
} else {
|
||||
consolidate = append(consolidate, data...)
|
||||
}
|
||||
case <-doneCh:
|
||||
_, err := fmt.Fprintln(out, opFunc(consolidate))
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,7 +76,7 @@ func BenchmarkRun(b *testing.B) {
|
||||
// reset the time before running the benchmark loop
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if err := run(filenames, "avg", 2, io.Discard); err != nil {
|
||||
if err := run(filenames, "max", 2, io.Discard); err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
}
|
||||
|
||||
BIN
mem00.pprof
Normal file
BIN
mem00.pprof
Normal file
Binary file not shown.
BIN
mem00max.pprof
Normal file
BIN
mem00max.pprof
Normal file
Binary file not shown.
BIN
trace01.out
Normal file
BIN
trace01.out
Normal file
Binary file not shown.
BIN
trace01max.out
Normal file
BIN
trace01max.out
Normal file
Binary file not shown.
BIN
trace02.out
Normal file
BIN
trace02.out
Normal file
Binary file not shown.
BIN
trace03.out
Normal file
BIN
trace03.out
Normal file
Binary file not shown.
Reference in New Issue
Block a user