Compare commits

...

10 Commits

Author | SHA1 | Message | Date
rjianu | a39e7c73a5 | chore: Add a tracer and cpu profiler files | 2023-07-18 14:58:38 +03:00
rjianu | 3a134d8f1e | chore: Add bench logs for first implementation of min max | 2023-07-18 14:47:23 +03:00
rjianu | c37dfd4918 | chore: Add bench for min max without the goroutines | 2023-07-18 14:45:16 +03:00
rjianu | 7293844a1f | feat: Improve performance for min max using goroutines | 2023-07-18 14:43:10 +03:00
rjianu | 20d698d435 | feat: Update the min max performance | 2023-07-17 20:39:29 +03:00
rjianu | 607561b9fe | feat: Add min and max and try to improve the performance | 2023-07-17 20:18:16 +03:00
rjianu | f837a1d217 | feat: Improve the concurrency | 2023-07-12 17:30:18 +03:00
rjianu | 9f35c4eeeb | feat: Improve the program execution time by using gorutines | 2023-07-12 16:56:36 +03:00
rjianu | 209c1b93c6 | chore: All the benchmark stats | 2023-07-12 16:29:25 +03:00
rjianu | 3bd76e879b | feat: Improve memory allocation | 2023-07-12 16:26:54 +03:00
23 changed files with 158 additions and 23 deletions

3
.gitignore vendored

@@ -1 +1,2 @@
 benchmark
+.vscode


@@ -13,5 +13,17 @@ The above command will generate two files that will be used for profiling the pr
 To use the generated files for profiling you have to run:
 `go tool pprof cpu00.pprof`
+For memory
+`go test -bench . -benchtime=10x -run ^$ -memprofile mem00.pprof`
+`go tool pprof -alloc_space mem00.pprof`
 This will start an interactive cli session where you can view more details about the functions running in the program
 Inside that interactive cli session you can use the `top` and `list` commands to see details of the functions that are running
+For tracing:
+`go test -bench . -benchtime=10x -run ^$ -trace trace01.out`
+`go tool trace trace01.out`
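
The commands above drive profiling and tracing through `go test` flags. For context, the same kind of CPU profile and execution trace can also be captured programmatically from a normal program run using only the standard library. The sketch below is illustrative: the `profileRun` helper and the output file names are hypothetical, not something this repository is known to contain.

```go
package main

import (
	"log"
	"os"
	"runtime/pprof"
	"runtime/trace"
)

// profileRun writes a CPU profile and an execution trace covering whatever
// work() does. Helper name and file names are assumptions for this sketch.
func profileRun(work func()) {
	cpuFile, err := os.Create("cpu_manual.pprof")
	if err != nil {
		log.Fatal(err)
	}
	defer cpuFile.Close()

	traceFile, err := os.Create("trace_manual.out")
	if err != nil {
		log.Fatal(err)
	}
	defer traceFile.Close()

	if err := pprof.StartCPUProfile(cpuFile); err != nil {
		log.Fatal(err)
	}
	defer pprof.StopCPUProfile()

	if err := trace.Start(traceFile); err != nil {
		log.Fatal(err)
	}
	defer trace.Stop()

	work()
}
```

The resulting files open with the same `go tool pprof` and `go tool trace` commands shown above.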

6
benchmem02.txt Normal file

@@ -0,0 +1,6 @@
goos: linux
goarch: amd64
pkg: github.com/Serares/coolStats
BenchmarkRun-16 10 266165288 ns/op 214887704 B/op 2527035 allocs/op
PASS
ok github.com/Serares/coolStats 2.935s

6
benchmem03.txt Normal file

@@ -0,0 +1,6 @@
goos: linux
goarch: amd64
pkg: github.com/Serares/coolStats
BenchmarkRun-16 10 92313063 ns/op 215030855 B/op 2529386 allocs/op
PASS
ok github.com/Serares/coolStats 1.187s


@@ -0,0 +1,7 @@
goos: linux
goarch: amd64
pkg: github.com/Serares/coolStats
cpu: 11th Gen Intel(R) Core(TM) i7-11850H @ 2.50GHz
BenchmarkRun-16 10 134744628 ns/op
PASS
ok github.com/Serares/coolStats 1.487s


@@ -0,0 +1,6 @@
goos: linux
goarch: amd64
pkg: github.com/Serares/coolStats
BenchmarkRun-16 10 374300073 ns/op
PASS
ok github.com/Serares/coolStats 4.114s


@@ -0,0 +1,6 @@
goos: linux
goarch: amd64
pkg: github.com/Serares/coolStats
BenchmarkRun-16 10 226407658 ns/op
PASS
ok github.com/Serares/coolStats 2.491s

BIN
coolStats.test Executable file

Binary file not shown.

BIN
cpu00.pprof Normal file

Binary file not shown.

BIN
cpu00max.pprof Normal file

Binary file not shown.

BIN
cpu01.pprof Normal file

Binary file not shown.

BIN
cpu01max.pprof Normal file

Binary file not shown.

BIN
cpu02max.pprof Normal file

Binary file not shown.

27
csv.go

@@ -4,6 +4,7 @@ import (
     "encoding/csv"
     "fmt"
     "io"
+    "sort"
     "strconv"
 )
@@ -21,18 +22,34 @@ func avg(data []float64) float64 {
     return sum(data) / float64(len(data))
 }
+func min(data []float64) float64 {
+    sort.Float64s(data)
+    return data[0]
+}
+func max(data []float64) float64 {
+    sort.Float64s(data)
+    return data[len(data)-1]
+}
 func csv2float(r io.Reader, column int) ([]float64, error) {
     cr := csv.NewReader(r)
+    // set this to reuse the same slice for each read
+    // operation to reduce the memory allocation
+    cr.ReuseRecord = true
     if column < 1 {
         return nil, fmt.Errorf("%w: please provide a valid column number", ErrInvalidColumn)
     }
     column--
-    allData, err := cr.ReadAll()
-    if err != nil {
-        return nil, fmt.Errorf("cannot read data from file: %w", err)
-    }
     var data []float64
-    for i, row := range allData {
+    for i := 0; ; i++ {
+        row, err := cr.Read()
+        if err == io.EOF {
+            break
+        }
+        if err != nil {
+            return nil, fmt.Errorf("cannot read data from file: %w", err)
+        }
         if i == 0 {
             continue
         }
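
The `min` and `max` added in this diff delegate to `sort.Float64s`, which is O(n log n) and reorders the caller's slice in place. Purely as an illustration of an alternative (not code from this commit), a single pass finds both extremes in O(n) and leaves the input order untouched; `minMaxScan` is a hypothetical name.

```go
// minMaxScan is an illustrative sketch, not part of this diff: one pass over
// the data, no sorting, and the input slice keeps its original order.
func minMaxScan(data []float64) (lo, hi float64) {
	if len(data) == 0 {
		return 0, 0 // the sort-based min/max above would index an empty slice here
	}
	lo, hi = data[0], data[0]
	for _, v := range data[1:] {
		if v < lo {
			lo = v
		}
		if v > hi {
			hi = v
		}
	}
	return lo, hi
}
```

The `cr.ReuseRecord = true` change is a separate optimization: the reader reuses one backing slice across `Read` calls, which is safe as long as each row's fields are consumed (for example parsed with `strconv`) before the next call, as this loop appears to do.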


@@ -23,11 +23,13 @@ func TestOperations(t *testing.T) {
     }{
         {"Sum", sum, []float64{300, 85.927, -30, 436}},
         {"Avg", avg, []float64{37.5, 6.609769230769231, -15, 72.666666666666666}},
+        {"Min", min, []float64{10, 2.2, -20, 37}},
+        {"Max", max, []float64{100, 12.287, -10, 129}},
     }
     for _, tc := range testCases {
         for k, exp := range tc.exp {
-            name := fmt.Sprintf("%sData%d", tc.name, k)
+            name := fmt.Sprintf("%s Data %d", tc.name, k)
             t.Run(name, func(t *testing.T) {
                 res := tc.op(data[k])
                 // comparing floats might not be the best solution
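
The comment in the test above already notes that comparing floats directly can be fragile. A common alternative, sketched here with a hypothetical `almostEqual` helper (not code from this repository), is to accept a small absolute or relative tolerance:

```go
package main

import "math"

// almostEqual reports whether a and b differ by less than tol, either
// absolutely or relative to their magnitude. Hypothetical helper for a sketch.
func almostEqual(a, b, tol float64) bool {
	diff := math.Abs(a - b)
	if diff <= tol {
		return true
	}
	return diff <= tol*math.Max(math.Abs(a), math.Abs(b))
}
```

Inside the table test this would replace the exact comparison, for example `if !almostEqual(res, exp, 1e-9) { t.Errorf("expected %g, got %g", exp, res) }`.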

100
main.go

@@ -5,6 +5,8 @@ import (
     "fmt"
     "io"
     "os"
+    "runtime"
+    "sync"
 )

 func main() {
@@ -32,30 +34,100 @@ func run(filenames []string, op string, column int, out io.Writer) error {
         opFunc = sum
     case "avg":
         opFunc = avg
+    case "min":
+        opFunc = min
+    case "max":
+        opFunc = max
     default:
         return fmt.Errorf("%w: %s", ErrInvalidOperation, op)
     }
     consolidate := make([]float64, 0)
-    for _, fname := range filenames {
-        f, err := os.Open(fname)
-        if err != nil {
-            return fmt.Errorf("cannot open file: %w", err)
-        }
-        data, err := csv2float(f, column)
-        if err != nil {
-            return err
-        }
-        if err := f.Close(); err != nil {
-            return err
-        }
-        consolidate = append(consolidate, data...)
-    }
-    _, err := fmt.Fprintln(out, opFunc(consolidate))
-    return err
+    resCh := make(chan []float64)
+    errCh := make(chan error)
+    doneCh := make(chan struct{})
+    filesCh := make(chan string)
+    wg := sync.WaitGroup{}
+    go func() {
+        defer close(filesCh)
+        for _, fname := range filenames {
+            filesCh <- fname
+        }
+    }()
+    for i := 0; i < runtime.NumCPU(); i++ {
+        wg.Add(1)
+        go func() {
+            defer wg.Done()
+            for fname := range filesCh {
+                f, err := os.Open(fname)
+                if err != nil {
+                    errCh <- fmt.Errorf("cannot open file: %w", err)
+                    return
+                }
+                data, err := csv2float(f, column)
+                if err != nil {
+                    errCh <- err
+                }
+                if err := f.Close(); err != nil {
+                    errCh <- err
+                }
+                resCh <- data
+            }
+        }()
+    }
+    go func() {
+        wg.Wait()
+        close(doneCh)
+    }()
+    // TODO try to improve the performance of min and max
+    // by running the functions in multiple gorutines
+    // or by trying to run the functions for each file that is read
+    for {
+        select {
+        case err := <-errCh:
+            return err
+        case data := <-resCh:
+            if op == "min" || op == "max" {
+                // todo see if using goroutines here can improve the performance
+                // spawn like 4 goroutines and divide the date between them to be processed
+                minWg := sync.WaitGroup{}
+                theData := make(chan float64)
+                quarterLength := len(data) / 4
+                start := 0
+                endQuarter := quarterLength
+                incrementQuarter := 2
+                for i := 0; i < 4; i++ {
+                    minWg.Add(1)
+                    go func(start, end int) {
+                        defer minWg.Done()
+                        theData <- opFunc(data[start:end])
+                    }(start, endQuarter)
+                    start = endQuarter
+                    endQuarter = quarterLength * incrementQuarter
+                    incrementQuarter++
+                }
+                go func() {
+                    minWg.Wait()
+                    close(theData)
+                }()
+                for quarterData := range theData {
+                    consolidate = append(consolidate, quarterData)
+                }
+            } else {
+                consolidate = append(consolidate, data...)
+            }
+        case <-doneCh:
+            _, err := fmt.Fprintln(out, opFunc(consolidate))
+            return err
+        }
+    }
 }
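
One detail worth flagging in the quartering loop above: with `q = len(data)/4` the chunks are `[0,q)`, `[q,2q)`, `[2q,3q)` and `[3q,4q)`, so when the length is not divisible by four the trailing elements are never handed to a goroutine, and inputs shorter than four elements produce empty chunks. The sketch below shows the same chunk-and-reduce idea with contiguous chunks that always cover the whole slice; `chunkedReduce` is a hypothetical helper, not the committed code.

```go
package main

import "sync"

// chunkedReduce applies reduce (for example a min or max function) to roughly
// equal, contiguous chunks of data concurrently and returns one partial result
// per chunk. Every element is covered, including any remainder.
func chunkedReduce(data []float64, workers int, reduce func([]float64) float64) []float64 {
	if len(data) == 0 || workers < 1 {
		return nil
	}
	if workers > len(data) {
		workers = len(data) // never hand a goroutine an empty chunk
	}
	results := make([]float64, workers)
	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		start := i * len(data) / workers
		end := (i + 1) * len(data) / workers
		wg.Add(1)
		go func(i, start, end int) {
			defer wg.Done()
			results[i] = reduce(data[start:end])
		}(i, start, end)
	}
	wg.Wait()
	return results
}
```

For `min` and `max`, reducing the partial results once more gives the same answer as a single pass over all the data, so the per-chunk values can be appended to `consolidate` just as the channel version does.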


@@ -76,7 +76,7 @@ func BenchmarkRun(b *testing.B) {
     // reset the time before running the benchmark loop
     b.ResetTimer()
     for i := 0; i < b.N; i++ {
-        if err := run(filenames, "avg", 2, io.Discard); err != nil {
+        if err := run(filenames, "max", 2, io.Discard); err != nil {
             b.Error(err)
         }
     }
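
This change repoints the existing benchmark from `"avg"` to `"max"`, so numbers recorded before and after it measure different operations. One way to keep every operation measurable without editing the file between runs is sub-benchmarks; the sketch below assumes the same `filenames` setup and `run` signature as the surrounding `BenchmarkRun`, and the `BenchmarkRunOps` name is hypothetical.

```go
func BenchmarkRunOps(b *testing.B) {
	// Illustrative only: one sub-benchmark per operation, reusing the
	// filenames prepared the same way as in BenchmarkRun above.
	for _, op := range []string{"sum", "avg", "min", "max"} {
		op := op
		b.Run(op, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				if err := run(filenames, op, 2, io.Discard); err != nil {
					b.Error(err)
				}
			}
		})
	}
}
```

`go test -bench .` then reports one line per operation (for example `BenchmarkRunOps/max-16`), so logs taken for different operations stay directly comparable.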

BIN
mem00.pprof Normal file

Binary file not shown.

BIN
mem00max.pprof Normal file

Binary file not shown.

BIN
trace01.out Normal file

Binary file not shown.

BIN
trace01max.out Normal file

Binary file not shown.

BIN
trace02.out Normal file

Binary file not shown.

BIN
trace03.out Normal file

Binary file not shown.