Compare commits

...

10 Commits

Author | SHA1 | Message | Date
rjianu | a39e7c73a5 | chore: Add a tracer and cpu profiler files | 2023-07-18 14:58:38 +03:00
rjianu | 3a134d8f1e | chore: Add bench logs for first implementation of min max | 2023-07-18 14:47:23 +03:00
rjianu | c37dfd4918 | chore: Add bench for min max without the goroutines | 2023-07-18 14:45:16 +03:00
rjianu | 7293844a1f | feat: Improve performance for min max using goroutines | 2023-07-18 14:43:10 +03:00
rjianu | 20d698d435 | feat: Update the min max performance | 2023-07-17 20:39:29 +03:00
rjianu | 607561b9fe | feat: Add min and max and try to improve the performance | 2023-07-17 20:18:16 +03:00
rjianu | f837a1d217 | feat: Improve the concurrency | 2023-07-12 17:30:18 +03:00
rjianu | 9f35c4eeeb | feat: Improve the program execution time by using gorutines | 2023-07-12 16:56:36 +03:00
rjianu | 209c1b93c6 | chore: All the benchmark stats | 2023-07-12 16:29:25 +03:00
rjianu | 3bd76e879b | feat: Improve memory allocation | 2023-07-12 16:26:54 +03:00
23 changed files with 158 additions and 23 deletions

3
.gitignore vendored

@@ -1 +1,2 @@
 benchmark
+.vscode


@@ -13,5 +13,17 @@ The above command will generate two files that will be used for profiling the pr
 To use the generated files for profiling you have to run:
 `go tool pprof cpu00.pprof`
+For memory
+`go test -bench . -benchtime=10x -run ^$ -memprofile mem00.pprof`
+`go tool pprof -alloc_space mem00.pprof`
 This will start an interactive cli session where you can view more details about the functions running in the program
 Inside that interactive cli session you can use the `top` and `list` commands to see details of the functions that are running
+For tracing:
+`go test -bench . -benchtime=10x -run ^$ -trace trace01.out`
+`go tool trace trace01.out`
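
The commands above drive profiling and tracing through `go test` flags. For context, the same kind of CPU profile and execution trace can also be captured programmatically from a normal program run using only the standard library. The sketch below is illustrative: the `profileRun` helper and the output file names are hypothetical, not something this repository is known to contain.

```go
package main

import (
	"log"
	"os"
	"runtime/pprof"
	"runtime/trace"
)

// profileRun writes a CPU profile and an execution trace covering whatever
// work() does. Helper name and file names are assumptions for this sketch.
func profileRun(work func()) {
	cpuFile, err := os.Create("cpu_manual.pprof")
	if err != nil {
		log.Fatal(err)
	}
	defer cpuFile.Close()

	traceFile, err := os.Create("trace_manual.out")
	if err != nil {
		log.Fatal(err)
	}
	defer traceFile.Close()

	if err := pprof.StartCPUProfile(cpuFile); err != nil {
		log.Fatal(err)
	}
	defer pprof.StopCPUProfile()

	if err := trace.Start(traceFile); err != nil {
		log.Fatal(err)
	}
	defer trace.Stop()

	work()
}
```

The resulting files open with the same `go tool pprof` and `go tool trace` commands shown above.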

6
benchmem02.txt Normal file

@@ -0,0 +1,6 @@
goos: linux
goarch: amd64
pkg: github.com/Serares/coolStats
BenchmarkRun-16 10 266165288 ns/op 214887704 B/op 2527035 allocs/op
PASS
ok github.com/Serares/coolStats 2.935s

6
benchmem03.txt Normal file

@@ -0,0 +1,6 @@
goos: linux
goarch: amd64
pkg: github.com/Serares/coolStats
BenchmarkRun-16 10 92313063 ns/op 215030855 B/op 2529386 allocs/op
PASS
ok github.com/Serares/coolStats 1.187s


@@ -0,0 +1,7 @@
goos: linux
goarch: amd64
pkg: github.com/Serares/coolStats
cpu: 11th Gen Intel(R) Core(TM) i7-11850H @ 2.50GHz
BenchmarkRun-16 10 134744628 ns/op
PASS
ok github.com/Serares/coolStats 1.487s


@@ -0,0 +1,6 @@
goos: linux
goarch: amd64
pkg: github.com/Serares/coolStats
BenchmarkRun-16 10 374300073 ns/op
PASS
ok github.com/Serares/coolStats 4.114s


@@ -0,0 +1,6 @@
goos: linux
goarch: amd64
pkg: github.com/Serares/coolStats
BenchmarkRun-16 10 226407658 ns/op
PASS
ok github.com/Serares/coolStats 2.491s

BIN
coolStats.test Executable file

Binary file not shown.

BIN
cpu00.pprof Normal file

Binary file not shown.

BIN
cpu00max.pprof Normal file

Binary file not shown.

BIN
cpu01.pprof Normal file

Binary file not shown.

BIN
cpu01max.pprof Normal file

Binary file not shown.

BIN
cpu02max.pprof Normal file

Binary file not shown.

27
csv.go

@@ -4,6 +4,7 @@ import (
     "encoding/csv"
     "fmt"
     "io"
+    "sort"
     "strconv"
 )
@@ -21,18 +22,34 @@ func avg(data []float64) float64 {
     return sum(data) / float64(len(data))
 }
+func min(data []float64) float64 {
+    sort.Float64s(data)
+    return data[0]
+}
+func max(data []float64) float64 {
+    sort.Float64s(data)
+    return data[len(data)-1]
+}
 func csv2float(r io.Reader, column int) ([]float64, error) {
     cr := csv.NewReader(r)
+    // set this to reuse the same slice for each read
+    // operation to reduce the memory allocation
+    cr.ReuseRecord = true
     if column < 1 {
         return nil, fmt.Errorf("%w: please provide a valid column number", ErrInvalidColumn)
     }
     column--
-    allData, err := cr.ReadAll()
-    if err != nil {
-        return nil, fmt.Errorf("cannot read data from file: %w", err)
-    }
     var data []float64
-    for i, row := range allData {
+    for i := 0; ; i++ {
+        row, err := cr.Read()
+        if err == io.EOF {
+            break
+        }
+        if err != nil {
+            return nil, fmt.Errorf("cannot read data from file: %w", err)
+        }
         if i == 0 {
             continue
         }
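
The `min` and `max` added in this diff delegate to `sort.Float64s`, which is O(n log n) and reorders the caller's slice in place. Purely as an illustration of an alternative (not code from this commit), a single pass finds both extremes in O(n) and leaves the input order untouched; `minMaxScan` is a hypothetical name.

```go
// minMaxScan is an illustrative sketch, not part of this diff: one pass over
// the data, no sorting, and the input slice keeps its original order.
func minMaxScan(data []float64) (lo, hi float64) {
	if len(data) == 0 {
		return 0, 0 // the sort-based min/max above would index an empty slice here
	}
	lo, hi = data[0], data[0]
	for _, v := range data[1:] {
		if v < lo {
			lo = v
		}
		if v > hi {
			hi = v
		}
	}
	return lo, hi
}
```

The `cr.ReuseRecord = true` change is a separate optimization: the reader reuses one backing slice across `Read` calls, which is safe as long as each row's fields are consumed (for example parsed with `strconv`) before the next call, as this loop appears to do.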


@@ -23,11 +23,13 @@ func TestOperations(t *testing.T) {
     }{
         {"Sum", sum, []float64{300, 85.927, -30, 436}},
         {"Avg", avg, []float64{37.5, 6.609769230769231, -15, 72.666666666666666}},
+        {"Min", min, []float64{10, 2.2, -20, 37}},
+        {"Max", max, []float64{100, 12.287, -10, 129}},
     }
     for _, tc := range testCases {
         for k, exp := range tc.exp {
-            name := fmt.Sprintf("%sData%d", tc.name, k)
+            name := fmt.Sprintf("%s Data %d", tc.name, k)
             t.Run(name, func(t *testing.T) {
                 res := tc.op(data[k])
                 // comparing floats might not be the best solution
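
The comment in the test above already notes that comparing floats directly can be fragile. A common alternative, sketched here with a hypothetical `almostEqual` helper (not code from this repository), is to accept a small absolute or relative tolerance:

```go
package main

import "math"

// almostEqual reports whether a and b differ by less than tol, either
// absolutely or relative to their magnitude. Hypothetical helper for a sketch.
func almostEqual(a, b, tol float64) bool {
	diff := math.Abs(a - b)
	if diff <= tol {
		return true
	}
	return diff <= tol*math.Max(math.Abs(a), math.Abs(b))
}
```

Inside the table test this would replace the exact comparison, for example `if !almostEqual(res, exp, 1e-9) { t.Errorf("expected %g, got %g", exp, res) }`.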

100
main.go

@@ -5,6 +5,8 @@ import (
     "fmt"
     "io"
     "os"
+    "runtime"
+    "sync"
 )

 func main() {
@@ -32,30 +34,100 @@ func run(filenames []string, op string, column int, out io.Writer) error {
         opFunc = sum
     case "avg":
         opFunc = avg
+    case "min":
+        opFunc = min
+    case "max":
+        opFunc = max
     default:
         return fmt.Errorf("%w: %s", ErrInvalidOperation, op)
     }
     consolidate := make([]float64, 0)
-    for _, fname := range filenames {
-        f, err := os.Open(fname)
-        if err != nil {
-            return fmt.Errorf("cannot open file: %w", err)
-        }
-        data, err := csv2float(f, column)
-        if err != nil {
-            return err
-        }
-        if err := f.Close(); err != nil {
-            return err
-        }
-        consolidate = append(consolidate, data...)
-    }
-    _, err := fmt.Fprintln(out, opFunc(consolidate))
-    return err
+    resCh := make(chan []float64)
+    errCh := make(chan error)
+    doneCh := make(chan struct{})
+    filesCh := make(chan string)
+    wg := sync.WaitGroup{}
+    go func() {
+        defer close(filesCh)
+        for _, fname := range filenames {
+            filesCh <- fname
+        }
+    }()
+    for i := 0; i < runtime.NumCPU(); i++ {
+        wg.Add(1)
+        go func() {
+            defer wg.Done()
+            for fname := range filesCh {
+                f, err := os.Open(fname)
+                if err != nil {
+                    errCh <- fmt.Errorf("cannot open file: %w", err)
+                    return
+                }
+                data, err := csv2float(f, column)
+                if err != nil {
+                    errCh <- err
+                }
+                if err := f.Close(); err != nil {
+                    errCh <- err
+                }
+                resCh <- data
+            }
+        }()
+    }
+    go func() {
+        wg.Wait()
+        close(doneCh)
+    }()
+    // TODO try to improve the performance of min and max
+    // by running the functions in multiple gorutines
+    // or by trying to run the functions for each file that is read
+    for {
+        select {
+        case err := <-errCh:
+            return err
+        case data := <-resCh:
+            if op == "min" || op == "max" {
+                // todo see if using goroutines here can improve the performance
+                // spawn like 4 goroutines and divide the date between them to be processed
+                minWg := sync.WaitGroup{}
+                theData := make(chan float64)
+                quarterLength := len(data) / 4
+                start := 0
+                endQuarter := quarterLength
+                incrementQuarter := 2
+                for i := 0; i < 4; i++ {
+                    minWg.Add(1)
+                    go func(start, end int) {
+                        defer minWg.Done()
+                        theData <- opFunc(data[start:end])
+                    }(start, endQuarter)
+                    start = endQuarter
+                    endQuarter = quarterLength * incrementQuarter
+                    incrementQuarter++
+                }
+                go func() {
+                    minWg.Wait()
+                    close(theData)
+                }()
+                for quarterData := range theData {
+                    consolidate = append(consolidate, quarterData)
+                }
+            } else {
+                consolidate = append(consolidate, data...)
+            }
+        case <-doneCh:
+            _, err := fmt.Fprintln(out, opFunc(consolidate))
+            return err
+        }
+    }
 }
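
One detail worth flagging in the quartering loop above: with `q = len(data)/4` the chunks are `[0,q)`, `[q,2q)`, `[2q,3q)` and `[3q,4q)`, so when the length is not divisible by four the trailing elements are never handed to a goroutine, and inputs shorter than four elements produce empty chunks. The sketch below shows the same chunk-and-reduce idea with contiguous chunks that always cover the whole slice; `chunkedReduce` is a hypothetical helper, not the committed code.

```go
package main

import "sync"

// chunkedReduce applies reduce (for example a min or max function) to roughly
// equal, contiguous chunks of data concurrently and returns one partial result
// per chunk. Every element is covered, including any remainder.
func chunkedReduce(data []float64, workers int, reduce func([]float64) float64) []float64 {
	if len(data) == 0 || workers < 1 {
		return nil
	}
	if workers > len(data) {
		workers = len(data) // never hand a goroutine an empty chunk
	}
	results := make([]float64, workers)
	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		start := i * len(data) / workers
		end := (i + 1) * len(data) / workers
		wg.Add(1)
		go func(i, start, end int) {
			defer wg.Done()
			results[i] = reduce(data[start:end])
		}(i, start, end)
	}
	wg.Wait()
	return results
}
```

For `min` and `max`, reducing the partial results once more gives the same answer as a single pass over all the data, so the per-chunk values can be appended to `consolidate` just as the channel version does.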


@@ -76,7 +76,7 @@ func BenchmarkRun(b *testing.B) {
     // reset the time before running the benchmark loop
     b.ResetTimer()
     for i := 0; i < b.N; i++ {
-        if err := run(filenames, "avg", 2, io.Discard); err != nil {
+        if err := run(filenames, "max", 2, io.Discard); err != nil {
             b.Error(err)
         }
     }
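
This change repoints the existing benchmark from `"avg"` to `"max"`, so numbers recorded before and after it measure different operations. One way to keep every operation measurable without editing the file between runs is sub-benchmarks; the sketch below assumes the same `filenames` setup and `run` signature as the surrounding `BenchmarkRun`, and the `BenchmarkRunOps` name is hypothetical.

```go
func BenchmarkRunOps(b *testing.B) {
	// Illustrative only: one sub-benchmark per operation, reusing the
	// filenames prepared the same way as in BenchmarkRun above.
	for _, op := range []string{"sum", "avg", "min", "max"} {
		op := op
		b.Run(op, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				if err := run(filenames, op, 2, io.Discard); err != nil {
					b.Error(err)
				}
			}
		})
	}
}
```

`go test -bench .` then reports one line per operation (for example `BenchmarkRunOps/max-16`), so logs taken for different operations stay directly comparable.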

BIN
mem00.pprof Normal file

Binary file not shown.

BIN
mem00max.pprof Normal file

Binary file not shown.

BIN
trace01.out Normal file

Binary file not shown.

BIN
trace01max.out Normal file

Binary file not shown.

BIN
trace02.out Normal file

Binary file not shown.

BIN
trace03.out Normal file

Binary file not shown.