From 25841f26a1cd7f0288d174ac8cb1367ac55b7200 Mon Sep 17 00:00:00 2001 From: Alexander Date: Wed, 22 Jan 2025 15:52:39 +0100 Subject: [PATCH 01/30] feat(analytics): session/user trends --- backend/pkg/analytics/cards/handlers.go | 6 + backend/pkg/analytics/cards/model.go | 31 ++ backend/pkg/analytics/charts/charts.go | 72 +++++ backend/pkg/analytics/charts/counters.go | 364 +++++++++++++++++++++++ backend/pkg/analytics/charts/handlers.go | 3 +- backend/pkg/analytics/charts/model.go | 6 +- 6 files changed, 478 insertions(+), 4 deletions(-) create mode 100644 backend/pkg/analytics/charts/counters.go diff --git a/backend/pkg/analytics/cards/handlers.go b/backend/pkg/analytics/cards/handlers.go index f0cf16d02..a47c1153a 100644 --- a/backend/pkg/analytics/cards/handlers.go +++ b/backend/pkg/analytics/cards/handlers.go @@ -46,6 +46,7 @@ func (e *handlersImpl) GetAll() []*api.Description { {"/v1/analytics/{projectId}/cards/{id}", e.getCard, "GET"}, {"/v1/analytics/{projectId}/cards/{id}", e.updateCard, "PUT"}, {"/v1/analytics/{projectId}/cards/{id}", e.deleteCard, "DELETE"}, + {"/v1/analytics/{projectId}/cards/{id}/sessions", e.getCardSessions, "POST"}, } } @@ -296,3 +297,8 @@ func (e *handlersImpl) deleteCard(w http.ResponseWriter, r *http.Request) { e.responser.ResponseWithJSON(e.log, r.Context(), w, nil, startTime, r.URL.Path, bodySize) } + +func (e *handlersImpl) getCardSessions(w http.ResponseWriter, r *http.Request) { + // TODO: implement this + e.responser.ResponseWithError(e.log, r.Context(), w, http.StatusNotImplemented, fmt.Errorf("not implemented"), time.Now(), r.URL.Path, 0) +} diff --git a/backend/pkg/analytics/cards/model.go b/backend/pkg/analytics/cards/model.go index 5ab4144f0..0e88dfbf5 100644 --- a/backend/pkg/analytics/cards/model.go +++ b/backend/pkg/analytics/cards/model.go @@ -192,3 +192,34 @@ func (s *CardListSort) GetSQLField() string { func (s *CardListSort) GetSQLOrder() string { return strings.ToUpper(s.Order) } + +// --- + +/* +class IssueType(str, Enum): + + CLICK_RAGE = 'click_rage' + DEAD_CLICK = 'dead_click' + EXCESSIVE_SCROLLING = 'excessive_scrolling' + BAD_REQUEST = 'bad_request' + MISSING_RESOURCE = 'missing_resource' + MEMORY = 'memory' + CPU = 'cpu' + SLOW_RESOURCE = 'slow_resource' + SLOW_PAGE_LOAD = 'slow_page_load' + CRASH = 'crash' + CUSTOM = 'custom' + JS_EXCEPTION = 'js_exception' + MOUSE_THRASHING = 'mouse_thrashing' + # IOS + TAP_RAGE = 'tap_rage' +*/ +type IssueType string +type ChartData struct { + StartTs uint64 `json:"startTs"` + EndTs uint64 `json:"endTs"` + Density uint64 `json:"density"` + Filters []FilterItem `json:"filter"` + MetricOf string `json:"metricOf"` + MetricValue []IssueType `json:"metricValue"` +} diff --git a/backend/pkg/analytics/charts/charts.go b/backend/pkg/analytics/charts/charts.go index 1916695fa..bf242f6b4 100644 --- a/backend/pkg/analytics/charts/charts.go +++ b/backend/pkg/analytics/charts/charts.go @@ -3,6 +3,8 @@ package charts import ( "encoding/json" "fmt" + "github.com/ClickHouse/clickhouse-go/v2/lib/driver" + "openreplay/backend/pkg/analytics/cards" "openreplay/backend/pkg/db/postgres/pool" "openreplay/backend/pkg/logger" @@ -15,6 +17,7 @@ type Charts interface { type chartsImpl struct { log logger.Logger pgconn pool.Pool + chConn driver.Conn } func New(log logger.Logger, conn pool.Pool) (Charts, error) { @@ -24,7 +27,39 @@ func New(log logger.Logger, conn pool.Pool) (Charts, error) { }, nil } +// def get_chart() func (s *chartsImpl) GetData(projectId int, userID uint64, req *GetCardChartDataRequest) ([]DataPoint, 
error) {
+	if req == nil {
+		return nil, fmt.Errorf("request is empty")
+	}
+	switch {
+	case req.MetricType == "funnel":
+		return nil, fmt.Errorf("funnel metric type is not supported yet")
+	case req.MetricType == "heatMap":
+		return nil, fmt.Errorf("heatMap metric type is not supported yet")
+	case req.MetricType == "pathAnalysis":
+		return nil, fmt.Errorf("pathAnalysis metric type is not supported yet")
+
+	case req.MetricType == "timeseries":
+		return s.getTimeseriesCharts(projectId, userID, req)
+	case req.MetricType == "table":
+		return nil, fmt.Errorf("table metric type is not supported yet")
+
+	case req.MetricType == "errors":
+		fallthrough
+	case req.MetricType == "performance":
+		fallthrough
+	case req.MetricType == "resources":
+		fallthrough
+	case req.MetricType == "webVitals":
+		return s.getMetric(projectId, userID, req)
+
+	case req.MetricType == "retention":
+		return nil, fmt.Errorf("retention metric type is not supported yet")
+	case req.MetricType == "stickiness":
+		return nil, fmt.Errorf("stickiness metric type is not supported yet")
+
+	}
 	jsonInput := `
 	{
 	  "data": [
 	    {
 	      "timestamp": 1733934939000,
 	      "Series A": 100,
 	      "Series B": 200
 	    },
 	    {
 	      "timestamp": 1733935939000,
 	      "Series A": 150,
 	      "Series B": 250
 	    }
 	  ]
 	}`
@@ -48,3 +83,40 @@ func (s *chartsImpl) GetData(projectId int, userID uint64, req *GetCardChartData
 
 	return resp.Data, nil
 }
+
+func (s *chartsImpl) getMetric(projectID int, userID uint64, req *GetCardChartDataRequest) ([]DataPoint, error) {
+	switch req.MetricOf {
+	case "countSessions": // metrics.get_processed_sessions
+		return nil, fmt.Errorf("countSessions metric type is not supported yet")
+	case "avgVisitedPages": // metrics.get_user_activity_avg_visited_pages
+		return nil, fmt.Errorf("avgVisitedPages metric type is not supported yet")
+	case "countRequests": // metrics.get_top_metrics_count_requests
+		return nil, fmt.Errorf("countRequests metric type is not supported yet")
+	case "impactedSessionsByJsErrors": // metrics.get_impacted_sessions_by_js_errors
+		return nil, fmt.Errorf("impactedSessionsByJsErrors metric type is not supported yet")
+	case "domainsErrors4xx": // metrics.get_domains_errors_4xx
+		return nil, fmt.Errorf("domainsErrors4xx metric type is not supported yet")
+	case "domainsErrors5xx": // metrics.get_domains_errors_5xx
+		return nil, fmt.Errorf("domainsErrors5xx metric type is not supported yet")
+	case "errorsPerDomains": // metrics.get_errors_per_domains
+		return nil, fmt.Errorf("errorsPerDomains metric type is not supported yet")
+	case "errorsPerType": // metrics.get_errors_per_type
+		return nil, fmt.Errorf("errorsPerType metric type is not supported yet")
+
+	}
+	return nil, fmt.Errorf("metric type is not supported yet")
+
+}
+
+func (s *chartsImpl) getTimeseriesCharts(projectID int, userID uint64, req *GetCardChartDataRequest) ([]DataPoint, error) {
+	charts := []interface{}{}
+	for _, series := range req.Series {
+		res, err := s.searchSeries(projectID, series)
+		if err != nil {
+			return nil, err
+		}
+		charts = append(charts, res)
+	}
+	results := []DataPoint{}
+	return results, nil
+}
diff --git a/backend/pkg/analytics/charts/counters.go b/backend/pkg/analytics/charts/counters.go
new file mode 100644
index 000000000..92a664a0a
--- /dev/null
+++ b/backend/pkg/analytics/charts/counters.go
@@ -0,0 +1,364 @@
+package charts
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"strings"
+)
+
+type Fields map[string]string
+
+func getSessionMetaFields() Fields {
+	return Fields{
+		"revId":    "rev_id",
+		"country":  "user_country",
+		"os":       "user_os",
+		"platform": "user_device_type",
+		"device":   "user_device",
+		"browser":  "user_browser",
+	}
+}
+
+func getMetadataFields() Fields {
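+	// The ten numbered slots map the configurable session metadata keys onto
+	// the metadata_1..metadata_10 columns of the ClickHouse sessions tables.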
+	return Fields{
+		"userId":          "user_id",
+		"userAnonymousId": "user_anonymous_id",
+		"metadata1":       "metadata_1",
+		"metadata2":       "metadata_2",
+		"metadata3":       "metadata_3",
+		"metadata4":       "metadata_4",
+		"metadata5":       "metadata_5",
+		"metadata6":       "metadata_6",
+		"metadata7":       "metadata_7",
+		"metadata8":       "metadata_8",
+		"metadata9":       "metadata_9",
+		"metadata10":      "metadata_10",
+	}
+}
+
+func getStepSize(startTimestamp, endTimestamp, density uint64, decimal bool, factor uint64) float64 {
+	stepSize := (endTimestamp / factor) - (startTimestamp / factor) // TODO: should I use float64 here?
+	if !decimal {
+		density--
+	}
+	return float64(stepSize) / float64(density)
+}
+
+func getBasicConstraints(tableName string, timeConstraint, roundStart bool, data map[string]interface{}, identifier string) []string { // If tableName is not empty, append a trailing dot
+	if tableName != "" {
+		tableName += "."
+	}
+	chSubQuery := []string{fmt.Sprintf("%s%s = toUInt16(:%s)", tableName, identifier, identifier)}
+
+	if timeConstraint {
+		if roundStart {
+			chSubQuery = append(chSubQuery, fmt.Sprintf("toStartOfInterval(%sdatetime, INTERVAL :step_size second) >= toDateTime(:startTimestamp/1000)", tableName))
+		} else {
+			chSubQuery = append(chSubQuery, fmt.Sprintf("%sdatetime >= toDateTime(:startTimestamp/1000)", tableName))
+		}
+		chSubQuery = append(chSubQuery, fmt.Sprintf("%sdatetime < toDateTime(:endTimestamp/1000)", tableName))
+	}
+	return append(chSubQuery, getGenericConstraint(data, tableName)...)
+}
+
+func getGenericConstraint(data map[string]interface{}, tableName string) []string {
+	return getConstraint(data, getSessionMetaFields(), tableName)
+}
+
+func getConstraint(data map[string]interface{}, fields Fields, tableName string) []string {
+	var constraints []string
+	filters, ok := data["filters"].([]map[string]interface{})
+	if !ok {
+		log.Println("error getting filters from data")
+		filters = make([]map[string]interface{}, 0) // to skip the next block
+	}
+
+	// process filters
+	for i, f := range filters {
+		key, _ := f["key"].(string)
+		value, _ := f["value"].(string)
+
+		if field, ok := fields[key]; ok {
+			if value == "*" || value == "" {
+				constraints = append(constraints, fmt.Sprintf("isNotNull(%s%s)", tableName, field))
+			} else {
+				// constraints.append(f"{table_name}{fields[f['key']]} = %({f['key']}_{i})s")
+				constraints = append(constraints, fmt.Sprintf("%s%s = %%(%s_%d)s", tableName, field, key, i)) // TODO: where we'll keep the value?
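+				// The matching values are collected by getConstraintValues below,
+				// but note these %(name)s placeholders are still Python-style,
+				// while replaceNamedParams only rewrites :name tokens, so they
+				// are not substituted yet.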
+			}
+		}
+	}
+
+	// TODO from Python: remove this in next release
+	for key, v := range data {
+		value, _ := v.(string)
+
+		if field, ok := fields[key]; ok {
+			if value == "*" || value == "" {
+				constraints = append(constraints, fmt.Sprintf("isNotNull(%s%s)", tableName, field))
+			} else {
+				constraints = append(constraints, fmt.Sprintf("%s%s = %%(%s)s", tableName, field, key))
+			}
+		}
+	}
+	return constraints
+}
+
+func getMetaConstraint(data map[string]interface{}) []string {
+	return getConstraint(data, getMetadataFields(), "sessions_metadata.")
+}
+
+func getConstraintValues(data map[string]interface{}) map[string]interface{} {
+	params := make(map[string]interface{})
+
+	if filters, ok := data["filters"].([]map[string]interface{}); ok {
+		for i, f := range filters {
+			key, _ := f["key"].(string)
+			value := f["value"]
+			params[fmt.Sprintf("%s_%d", key, i)] = value
+		}
+
+		// TODO from Python: remove this in next release
+		for k, v := range data {
+			params[k] = v
+		}
+	}
+
+	return params
+}
+
+/*
+def get_main_sessions_table(timestamp=0):
+
+	return "experimental.sessions_l7d_mv" \
+		if config("EXP_7D_MV", cast=bool, default=True) \
+		and timestamp and timestamp >= TimeUTC.now(delta_days=-7) else "experimental.sessions"
+*/
+func getMainSessionsTable(timestamp uint64) string {
+	return "experimental.sessions"
+}
+
+// replaceNamedParams inlines the named :params into the query as literal
+// values (a positional "?" rewrite is kept below for reference)
+func replaceNamedParams(query string, params map[string]interface{}) (string, []interface{}) {
+	var args []interface{}
+	i := 1
+	for key, val := range params {
+		placeholder := ":" + key
+		//query = strings.Replace(query, placeholder, "?", 1)
+		strVal := fmt.Sprintf("%v", val)
+		query = strings.Replace(query, placeholder, strVal, -1)
+		args = append(args, val)
+		i++
+	}
+	return query, args
+}
+
+// Helper function to generate a range of floats
+func frange(start, end, step float64) []float64 {
+	var rangeValues []float64
+	for i := start; i < end; i += step {
+		rangeValues = append(rangeValues, i)
+	}
+	return rangeValues
+}
+
+// Helper function to add missing keys from the "complete" map to the "original" map
+func addMissingKeys(original, complete map[string]interface{}) map[string]interface{} {
+	for k, v := range complete {
+		if _, exists := original[k]; !exists {
+			original[k] = v
+		}
+	}
+	return original
+}
+
+// CompleteMissingSteps fills in missing steps in the data
+func CompleteMissingSteps(
+	startTime, endTime uint64,
+	density int,
+	neutral map[string]interface{},
+	rows []map[string]interface{},
+	timeKey string,
+	timeCoefficient int64,
+) []map[string]interface{} {
+	if len(rows) == density {
+		return rows
+	}
+
+	// Calculate the step size
+	step := getStepSize(startTime, endTime, uint64(density), true, 1000)
+	optimal := make([][2]uint64, 0)
+	for _, i := range frange(float64(startTime)/float64(timeCoefficient), float64(endTime)/float64(timeCoefficient), step) {
+		startInterval := uint64(i * float64(timeCoefficient))
+		endInterval := uint64((i + step) * float64(timeCoefficient))
+		optimal = append(optimal, [2]uint64{startInterval, endInterval})
+	}
+
+	var result []map[string]interface{}
+	r, o := 0, 0
+
+	// Iterate over density
+	for i := 0; i < density; i++ {
+		// Clone the neutral map
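+		// Entries in neutral may be factories (func() interface{}), so each
+		// synthetic row gets a fresh value instead of a shared reference.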
+		neutralClone := make(map[string]interface{})
+		for k, v := range neutral {
+			if fn, ok := v.(func() interface{}); ok {
+				neutralClone[k] = fn()
+			} else {
+				neutralClone[k] = v
+			}
+		}
+
+		// If we can just add the rest of the rows to result
+		if r < len(rows) && len(result)+len(rows)-r == density {
+			result = append(result, rows[r:]...)
+			break
+		}
+
+		// Determine where the current row fits within the optimal intervals
+		if r < len(rows) && o < len(optimal) && rows[r][timeKey].(uint64) < optimal[o][0] {
+			rows[r] = addMissingKeys(rows[r], neutralClone)
+			result = append(result, rows[r])
+			r++
+		} else if r < len(rows) && o < len(optimal) && optimal[o][0] <= rows[r][timeKey].(uint64) && rows[r][timeKey].(uint64) < optimal[o][1] {
+			rows[r] = addMissingKeys(rows[r], neutralClone)
+			result = append(result, rows[r])
+			r++
+			o++
+		} else {
+			neutralClone[timeKey] = optimal[o][0]
+			result = append(result, neutralClone)
+			o++
+		}
+	}
+	return result
+}
+
+func progress(oldVal, newVal uint64) float64 {
+	if newVal > 0 {
+		return (float64(oldVal) - float64(newVal)) / float64(newVal) * 100
+	}
+	if oldVal == 0 {
+		return 0
+	}
+	return 100
+}
+
+// Trying to find a common part
+func parse(projectID uint64, startTs, endTs uint64, density uint64, args map[string]interface{}) ([]string, []string, map[string]interface{}) {
+	stepSize := getStepSize(startTs, endTs, density, false, 1000)
+	chSubQuery := getBasicConstraints("sessions", true, false, args, "project_id")
+	chSubQueryChart := getBasicConstraints("sessions", true, true, args, "project_id")
+	metaCondition := getMetaConstraint(args)
+	chSubQuery = append(chSubQuery, metaCondition...)
+	chSubQueryChart = append(chSubQueryChart, metaCondition...)
+
+	params := map[string]interface{}{
+		"step_size":      stepSize,
+		"project_id":     projectID,
+		"startTimestamp": startTs,
+		"endTimestamp":   endTs,
+	}
+	for k, v := range getConstraintValues(args) {
+		params[k] = v
+	}
+	return chSubQuery, chSubQueryChart, params
+}
+
+// Sessions trend
+func (c *chartsImpl) getProcessedSessions(projectID uint64, startTs, endTs uint64, density uint64, args map[string]interface{}) {
+	chQuery := `
+    SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL :step_size second)) * 1000 AS timestamp,
+           COUNT(DISTINCT sessions.session_id) AS value
+    FROM :main_sessions_table AS sessions
+    WHERE :sub_query_chart
+    GROUP BY timestamp
+    ORDER BY timestamp;
+    `
+	chSubQuery, chSubQueryChart, params := parse(projectID, startTs, endTs, density, args)
+
+	chQuery = strings.Replace(chQuery, ":main_sessions_table", getMainSessionsTable(startTs), -1)
+	chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQueryChart, " AND "), -1)
+
+	preparedQuery, preparedArgs := replaceNamedParams(chQuery, params)
+	rows, err := c.chConn.Query(context.Background(), preparedQuery, preparedArgs)
+	if err != nil {
+		log.Fatalf("Error executing query: %v", err)
+	}
+	preparedRows := make([]map[string]interface{}, 0)
+	var sum uint64
+	for rows.Next() {
+		var timestamp, value uint64
+		if err := rows.Scan(&timestamp, &value); err != nil {
+			log.Fatalf("Error scanning row: %v", err)
+		}
+		fmt.Printf("Timestamp: %d, Value: %d\n", timestamp, value)
+		sum += value
+		preparedRows = append(preparedRows, map[string]interface{}{"timestamp": timestamp, "value": value})
+	}
+
+	results := map[string]interface{}{
+		"value": sum,
+		"chart": CompleteMissingSteps(startTs, endTs, int(density), map[string]interface{}{"value": 0}, preparedRows, "timestamp", 1000),
+	}
+
+	diff := endTs - startTs
+	endTs = startTs
+	startTs = endTs - diff
+
+	log.Println(results)
+
+	chQuery = fmt.Sprintf(`
+        SELECT COUNT(1) AS count
+        FROM :main_sessions_table AS sessions
+        WHERE :sub_query_chart;
+    `)
+	chQuery = strings.Replace(chQuery, ":main_sessions_table", getMainSessionsTable(startTs), -1)
+	chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQuery, " AND "), -1)
+
+	var count uint64
+
+	preparedQuery, preparedArgs = replaceNamedParams(chQuery, params)
+	if err := c.chConn.QueryRow(context.Background(), preparedQuery, preparedArgs).Scan(&count); err != nil {
+		log.Fatalf("Error executing query: %v", err)
+	}
+
+	results["progress"] = progress(count, results["value"].(uint64))
+
+	// TODO: this should be returned in any case
+	results["unit"] = "COUNT"
+	fmt.Println(results)
+}
+
+// Users trend
+//func getUniqueUsers(projectID uint64, startTs, endTs uint64, density uint64, args map[string]interface{}) {
+//	chQuery := `
+//    SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL :step_size second)) * 1000 AS timestamp,
+//           COUNT(DISTINCT sessions.user_id) AS value
+//    FROM :main_sessions_table AS sessions
+//    WHERE :sub_query_chart
+//    GROUP BY timestamp
+//    ORDER BY timestamp;
+//    `
+//	chSubQuery, chSubQueryChart, params := parse(projectID, startTs, endTs, density, args)
+//	chSubQueryChart = append(chSubQueryChart, []string{"isNotNull(sessions.user_id)", "sessions.user_id!=''"}...)
+//
+//	chQuery = strings.Replace(chQuery, ":main_sessions_table", getMainSessionsTable(startTs), -1)
+//	chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQueryChart, " AND "), -1)
+//
+//	preparedQuery, preparedArgs := replaceNamedParams(chQuery, params)
+//
+//	return
+//}
diff --git a/backend/pkg/analytics/charts/handlers.go b/backend/pkg/analytics/charts/handlers.go
index 771732b43..2b35c5b3c 100644
--- a/backend/pkg/analytics/charts/handlers.go
+++ b/backend/pkg/analytics/charts/handlers.go
@@ -41,8 +41,9 @@ type handlersImpl struct {
 
 func (e *handlersImpl) GetAll() []*api.Description {
 	return []*api.Description{
-		{"/v1/analytics/{projectId}/cards/{id}/chart", e.getCardChartData, "POST"},
+		{"/v1/analytics/{projectId}/cards/{id}/chart", e.getCardChartData, "POST"}, // for dashboards
 		{"/v1/analytics/{projectId}/cards/{id}/try", e.getCardChartData, "POST"},
+		{"/v1/analytics/{projectId}/cards/try", e.getCardChartData, "POST"}, // for cards itself
 	}
 }
diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go
index 81016c7e6..399fd531c 100644
--- a/backend/pkg/analytics/charts/model.go
+++ b/backend/pkg/analytics/charts/model.go
@@ -8,9 +8,9 @@ type DataPoint struct {
 }
 
 type GetCardChartDataRequest struct {
-	MetricType   string             `json:"metricType" validate:"required,oneof=timeseries table funnel"`
-	MetricOf     string             `json:"metricOf" validate:"required,oneof=session_count user_count"`
-	ViewType     string             `json:"viewType" validate:"required,oneof=line_chart table_view"`
+	MetricType   string             `json:"metricType" validate:"required,oneof=timeseries table funnel errors performance resources webVitals pathAnalysis retention stickiness heatMap"`
+	MetricOf     string             `json:"metricOf" validate:"required,oneof=sessionCount userCount"`
+	ViewType     string             `json:"viewType" validate:"required,oneof=lineChart areaChart barChart pieChart progressChart table metric"`
 	MetricFormat string             `json:"metricFormat" validate:"required,oneof=default percentage"`
 	SessionID    int64              `json:"sessionId"`
 	Series       []cards.CardSeries `json:"series" validate:"required,dive"`
From 4eae2ef439560dd1600a60a42829b7e1d4396f03 Mon Sep 17 00:00:00 2001
From: Alexander
Date: Wed, 22 Jan 2025 16:12:35 +0100
Subject: [PATCH 02/30] feat(analytics): updated user trends method

---
 backend/pkg/analytics/charts/counters.go | 85 ++++++++++++++++++------
 1 file changed, 66 insertions(+), 19 deletions(-)

diff --git a/backend/pkg/analytics/charts/counters.go b/backend/pkg/analytics/charts/counters.go
index 92a664a0a..fbbc3c93d 100644
--- a/backend/pkg/analytics/charts/counters.go
+++ b/backend/pkg/analytics/charts/counters.go
@@ -343,22 +343,69 @@ func (c *chartsImpl) getProcessedSessions(projectID uint64, startTs, endTs uint6
 }
 
 // Users trend
-//func getUniqueUsers(projectID uint64, startTs, endTs uint64, density uint64, args map[string]interface{}) {
-//	chQuery := `
-//    SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL :step_size second)) * 1000 AS timestamp,
-//           COUNT(DISTINCT sessions.user_id) AS value
-//    FROM :main_sessions_table AS sessions
-//    WHERE :sub_query_chart
-//    GROUP BY timestamp
-//    ORDER BY timestamp;
-//    `
-//	chSubQuery, chSubQueryChart, params := parse(projectID, startTs, endTs, density, args)
-//	chSubQueryChart = append(chSubQueryChart, []string{"isNotNull(sessions.user_id)", "sessions.user_id!=''"}...)
-//
-//	chQuery = strings.Replace(chQuery, ":main_sessions_table", getMainSessionsTable(startTs), -1)
-//	chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQueryChart, " AND "), -1)
-//
-//	preparedQuery, preparedArgs := replaceNamedParams(chQuery, params)
-//
-//	return
-//}
+func (c *chartsImpl) getUniqueUsers(projectID uint64, startTs, endTs uint64, density uint64, args map[string]interface{}) {
+	chQuery := `
+    SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL :step_size second)) * 1000 AS timestamp,
+           COUNT(DISTINCT sessions.user_id) AS value
+    FROM :main_sessions_table AS sessions
+    WHERE :sub_query_chart
+    GROUP BY timestamp
+    ORDER BY timestamp;
+    `
+	chSubQuery, chSubQueryChart, params := parse(projectID, startTs, endTs, density, args)
+	chSubQueryChart = append(chSubQueryChart, []string{"isNotNull(sessions.user_id)", "sessions.user_id!=''"}...)
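+	// The isNotNull / non-empty user_id constraints above are what separate
+	// the unique-users trend from the sessions trend: anonymous sessions
+	// carry no user_id and must not be counted here.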
+
+	chQuery = strings.Replace(chQuery, ":main_sessions_table", getMainSessionsTable(startTs), -1)
+	chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQueryChart, " AND "), -1)
+
+	preparedQuery, preparedArgs := replaceNamedParams(chQuery, params)
+	rows, err := c.chConn.Query(context.Background(), preparedQuery, preparedArgs)
+	if err != nil {
+		log.Fatalf("Error executing query: %v", err)
+	}
+	preparedRows := make([]map[string]interface{}, 0)
+	var sum uint64
+	for rows.Next() {
+		var timestamp, value uint64
+		if err := rows.Scan(&timestamp, &value); err != nil {
+			log.Fatalf("Error scanning row: %v", err)
+		}
+		fmt.Printf("Timestamp: %d, Value: %d\n", timestamp, value)
+		sum += value
+		preparedRows = append(preparedRows, map[string]interface{}{"timestamp": timestamp, "value": value})
+	}
+
+	results := map[string]interface{}{
+		"value": sum,
+		"chart": CompleteMissingSteps(startTs, endTs, int(density), map[string]interface{}{"value": 0}, preparedRows, "timestamp", 1000),
+	}
+
+	diff := endTs - startTs
+	endTs = startTs
+	startTs = endTs - diff
+
+	log.Println(results)
+
+	chQuery = fmt.Sprintf(`
+        SELECT COUNT(DISTINCT user_id) AS count
+        FROM :main_sessions_table AS sessions
+        WHERE :sub_query_chart;
+    `)
+	chQuery = strings.Replace(chQuery, ":main_sessions_table", getMainSessionsTable(startTs), -1)
+	chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQuery, " AND "), -1)
+
+	var count uint64
+
+	preparedQuery, preparedArgs = replaceNamedParams(chQuery, params)
+	if err := c.chConn.QueryRow(context.Background(), preparedQuery, preparedArgs).Scan(&count); err != nil {
+		log.Fatalf("Error executing query: %v", err)
+	}
+
+	results["progress"] = progress(count, results["value"].(uint64))
+
+	// TODO: this should be returned in any case
+	results["unit"] = "COUNT"
+	fmt.Println(results)
+
+	return
+}
From 47099182543bd6ca61611631b6d102da0c46d90c Mon Sep 17 00:00:00 2001
From: Shekar Siri
Date: Tue, 28 Jan 2025 10:16:51 +0100
Subject: [PATCH 03/30] feat(analytics): filters

---
 backend/pkg/analytics/cards/model.go      |  18 +
 backend/pkg/analytics/charts/charts.go    |  45 +-
 backend/pkg/analytics/charts/counters.go  |  41 +-
 backend/pkg/analytics/charts/model.go     |  48 +-
 backend/pkg/analytics/query/chartQuery.go | 649 ++++++++++++++++++++++
 5 files changed, 776 insertions(+), 25 deletions(-)
 create mode 100644 backend/pkg/analytics/query/chartQuery.go

diff --git a/backend/pkg/analytics/cards/model.go b/backend/pkg/analytics/cards/model.go
index 0e88dfbf5..4b5dd8a10 100644
--- a/backend/pkg/analytics/cards/model.go
+++ b/backend/pkg/analytics/cards/model.go
@@ -6,6 +6,24 @@ import (
 	"time"
 )
 
+type MetricType string
+type MetricOfTimeseries string
+type MetricOfTable string
+
+const (
+	MetricTypeTimeseries MetricType = "TIMESERIES"
+	MetricTypeTable      MetricType = "TABLE"
+
+	MetricOfTimeseriesSessionCount MetricOfTimeseries = "SESSION_COUNT"
+	MetricOfTimeseriesUserCount    MetricOfTimeseries = "USER_COUNT"
+
+	MetricOfTableVisitedURL  MetricOfTable = "VISITED_URL"
+	MetricOfTableIssues      MetricOfTable = "ISSUES"
+	MetricOfTableUserCountry MetricOfTable = "USER_COUNTRY"
+	MetricOfTableUserDevice  MetricOfTable = "USER_DEVICE"
+	MetricOfTableUserBrowser MetricOfTable = "USER_BROWSER"
+)
+
 // CardBase Common fields for the Card entity
 type CardBase struct {
 	Name string `json:"name" validate:"required"`
diff --git a/backend/pkg/analytics/charts/charts.go b/backend/pkg/analytics/charts/charts.go
index bf242f6b4..724a814a7 100644
--- a/backend/pkg/analytics/charts/charts.go
+++ b/backend/pkg/analytics/charts/charts.go
@@ -27,7 +27,7 @@ func New(log logger.Logger, conn pool.Pool) (Charts, error) {
 	}, nil
 }
 
-// def get_chart()
+// GetData def get_chart()
 func (s *chartsImpl) GetData(projectId int, userID uint64, req *GetCardChartDataRequest) ([]DataPoint, error) {
 	if req == nil {
 		return nil, fmt.Errorf("request is empty")
@@ -109,14 +109,47 @@ func (s *chartsImpl) getMetric(projectID int, userID uint64, req *GetCardChartDa
 }
 
 func (s *chartsImpl) getTimeseriesCharts(projectID int, userID uint64, req *GetCardChartDataRequest) ([]DataPoint, error) {
-	charts := []interface{}{}
+	var dataPoints []DataPoint
+	var stepSize = getStepSize(req.StartTimestamp, req.EndTimestamp, req.Density, true, 1000)
+	var query string
+
+	switch req.MetricOf {
+	case "sessionCount":
+		query = fmt.Sprintf(`
+        SELECT
+            toUnixTimestamp(toStartOfInterval(processed_sessions.datetime, INTERVAL %d second)) * 1000 AS timestamp,
+            COUNT(processed_sessions.session_id) AS count
+        FROM (
+            SELECT
+                s.session_id AS session_id,
+                s.datetime AS datetime
+                %s
+        ) AS processed_sessions
+        GROUP BY timestamp
+        ORDER BY timestamp;
+    `, stepSize, "query_part") // Replace "query_part" with the actual query part
+	default:
+		return nil, fmt.Errorf("unsupported metric: %s", req.MetricOf)
+	}
+
+	fmt.Printf("stepSize: %v\n", stepSize)
+
 	for _, series := range req.Series {
 		res, err := s.searchSeries(projectID, series)
 		if err != nil {
-			return nil, err
+			return nil, fmt.Errorf("failed to search series: %w", err)
+		}
+		if seriesData, ok := res.([]DataPoint); ok {
+			dataPoints = append(dataPoints, seriesData...)
+		} else {
+			return nil, fmt.Errorf("unexpected data format from searchSeries")
 		}
-		charts = append(charts, res)
 	}
-	results := []DataPoint{}
-	return results, nil
+	return dataPoints, nil
+}
+
+func (s *chartsImpl) searchSeries(projectID int, series cards.CardSeries) (interface{}, error) {
+
+	// Placeholder implementation
+	return []DataPoint{}, nil
 }
diff --git a/backend/pkg/analytics/charts/counters.go b/backend/pkg/analytics/charts/counters.go
index fbbc3c93d..431104b03 100644
--- a/backend/pkg/analytics/charts/counters.go
+++ b/backend/pkg/analytics/charts/counters.go
@@ -38,14 +38,29 @@ func getMetadataFields() Fields {
 	}
 }
 
-func getStepSize(startTimestamp, endTimestamp, density uint64, decimal bool, factor uint64) float64 {
-	stepSize := (endTimestamp / factor) - (startTimestamp / factor) // TODO: should I use float64 here?
-	if !decimal {
-		density--
+func getStepSize(startTimestamp, endTimestamp int64, density int, decimal bool, factor int) float64 {
+	factorInt64 := int64(factor)
+	stepSize := (endTimestamp / factorInt64) - (startTimestamp / factorInt64)
+
+	if density <= 1 {
+		return float64(stepSize)
 	}
-	return float64(stepSize) / float64(density)
+
+	if decimal {
+		return float64(stepSize) / float64(density)
+	}
+
+	return float64(stepSize / int64(density-1))
 }
 
+//func getStepSize(startTimestamp, endTimestamp, density uint64, decimal bool, factor uint64) float64 {
+//	stepSize := (endTimestamp / factor) - (startTimestamp / factor) // TODO: should I use float64 here?
+//	if !decimal {
+//		density--
+//	}
+//	return float64(stepSize) / float64(density)
+//}
+
 func getBasicConstraints(tableName string, timeConstraint, roundStart bool, data map[string]interface{}, identifier string) []string { // If tableName is not empty, append a trailing dot
 	if tableName != "" {
 		tableName += "."
@@ -146,8 +161,8 @@ def get_main_sessions_table(timestamp=0): if config("EXP_7D_MV", cast=bool, default=True) \ and timestamp and timestamp >= TimeUTC.now(delta_days=-7) else "experimental.sessions" */ -func getMainSessionsTable(timestamp uint64) string { - return "experimental.sessions" +func getMainSessionsTable(timestamp int64) string { + return "product_analytics.sessions" } // Function to convert named parameters to positional parameters @@ -186,7 +201,7 @@ func addMissingKeys(original, complete map[string]interface{}) map[string]interf // CompleteMissingSteps fills in missing steps in the data func CompleteMissingSteps( - startTime, endTime uint64, + startTime, endTime int64, density int, neutral map[string]interface{}, rows []map[string]interface{}, @@ -198,7 +213,7 @@ func CompleteMissingSteps( } // Calculate the step size - step := getStepSize(startTime, endTime, uint64(density), true, 1000) + step := getStepSize(startTime, endTime, density, true, 1000) optimal := make([][2]uint64, 0) for _, i := range frange(float64(startTime)/float64(timeCoefficient), float64(endTime)/float64(timeCoefficient), step) { startInterval := uint64(i * float64(timeCoefficient)) @@ -257,7 +272,7 @@ func progress(oldVal, newVal uint64) float64 { } // Trying to find a common part -func parse(projectID uint64, startTs, endTs uint64, density uint64, args map[string]interface{}) ([]string, []string, map[string]interface{}) { +func parse(projectID uint64, startTs, endTs int64, density int, args map[string]interface{}) ([]string, []string, map[string]interface{}) { stepSize := getStepSize(startTs, endTs, density, false, 1000) chSubQuery := getBasicConstraints("sessions", true, false, args, "project_id") chSubQueryChart := getBasicConstraints("sessions", true, true, args, "project_id") @@ -278,7 +293,7 @@ func parse(projectID uint64, startTs, endTs uint64, density uint64, args map[str } // Sessions trend -func (c *chartsImpl) getProcessedSessions(projectID uint64, startTs, endTs uint64, density uint64, args map[string]interface{}) { +func (s *chartsImpl) getProcessedSessions(projectID uint64, startTs, endTs int64, density int, args map[string]interface{}) { chQuery := ` SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL :step_size second)) * 1000 AS timestamp, COUNT(DISTINCT sessions.session_id) AS value @@ -293,7 +308,7 @@ func (c *chartsImpl) getProcessedSessions(projectID uint64, startTs, endTs uint6 chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQueryChart, " AND "), -1) preparedQuery, preparedArgs := replaceNamedParams(chQuery, params) - rows, err := c.chConn.Query(context.Background(), preparedQuery, preparedArgs) + rows, err := s.chConn.Query(context.Background(), preparedQuery, preparedArgs) if err != nil { log.Fatalf("Error executing query: %v", err) } @@ -331,7 +346,7 @@ func (c *chartsImpl) getProcessedSessions(projectID uint64, startTs, endTs uint6 var count uint64 preparedQuery, preparedArgs = replaceNamedParams(chQuery, params) - if err := c.chConn.QueryRow(context.Background(), preparedQuery, preparedArgs).Scan(&count); err != nil { + if err := s.chConn.QueryRow(context.Background(), preparedQuery, preparedArgs).Scan(&count); err != nil { log.Fatalf("Error executing query: %v", err) } diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go index 399fd531c..0ff630f2f 100644 --- a/backend/pkg/analytics/charts/model.go +++ b/backend/pkg/analytics/charts/model.go @@ -8,14 +8,50 @@ type DataPoint struct { } type 
GetCardChartDataRequest struct { - MetricType string `json:"metricType" validate:"required,oneof=timeseries table funnel errors performance resources webVitals pathAnalysis retention stickiness heatMap"` - MetricOf string `json:"metricOf" validate:"required,oneof=sessionCount userCount"` - ViewType string `json:"viewType" validate:"required,oneof=lineChart areaChart barChart pieChart progressChart table metric"` - MetricFormat string `json:"metricFormat" validate:"required,oneof=default percentage"` - SessionID int64 `json:"sessionId"` - Series []cards.CardSeries `json:"series" validate:"required,dive"` + StartTimestamp int64 `json:"startTimestamp" validate:"required"` + EndTimestamp int64 `json:"endTimestamp" validate:"required"` + Density int `json:"density" validate:"required"` + MetricType string `json:"metricType" validate:"required,oneof=timeseries table funnel errors performance resources webVitals pathAnalysis retention stickiness heatMap"` + MetricOf string `json:"metricOf" validate:"required,oneof=sessionCount userCount"` + ViewType string `json:"viewType" validate:"required,oneof=lineChart areaChart barChart pieChart progressChart table metric"` + MetricFormat string `json:"metricFormat" validate:"required,oneof=default percentage"` + SessionID int64 `json:"sessionId"` + Series []cards.CardSeries `json:"series" validate:"required,dive"` } type GetCardChartDataResponse struct { Data []DataPoint `json:"data"` } + +type MetricType string +type MetricOfTimeseries string +type MetricOfTable string + +const ( + MetricTypeTimeseries MetricType = "TIMESERIES" + MetricTypeTable MetricType = "TABLE" + + MetricOfTimeseriesSessionCount MetricOfTimeseries = "SESSION_COUNT" + MetricOfTimeseriesUserCount MetricOfTimeseries = "USER_COUNT" + + MetricOfTableVisitedURL MetricOfTable = "VISITED_URL" + MetricOfTableIssues MetricOfTable = "ISSUES" + MetricOfTableUserCountry MetricOfTable = "USER_COUNTRY" + MetricOfTableUserDevice MetricOfTable = "USER_DEVICE" + MetricOfTableUserBrowser MetricOfTable = "USER_BROWSER" +) + +type SessionsSearchPayload struct { + StartTimestamp int64 + EndTimestamp int64 + Filters []SessionSearchFilter +} + +type SessionSearchFilter struct { + Type FilterType + Value interface{} + Operator SearchEventOperator +} + +type SearchEventOperator string // Define constants as needed +type FilterType string // Define constants as needed diff --git a/backend/pkg/analytics/query/chartQuery.go b/backend/pkg/analytics/query/chartQuery.go new file mode 100644 index 000000000..47724b53d --- /dev/null +++ b/backend/pkg/analytics/query/chartQuery.go @@ -0,0 +1,649 @@ +package main + +import ( + "encoding/json" + "fmt" + "strings" +) + +func main() { + var r Root + err := json.Unmarshal([]byte(jsonInput), &r) + if err != nil { + panic(err) + } + + //fmt.Println("ARGS:", r) + fmt.Println(buildQuery(r)) + //fmt.Println("QUERY PART:", qp) +} + +type Table string +type Column string +type FilterType string +type EventOrder string +type FetchFilterType string + +const ( + UserOs FilterType = "userOs" + UserBrowser FilterType = "userBrowser" + UserDevice FilterType = "userDevice" + UserCountry FilterType = "userCountry" + UserCity FilterType = "userCity" + UserState FilterType = "userState" + UserId FilterType = "userId" + UserAnonymousId FilterType = "userAnonymousId" + Referrer FilterType = "referrer" + RevId FilterType = "revId" + UserOsIos FilterType = "userOsIos" + UserDeviceIos FilterType = "userDeviceIos" + UserCountryIos FilterType = "userCountryIos" + UserIdIos FilterType = 
"userIdIos" + UserAnonymousIdIos FilterType = "userAnonymousIdIos" + RevIdIos FilterType = "revIdIos" + Duration FilterType = "duration" + Platform FilterType = "platform" + Metadata FilterType = "metadata" + Issue FilterType = "issue" + EventsCount FilterType = "eventsCount" + UtmSource FilterType = "utmSource" + UtmMedium FilterType = "utmMedium" + UtmCampaign FilterType = "utmCampaign" + ThermalState FilterType = "thermalState" + MainThreadCPU FilterType = "mainThreadCPU" + ViewComponent FilterType = "viewComponent" + LogEvent FilterType = "logEvent" + ClickEvent FilterType = "clickEvent" + MemoryUsage FilterType = "memoryUsage" +) + +const ( + Click FilterType = "click" + Input FilterType = "input" + Location FilterType = "location" + Custom FilterType = "custom" + Request FilterType = "request" + Fetch FilterType = "fetch" + GraphQL FilterType = "graphql" + StateAction FilterType = "stateAction" + Error FilterType = "error" + Tag FilterType = "tag" + ClickMobile FilterType = "clickMobile" + InputMobile FilterType = "inputMobile" + ViewMobile FilterType = "viewMobile" + CustomMobile FilterType = "customMobile" + RequestMobile FilterType = "requestMobile" + ErrorMobile FilterType = "errorMobile" + SwipeMobile FilterType = "swipeMobile" +) + +const ( + EventOrderThen EventOrder = "then" + EventOrderOr EventOrder = "or" + EventOrderAnd EventOrder = "and" +) + +const ( + FetchFilterTypeFetchUrl FilterType = "fetchUrl" + FetchFilterTypeFetchStatusCode FilterType = "fetchStatusCode" + FetchFilterTypeFetchMethod FilterType = "fetchMethod" + FetchFilterTypeFetchDuration FilterType = "fetchDuration" + FetchFilterTypeFetchRequestBody FilterType = "fetchRequestBody" + FetchFilterTypeFetchResponseBody FilterType = "fetchResponseBody" +) + +const ( + OperatorStringIs = "is" + OperatorStringIsAny = "isAny" + OperatorStringOn = "on" + OperatorStringOnAny = "onAny" + OperatorStringIsNot = "isNot" + OperatorStringIsUndefined = "isUndefined" + OperatorStringNotOn = "notOn" + OperatorStringContains = "contains" + OperatorStringNotContains = "notContains" + OperatorStringStartsWith = "startsWith" + OperatorStringEndsWith = "endsWith" +) + +const ( + OperatorMathEq = "=" + OperatorMathLt = "<" + OperatorMathGt = ">" + OperatorMathLe = "<=" + OperatorMathGe = ">=" +) + +//-------------------------------------------------- +// Constants for columns, tables, etc. 
+//-------------------------------------------------- + +const ( + TableEvents Table = "product_analytics.events" + TableSessions Table = "experimental.sessions" + + ColEventTime Column = "main.created_at" + ColEventName Column = "main.`$event_name`" + ColEventProjectID Column = "main.project_id" + ColEventProperties Column = "main.`$properties`" + ColEventSessionID Column = "main.session_id" + ColEventURLPath Column = "main.url_path" + ColEventStatus Column = "main.status" + + ColSessionID Column = "s.session_id" + ColDuration Column = "s.duration" + ColUserCountry Column = "s.user_country" + ColUserCity Column = "s.user_city" + ColUserState Column = "s.user_state" + ColUserID Column = "s.user_id" + ColUserAnonymousID Column = "s.user_anonymous_id" + ColUserOS Column = "s.user_os" + ColUserBrowser Column = "s.user_browser" + ColUserDevice Column = "s.user_device" + ColUserDeviceType Column = "s.user_device_type" + ColRevID Column = "s.rev_id" + ColBaseReferrer Column = "s.base_referrer" + ColUtmSource Column = "s.utm_source" + ColUtmMedium Column = "s.utm_medium" + ColUtmCampaign Column = "s.utm_campaign" + ColMetadata1 Column = "s.metadata_1" + ColSessionProjectID Column = "s.project_id" + ColSessionIsNotNull Column = "isNotNull(s.duration)" +) + +type Root struct { + StartTimestamp int64 `json:"startTimestamp"` + EndTimestamp int64 `json:"endTimestamp"` + Series []Series `json:"series"` +} + +type Series struct { + SeriesID int64 `json:"seriesId"` + Name string `json:"name"` + Filter SeriesFilter `json:"filter"` +} + +type SeriesFilter struct { + Filters []FilterObj `json:"filters"` + EventsOrder EventOrder `json:"eventsOrder"` +} + +type FilterObj struct { + Type FilterType `json:"type"` + IsEvent bool `json:"isEvent"` + Value []string `json:"value"` + Operator string `json:"operator"` + Source string `json:"source"` + Filters []FilterObj `json:"filters"` +} + +// -------------------------------------------------- +func buildQuery(r Root) string { + s := r.Series[0] + + // iterate over series and partition filters + //for _, s := range r.Series { + // sessionFilters, eventFilters := partitionFilters(s.Filter.Filters) + // sessionWhere := buildSessionWhere(sessionFilters) + // eventWhere, seqHaving := buildEventsWhere(eventFilters, s.Filter.EventsOrder) + // fmt.Println("SESSION FILTERS:", sessionFilters) + // fmt.Println("EVENT FILTERS:", eventFilters) + // fmt.Println("SESSION WHERE:", sessionWhere) + // fmt.Println("EVENT WHERE:", eventWhere) + // fmt.Println("SEQ HAVING:", seqHaving) + //} + + sessionFilters, eventFilters := partitionFilters(s.Filter.Filters) + sessionWhere := buildSessionWhere(sessionFilters) + eventWhere, seqHaving := buildEventsWhere(eventFilters, s.Filter.EventsOrder) + + subQuery := fmt.Sprintf( + "SELECT %s,\n"+ + " MIN(%s) AS first_event_ts,\n"+ + " MAX(%s) AS last_event_ts\n"+ + "FROM %s AS main\n"+ + "WHERE main.project_id = %%(project_id)s\n"+ + " AND %s >= toDateTime(%%(start_time)s/1000)\n"+ + " AND %s <= toDateTime(%%(end_time)s/1000)\n"+ + " AND (%s)\n"+ + "GROUP BY %s\n"+ + "HAVING %s", + ColEventSessionID, + ColEventTime, + ColEventTime, + TableEvents, + ColEventTime, + ColEventTime, + strings.Join(eventWhere, " OR "), + ColEventSessionID, + seqHaving, + ) + + joinQuery := fmt.Sprintf( + "SELECT *\n"+ + "FROM %s AS s\n"+ + "INNER JOIN (\n"+ + " SELECT DISTINCT ev.session_id, ev.`$current_url` AS url_path\n"+ + " FROM %s AS ev\n"+ + " WHERE ev.created_at >= toDateTime(%%(start_time)s/1000)\n"+ + " AND ev.created_at <= 
toDateTime(%%(end_time)s/1000)\n"+ + " AND ev.project_id = %%(project_id)s\n"+ + " AND ev.`$event_name` = 'LOCATION'\n"+ + ") AS extra_event USING (session_id)\n"+ + "WHERE s.project_id = %%(project_id)s\n"+ + " AND %s\n"+ + " AND s.datetime >= toDateTime(%%(start_time)s/1000)\n"+ + " AND s.datetime <= toDateTime(%%(end_time)s/1000)\n", + TableSessions, + TableEvents, + ColSessionIsNotNull, + ) + + if len(sessionWhere) > 0 { + joinQuery += " AND " + strings.Join(sessionWhere, " AND ") + "\n" + } + + main := fmt.Sprintf( + "SELECT s.session_id AS session_id, s.url_path\n"+ + "FROM (\n%s\n) AS f\n"+ + "INNER JOIN (\n%s) AS s\n"+ + " ON (s.session_id = f.session_id)\n", + subQuery, + joinQuery, + ) + + final := fmt.Sprintf( + "SELECT COUNT(DISTINCT url_path) OVER () AS main_count,\n"+ + " url_path AS name,\n"+ + " COUNT(DISTINCT session_id) AS total,\n"+ + " COALESCE(SUM(COUNT(DISTINCT session_id)) OVER (), 0) AS total_count\n"+ + "FROM (\n%s) AS filtered_sessions\n"+ + "GROUP BY url_path\n"+ + "ORDER BY total DESC\n"+ + "LIMIT 200 OFFSET 0;", + main, + ) + + return final +} + +func partitionFilters(filters []FilterObj) (sessionFilters, eventFilters []FilterObj) { + for _, f := range filters { + if f.IsEvent { + eventFilters = append(eventFilters, f) + } else { + sessionFilters = append(sessionFilters, f) + } + } + return +} + +func buildSessionWhere(filters []FilterObj) []string { + var conds []string + for _, f := range filters { + switch f.Type { + case UserCountry: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCountry, concatValues(f.Value))) + case UserCity: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCity, concatValues(f.Value))) + case UserState: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserState, concatValues(f.Value))) + case UserId: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserID, concatValues(f.Value))) + case UserAnonymousId: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserAnonymousID, concatValues(f.Value))) + case UserOs: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserOS, concatValues(f.Value))) + case UserBrowser: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserBrowser, concatValues(f.Value))) + case UserDevice: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDevice, concatValues(f.Value))) + case Platform: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDeviceType, concatValues(f.Value))) + case RevId: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColRevID, concatValues(f.Value))) + case Referrer: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColBaseReferrer, concatValues(f.Value))) + case Duration: + if len(f.Value) == 2 { + conds = append(conds, fmt.Sprintf("%s >= '%s'", ColDuration, f.Value[0])) + conds = append(conds, fmt.Sprintf("%s <= '%s'", ColDuration, f.Value[1])) + } + case UtmSource: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmSource, concatValues(f.Value))) + case UtmMedium: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmMedium, concatValues(f.Value))) + case UtmCampaign: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmCampaign, concatValues(f.Value))) + case Metadata: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColMetadata1, concatValues(f.Value))) + } + } + // add /n to each condition + for i := range conds { + conds[i] += "\n" + } + return conds +} + +func parseOperator(op string) string { + 
switch strings.ToLower(op) { + case OperatorStringContains: + return OperatorMathEq // interpret as "LIKE" if needed + case OperatorStringIs, OperatorStringOn, "=", OperatorStringOnAny: + return OperatorMathEq + case OperatorStringStartsWith: + // might interpret differently in real impl + return OperatorMathEq + case OperatorStringEndsWith: + // might interpret differently in real impl + return OperatorMathEq + default: + return OperatorMathEq + } +} + +func buildEventsWhere(filters []FilterObj, order EventOrder) (eventConditions []string, having string) { + basicEventTypes := "(" + + strings.Join([]string{ + fmt.Sprintf("%s = 'CLICK'", ColEventName), + fmt.Sprintf("%s = 'INPUT'", ColEventName), + fmt.Sprintf("%s = 'LOCATION'", ColEventName), + fmt.Sprintf("%s = 'CUSTOM'", ColEventName), + fmt.Sprintf("%s = 'REQUEST'", ColEventName), + }, " OR ") + ")" + + var seq []string + for _, f := range filters { + switch f.Type { + case Click: + seq = append(seq, seqCond("CLICK", "selector", f)) + case Input: + seq = append(seq, seqCond("INPUT", "label", f)) + case Location: + seq = append(seq, seqCond("LOCATION", "url_path", f)) + case Custom: + seq = append(seq, seqCond("CUSTOM", "name", f)) + case Fetch: + seq = append(seq, seqFetchCond("REQUEST", f)) + case FetchFilterTypeFetchStatusCode: + seq = append(seq, seqCond("REQUEST", "status", f)) + default: + seq = append(seq, fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(string(f.Type)))) + } + } + eventConditions = []string{basicEventTypes} + + // then => sequenceMatch + // or => OR + // and => AND + switch order { + case EventOrderThen: + var pattern []string + for i := range seq { + pattern = append(pattern, fmt.Sprintf("(?%d)", i+1)) + } + having = fmt.Sprintf("sequenceMatch('%s')(\n%s,\n%s)", + strings.Join(pattern, ""), fmt.Sprintf("toUnixTimestamp(%s)", ColEventTime), strings.Join(seq, ",\n")) + case EventOrderAnd: + // build AND + having = strings.Join(seq, " AND ") + default: + // default => OR + var orParts []string + for _, p := range seq { + orParts = append(orParts, "("+p+")") + } + having = strings.Join(orParts, " OR ") + } + return +} + +func seqCond(eventName, key string, f FilterObj) string { + op := parseOperator(f.Operator) + return fmt.Sprintf("(%s = '%s' AND JSONExtractString(toString(%s), '%s') %s '%s')", + ColEventName, strings.ToUpper(eventName), ColEventProperties, key, op, concatValues(f.Value)) +} + +func seqFetchCond(eventName string, f FilterObj) string { + w := []string{fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(eventName))} + var extras []string + for _, c := range f.Filters { + switch c.Type { + case Fetch: + if len(c.Value) > 0 { + extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventURLPath, concatValues(c.Value))) + } + case FetchFilterTypeFetchStatusCode: + if len(c.Value) > 0 { + extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventStatus, concatValues(c.Value))) + } + default: + // placeholder if needed + } + } + if len(extras) > 0 { + w = append(w, strings.Join(extras, " AND ")) + } + return "(" + strings.Join(w, " AND ") + ")" +} + +func concatValues(v []string) string { + return strings.Join(v, "") +} + +const jsonInput = ` +{ + "startTimestamp": 1737043724664, + "endTimestamp": 1737130124664, + "series": [ + { + "seriesId": 610, + "name": "Series 1", + "filter": { + "filters": [ + { + "type": "click", + "isEvent": true, + "value": ["DEPLOYMENT"], + "operator": "on", + "filters": [] + }, + { + "type": "input", + "isEvent": true, + "value": ["a"], + "operator": "contains", 
+ "filters": [] + }, + { + "type": "location", + "isEvent": true, + "value": ["/en/using-or/"], + "operator": "is", + "filters": [] + }, + { + "type": "userCountry", + "isEvent": false, + "value": ["AD"], + "operator": "is", + "filters": [] + }, + { + "type": "userCity", + "isEvent": false, + "value": ["Mumbai"], + "operator": "is", + "filters": [] + }, + { + "type": "userState", + "isEvent": false, + "value": ["Maharashtra"], + "operator": "is", + "filters": [] + }, + { + "type": "userId", + "isEvent": false, + "value": ["test@test.com"], + "operator": "is", + "filters": [] + }, + { + "type": "userAnonymousId", + "isEvent": false, + "value": ["asd"], + "operator": "is", + "filters": [] + }, + { + "type": "userOs", + "isEvent": false, + "value": ["Mac OS X"], + "operator": "is", + "filters": [] + }, + { + "type": "userBrowser", + "isEvent": false, + "value": ["Chrome"], + "operator": "is", + "filters": [] + }, + { + "type": "userDevice", + "isEvent": false, + "value": ["iPhone"], + "operator": "is", + "filters": [] + }, + { + "type": "platform", + "isEvent": false, + "value": ["desktop"], + "operator": "is", + "filters": [] + }, + { + "type": "revId", + "isEvent": false, + "value": ["v1"], + "operator": "is", + "filters": [] + }, + { + "type": "referrer", + "isEvent": false, + "value": ["https://www.google.com/"], + "operator": "is", + "filters": [] + }, + { + "type": "duration", + "isEvent": false, + "value": ["60000", "6000000"], + "operator": "is", + "filters": [] + }, + { + "type": "tag", + "isEvent": true, + "value": ["8"], + "operator": "is", + "filters": [] + }, + { + "type": "utmSource", + "isEvent": false, + "value": ["aaa"], + "operator": "is", + "filters": [] + }, + { + "type": "utmMedium", + "isEvent": false, + "value": ["aa"], + "operator": "is", + "filters": [] + }, + { + "type": "utmCampaign", + "isEvent": false, + "value": ["aaa"], + "operator": "is", + "filters": [] + }, + { + "type": "metadata", + "isEvent": false, + "value": ["bbbb"], + "operator": "is", + "source": "userId", + "filters": [] + }, + { + "type": "custom", + "isEvent": true, + "value": ["test"], + "operator": "is", + "filters": [] + }, + { + "type": "fetch", + "isEvent": true, + "value": [], + "operator": "is", + "filters": [ + { + "type": "fetchUrl", + "isEvent": false, + "value": ["/ai/docs/chat"], + "operator": "is", + "filters": [] + }, + { + "type": "fetchStatusCode", + "isEvent": false, + "value": ["400"], + "operator": "=", + "filters": [] + }, + { + "type": "fetchMethod", + "isEvent": false, + "value": [], + "operator": "is", + "filters": [] + }, + { + "type": "fetchDuration", + "isEvent": false, + "value": [], + "operator": "=", + "filters": [] + }, + { + "type": "fetchRequestBody", + "isEvent": false, + "value": [], + "operator": "is", + "filters": [] + }, + { + "type": "fetchResponseBody", + "isEvent": false, + "value": [], + "operator": "is", + "filters": [] + } + ] + } + ], + "eventsOrder": "then" + } + } + ] +} +` From b0bf357be1e9be5e86aae4f71f8fc61121edb054 Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Tue, 18 Feb 2025 11:42:39 +0100 Subject: [PATCH 04/30] feat(analytics): base structure --- backend/cmd/analytics/main.go | 96 +- backend/internal/config/analytics/config.go | 1 + backend/pkg/analytics/builder.go | 9 +- backend/pkg/analytics/cards/model.go | 4 +- backend/pkg/analytics/charts/charts.go | 144 +- backend/pkg/analytics/charts/counters.go | 269 ++-- backend/pkg/analytics/charts/handlers.go | 2 +- backend/pkg/analytics/charts/metric_funnel.go | 9 + 
backend/pkg/analytics/charts/metric_table.go | 253 ++++ .../pkg/analytics/charts/metric_timeseries.go | 116 ++ backend/pkg/analytics/charts/model.go | 190 ++- backend/pkg/analytics/charts/query.go | 150 ++ backend/pkg/analytics/db/connector.go | 64 + backend/pkg/analytics/query/chartQuery.go | 1299 +++++++++-------- backend/pkg/analytics/query/funnel.go | 7 + backend/pkg/analytics/query/model.go | 137 ++ backend/pkg/analytics/query/queryBuilder.go | 253 ++++ backend/pkg/analytics/query/table.go | 252 ++++ backend/pkg/analytics/query/timeseries.go | 42 + 19 files changed, 2282 insertions(+), 1015 deletions(-) create mode 100644 backend/pkg/analytics/charts/metric_funnel.go create mode 100644 backend/pkg/analytics/charts/metric_table.go create mode 100644 backend/pkg/analytics/charts/metric_timeseries.go create mode 100644 backend/pkg/analytics/charts/query.go create mode 100644 backend/pkg/analytics/db/connector.go create mode 100644 backend/pkg/analytics/query/funnel.go create mode 100644 backend/pkg/analytics/query/model.go create mode 100644 backend/pkg/analytics/query/queryBuilder.go create mode 100644 backend/pkg/analytics/query/table.go create mode 100644 backend/pkg/analytics/query/timeseries.go diff --git a/backend/cmd/analytics/main.go b/backend/cmd/analytics/main.go index 8a7b95c29..1a4b099dd 100644 --- a/backend/cmd/analytics/main.go +++ b/backend/cmd/analytics/main.go @@ -2,71 +2,49 @@ package main import ( "context" - "os" - "os/signal" - "syscall" - + analyticsConfig "openreplay/backend/internal/config/analytics" + "openreplay/backend/pkg/analytics" + "openreplay/backend/pkg/analytics/db" + "openreplay/backend/pkg/db/postgres/pool" "openreplay/backend/pkg/logger" + "openreplay/backend/pkg/metrics" + analyticsMetrics "openreplay/backend/pkg/metrics/analytics" + databaseMetrics "openreplay/backend/pkg/metrics/database" + "openreplay/backend/pkg/metrics/web" + "openreplay/backend/pkg/server" + "openreplay/backend/pkg/server/api" ) func main() { ctx := context.Background() log := logger.New() - log.Info(ctx, "Cacher service started") + cfg := analyticsConfig.New(log) + webMetrics := web.New("analytics") + metrics.New(log, append(webMetrics.List(), append(analyticsMetrics.List(), databaseMetrics.List()...)...)) - sigchan := make(chan os.Signal, 1) - signal.Notify(sigchan, syscall.SIGINT, syscall.SIGTERM) - - for { - select { - case sig := <-sigchan: - log.Error(ctx, "Caught signal %v: terminating", sig) - os.Exit(0) - } + pgConn, err := pool.New(cfg.Postgres.String()) + if err != nil { + log.Fatal(ctx, "can't init postgres connection: %s", err) } -} + defer pgConn.Close() -// -//import ( -// "context" -// -// analyticsConfig "openreplay/backend/internal/config/analytics" -// "openreplay/backend/pkg/analytics" -// "openreplay/backend/pkg/db/postgres/pool" -// "openreplay/backend/pkg/logger" -// "openreplay/backend/pkg/metrics" -// "openreplay/backend/pkg/metrics/database" -// "openreplay/backend/pkg/metrics/web" -// "openreplay/backend/pkg/server" -// "openreplay/backend/pkg/server/api" -//) -// -//func main() { -// ctx := context.Background() -// log := logger.New() -// cfg := analyticsConfig.New(log) -// // Observability -// webMetrics := web.New("analytics") -// dbMetrics := database.New("analytics") -// metrics.New(log, append(webMetrics.List(), dbMetrics.List()...)) -// -// pgConn, err := pool.New(dbMetrics, cfg.Postgres.String()) -// if err != nil { -// log.Fatal(ctx, "can't init postgres connection: %s", err) -// } -// defer pgConn.Close() -// -// builder, err := 
analytics.NewServiceBuilder(log, cfg, webMetrics, dbMetrics, pgConn) -// if err != nil { -// log.Fatal(ctx, "can't init services: %s", err) -// } -// -// router, err := api.NewRouter(&cfg.HTTP, log) -// if err != nil { -// log.Fatal(ctx, "failed while creating router: %s", err) -// } -// router.AddHandlers(api.NoPrefix, builder.CardsAPI, builder.DashboardsAPI, builder.ChartsAPI) -// router.AddMiddlewares(builder.Auth.Middleware, builder.RateLimiter.Middleware, builder.AuditTrail.Middleware) -// -// server.Run(ctx, log, &cfg.HTTP, router) -//} + chConn, err := db.NewConnector(cfg.Clickhouse) + if err != nil { + log.Fatal(ctx, "can't init clickhouse connection: %s", err) + } + defer chConn.Stop() + + builder, err := analytics.NewServiceBuilder(log, cfg, webMetrics, pgConn, chConn) + if err != nil { + log.Fatal(ctx, "can't init services: %s", err) + } + + router, err := api.NewRouter(&cfg.HTTP, log) + if err != nil { + log.Fatal(ctx, "failed while creating router: %s", err) + } + router.AddHandlers(api.NoPrefix, builder.CardsAPI, builder.DashboardsAPI, builder.ChartsAPI) + router.AddMiddlewares(builder.Auth.Middleware, builder.RateLimiter.Middleware, builder.AuditTrail.Middleware) + + server.Run(ctx, log, &cfg.HTTP, router) +} diff --git a/backend/internal/config/analytics/config.go b/backend/internal/config/analytics/config.go index b6ca5ce4c..90398240a 100644 --- a/backend/internal/config/analytics/config.go +++ b/backend/internal/config/analytics/config.go @@ -14,6 +14,7 @@ import ( type Config struct { common.Config common.Postgres + common.Clickhouse redis.Redis objectstorage.ObjectsConfig common.HTTP diff --git a/backend/pkg/analytics/builder.go b/backend/pkg/analytics/builder.go index 68098dc01..743373b50 100644 --- a/backend/pkg/analytics/builder.go +++ b/backend/pkg/analytics/builder.go @@ -3,6 +3,7 @@ package analytics import ( "github.com/go-playground/validator/v10" "openreplay/backend/pkg/analytics/charts" + "openreplay/backend/pkg/analytics/db" "openreplay/backend/pkg/metrics/database" "time" @@ -27,13 +28,14 @@ type ServicesBuilder struct { ChartsAPI api.Handlers } -func NewServiceBuilder(log logger.Logger, cfg *analytics.Config, webMetrics web.Web, dbMetrics database.Database, pgconn pool.Pool) (*ServicesBuilder, error) { +func NewServiceBuilder(log logger.Logger, cfg *analytics.Config, webMetrics web.Web, dbMetrics database.Database, pgconn pool.Pool, chConn db.Connector) (*ServicesBuilder, error) { responser := api.NewResponser(webMetrics) audiTrail, err := tracer.NewTracer(log, pgconn, dbMetrics) if err != nil { return nil, err } reqValidator := validator.New() + cardsService, err := cards.New(log, pgconn) if err != nil { return nil, err @@ -42,6 +44,7 @@ func NewServiceBuilder(log logger.Logger, cfg *analytics.Config, webMetrics web. if err != nil { return nil, err } + dashboardsService, err := dashboards.New(log, pgconn) if err != nil { return nil, err @@ -50,7 +53,8 @@ func NewServiceBuilder(log logger.Logger, cfg *analytics.Config, webMetrics web. if err != nil { return nil, err } - chartsService, err := charts.New(log, pgconn) + + chartsService, err := charts.New(log, pgconn, chConn) if err != nil { return nil, err } @@ -58,6 +62,7 @@ func NewServiceBuilder(log logger.Logger, cfg *analytics.Config, webMetrics web. 
 	if err != nil {
 		return nil, err
 	}
+
 	return &ServicesBuilder{
 		Auth:        auth.NewAuth(log, cfg.JWTSecret, cfg.JWTSpotSecret, pgconn, nil, api.NoPrefix),
 		RateLimiter: limiter.NewUserRateLimiter(10, 30, 1*time.Minute, 5*time.Minute),
diff --git a/backend/pkg/analytics/cards/model.go b/backend/pkg/analytics/cards/model.go
index 4b5dd8a10..0b42df18a 100644
--- a/backend/pkg/analytics/cards/model.go
+++ b/backend/pkg/analytics/cards/model.go
@@ -67,8 +67,8 @@ type CardSeries struct {
 }
 
 type SeriesFilter struct {
-	EventOrder string       `json:"eventOrder" validate:"required,oneof=then or and"`
-	Filters    []FilterItem `json:"filters"`
+	EventsOrder string       `json:"eventsOrder" validate:"required,oneof=then or and"`
+	Filters     []FilterItem `json:"filters"`
 }
 
 type FilterItem struct {
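Note on the rename above: `eventOrder` becomes `eventsOrder` in the JSON contract, so clients must now send the plural key. A minimal, self-contained sketch of a payload the updated `SeriesFilter` would accept (the local struct only mirrors the tags above; the filter values are invented):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Mirrors the SeriesFilter shape from cards/model.go after the rename.
type seriesFilter struct {
	EventsOrder string `json:"eventsOrder"`
	Filters     []struct {
		Type  string   `json:"type"`
		Value []string `json:"value"`
	} `json:"filters"`
}

func main() {
	// "eventsOrder" (plural) is what the backend validates now;
	// a legacy "eventOrder" key would simply be ignored.
	raw := `{"eventsOrder":"then","filters":[{"type":"click","value":["#signup"]}]}`
	var f seriesFilter
	if err := json.Unmarshal([]byte(raw), &f); err != nil {
		panic(err)
	}
	fmt.Println(f.EventsOrder, len(f.Filters)) // then 1
}
```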
diff --git a/backend/pkg/analytics/charts/charts.go b/backend/pkg/analytics/charts/charts.go
index 724a814a7..46a40d364 100644
--- a/backend/pkg/analytics/charts/charts.go
+++ b/backend/pkg/analytics/charts/charts.go
@@ -1,155 +1,51 @@
 package charts
 
 import (
-	"encoding/json"
 	"fmt"
-	"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
-	"openreplay/backend/pkg/analytics/cards"
-
+	"openreplay/backend/pkg/analytics/db"
 	"openreplay/backend/pkg/db/postgres/pool"
 	"openreplay/backend/pkg/logger"
 )
 
 type Charts interface {
-	GetData(projectId int, userId uint64, req *GetCardChartDataRequest) ([]DataPoint, error)
+	GetData(projectId int, userId uint64, req *MetricPayload) (interface{}, error)
 }
 
 type chartsImpl struct {
 	log    logger.Logger
 	pgconn pool.Pool
-	chConn driver.Conn
+	chConn db.Connector
 }
 
-func New(log logger.Logger, conn pool.Pool) (Charts, error) {
+func New(log logger.Logger, conn pool.Pool, chConn db.Connector) (Charts, error) {
 	return &chartsImpl{
 		log:    log,
 		pgconn: conn,
+		chConn: chConn,
 	}, nil
 }
 
 // GetData def get_chart()
-func (s *chartsImpl) GetData(projectId int, userID uint64, req *GetCardChartDataRequest) ([]DataPoint, error) {
+func (s *chartsImpl) GetData(projectId int, userID uint64, req *MetricPayload) (interface{}, error) {
 	if req == nil {
 		return nil, fmt.Errorf("request is empty")
 	}
 
-	switch {
-	case req.MetricType == "funnel":
-		return nil, fmt.Errorf("funnel metric type is not supported yet")
-	case req.MetricType == "heatMap":
-		return nil, fmt.Errorf("heatMap metric type is not supported yet")
-	case req.MetricType == "pathAnalysis":
-		return nil, fmt.Errorf("pathAnalysis metric type is not supported yet")
-
-	case req.MetricType == "timeseries":
-		return s.getTimeseriesCharts(projectId, userID, req)
-	case req.MetricType == "table":
-		return nil, fmt.Errorf("table metric type is not supported yet")
-
-	case req.MetricType == "errors":
-		fallthrough
-	case req.MetricType == "performance":
-		fallthrough
-	case req.MetricType == "resources":
-		fallthrough
-	case req.MetricType == "webVitals":
-		return s.getMetric(projectId, userID, req)
-
-	case req.MetricType == "retention":
-		return nil, fmt.Errorf("retention metric type is not supported yet")
-	case req.MetricType == "stickiness":
-		return nil, fmt.Errorf("stickiness metric type is not supported yet")
+	payload := &Payload{
+		ProjectId:     projectId,
+		UserId:        userID,
+		MetricPayload: req,
 	}
-
-	jsonInput := `
-	{
-	  "data": [
-		{
-		  "timestamp": 1733934939000,
-		  "Series A": 100,
-		  "Series B": 200
-		},
-		{
-		  "timestamp": 1733935939000,
-		  "Series A": 150,
-		  "Series B": 250
-		}
-	  ]
-	}`
-
-	var resp GetCardChartDataResponse
-	if err := json.Unmarshal([]byte(jsonInput), &resp); err != nil {
-		return nil, fmt.Errorf("failed to unmarshal response: %w", err)
+
+	// Returning the error (instead of log.Fatalf) keeps a bad request from
+	// killing the whole service.
+	qb, err := NewQueryBuilder(payload)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create query builder: %w", err)
 	}
-	return resp.Data, nil
-}
-
-func (s *chartsImpl) getMetric(projectID int, userID uint64, req *GetCardChartDataRequest) ([]DataPoint, error) {
-	switch req.MetricOf {
-	case "countSessions": // metrics.get_processed_sessions
-		return nil, fmt.Errorf("countSessions metric type is not supported yet")
-	case "avgVisitedPages": // metrics.get_user_activity_avg_visited_pages
-		return nil, fmt.Errorf("avgVisitedPages metric type is not supported yet")
-	case "countRequests": // metrics.get_top_metrics_count_requests
-		return nil, fmt.Errorf("countRequests metric type is not supported yet")
-	case "impactedSessionsByJsErrors": // metrics.get_impacted_sessions_by_js_errors
-		return nil, fmt.Errorf("impactedSessionsByJsErrors metric type is not supported yet")
-	case "domainsErrors4xx": // metrics.get_domains_errors_4xx
-		return nil, fmt.Errorf("domainsErrors4xx metric type is not supported yet")
-	case "domainsErrors5xx": // metrics.get_domains_errors_5xx
-		return nil, fmt.Errorf("domainsErrors5xx metric type is not supported yet")
-	case "errorsPerDomains": // metrics.get_errors_per_domains
-		return nil, fmt.Errorf("errorsPerDomains metric type is not supported yet")
-	case "errorsPerType": // metrics.get_errors_per_type
-		return nil, fmt.Errorf("errorsPerType metric type is not supported yet")
-	}
-	return nil, fmt.Errorf("metric type is not supported yet")
-}
-
-func (s *chartsImpl) getTimeseriesCharts(projectID int, userID uint64, req *GetCardChartDataRequest) ([]DataPoint, error) {
-	var dataPoints []DataPoint
-	var stepSize = getStepSize(req.StartTimestamp, req.EndTimestamp, req.Density, true, 1000)
-	var query string
-
-	switch req.MetricOf {
-	case "sessionCount":
-		query = fmt.Sprintf(`
-		SELECT
-			toUnixTimestamp(toStartOfInterval(processed_sessions.datetime, INTERVAL %d second)) * 1000 AS timestamp,
-			COUNT(processed_sessions.session_id) AS count
-		FROM (
-			SELECT
-				s.session_id AS session_id,
-				s.datetime AS datetime
-				%s
-		) AS processed_sessions
-		GROUP BY timestamp
-		ORDER BY timestamp;
-		`, stepSize, "query_part") // Replace "query_part" with the actual query part
-	default:
-		return nil, fmt.Errorf("unsupported metric: %s", req.MetricOf)
-	}
-
-	fmt.Printf("stepSize: %v\n", stepSize)
-
-	for _, series := range req.Series {
-		res, err := s.searchSeries(projectID, series)
-		if err != nil {
-			return nil, fmt.Errorf("failed to search series: %w", err)
-		}
-		if seriesData, ok := res.([]DataPoint); ok {
-			dataPoints = append(dataPoints, seriesData...)
-		} else {
-			return nil, fmt.Errorf("unexpected data format from searchSeries")
-		}
-	}
-	return dataPoints, nil
-}
-
-func (s *chartsImpl) searchSeries(projectID int, series cards.CardSeries) (interface{}, error) {
-
-	// Placeholder implementation
-	return []DataPoint{}, nil
+	resp, err := qb.Execute(payload, s.chConn)
+	if err != nil {
+		return nil, fmt.Errorf("failed to execute chart query: %w", err)
+	}
+
+	return resp, nil
 }
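`GetData` now only wraps the request into a `Payload` and defers to a per-metric-type builder (`NewQueryBuilder` is added in `charts/query.go` further down in this patch). A rough usage sketch with the types from this patch; `exampleDispatch` is a hypothetical helper, and the project id, user id, and timestamps are illustrative only:

```go
// Hypothetical call site inside package charts: pick a builder by metric
// type and run it. conn is the db.Connector wired up in main.go.
func exampleDispatch(conn db.Connector) (interface{}, error) {
	p := &Payload{
		ProjectId: 1,  // invented values for illustration
		UserId:    42,
		MetricPayload: &MetricPayload{
			MetricType:     MetricTypeTimeseries,
			MetricOf:       "sessionCount",
			StartTimestamp: 1733934939000,
			EndTimestamp:   1733935939000,
			Density:        7,
		},
	}
	qb, err := NewQueryBuilder(p) // yields TimeSeriesQueryBuilder here
	if err != nil {
		return nil, err
	}
	return qb.Execute(p, conn)
}
```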
diff --git a/backend/pkg/analytics/charts/counters.go b/backend/pkg/analytics/charts/counters.go
index 431104b03..520ca7163 100644
--- a/backend/pkg/analytics/charts/counters.go
+++ b/backend/pkg/analytics/charts/counters.go
@@ -1,7 +1,6 @@
 package charts
 
 import (
-	"context"
 	"fmt"
 	"log"
 	"strconv"
@@ -162,7 +161,7 @@ def get_main_sessions_table(timestamp=0):
 	and timestamp and timestamp >= TimeUTC.now(delta_days=-7) else "experimental.sessions"
 */
 func getMainSessionsTable(timestamp int64) string {
-	return "product_analytics.sessions"
+	return "experimental.sessions"
 }
 
 // Function to convert named parameters to positional parameters
@@ -272,7 +271,7 @@ func progress(oldVal, newVal uint64) float64 {
 }
 
 // Trying to find a common part
-func parse(projectID uint64, startTs, endTs int64, density int, args map[string]interface{}) ([]string, []string, map[string]interface{}) {
+func parse(projectID int, startTs, endTs int64, density int, args map[string]interface{}) ([]string, []string, map[string]interface{}) {
 	stepSize := getStepSize(startTs, endTs, density, false, 1000)
 	chSubQuery := getBasicConstraints("sessions", true, false, args, "project_id")
 	chSubQueryChart := getBasicConstraints("sessions", true, true, args, "project_id")
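`getStepSize` is used by `parse` above (and by the timeseries builder later in this patch) but its body is outside this diff. One plausible reading, given that the result is spliced into `INTERVAL :step_size second` and the trailing `factor` argument is always 1000 (milliseconds to seconds); the name and logic below are an assumption, not the real implementation:

```go
// Plausible sketch only: the real getStepSize is not shown in this diff.
// startTs/endTs are epoch milliseconds; factor converts to seconds, so a
// 24h window at density 24 would yield 3600-second buckets.
func getStepSizeSketch(startTs, endTs int64, density int, decimal bool, factor int64) int64 {
	if density <= 0 || factor <= 0 {
		return 0
	}
	step := (endTs - startTs) / int64(density) / factor
	if !decimal && step == 0 {
		step = 1 // never emit "INTERVAL 0 second"
	}
	return step
}
```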
@@ -293,134 +292,136 @@
 }
 
 // Sessions trend
-func (s *chartsImpl) getProcessedSessions(projectID uint64, startTs, endTs int64, density int, args map[string]interface{}) {
-	chQuery := `
-	SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL :step_size second)) * 1000 AS timestamp,
-	       COUNT(DISTINCT sessions.session_id) AS value
-	FROM :main_sessions_table AS sessions
-	WHERE :sub_query_chart
-	GROUP BY timestamp
-	ORDER BY timestamp;
-	`
-	chSubQuery, chSubQueryChart, params := parse(projectID, startTs, endTs, density, args)
-
-	chQuery = strings.Replace(chQuery, ":main_sessions_table", getMainSessionsTable(startTs), -1)
-	chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQueryChart, " AND "), -1)
-
-	preparedQuery, preparedArgs := replaceNamedParams(chQuery, params)
-	rows, err := s.chConn.Query(context.Background(), preparedQuery, preparedArgs)
-	if err != nil {
-		log.Fatalf("Error executing query: %v", err)
-	}
-	preparedRows := make([]map[string]interface{}, 0)
-	var sum uint64
-	for rows.Next() {
-		var timestamp, value uint64
-		if err := rows.Scan(&timestamp, &value); err != nil {
-			log.Fatalf("Error scanning row: %v", err)
-		}
-		fmt.Printf("Timestamp: %d, Value: %d\n", timestamp, value)
-		sum += value
-		preparedRows = append(preparedRows, map[string]interface{}{"timestamp": timestamp, "value": value})
-	}
-
-	results := map[string]interface{}{
-		"value": sum,
-		"chart": CompleteMissingSteps(startTs, endTs, int(density), map[string]interface{}{"value": 0}, preparedRows, "timestamp", 1000),
-	}
-
-	diff := endTs - startTs
-	endTs = startTs
-	startTs = endTs - diff
-
-	log.Println(results)
-
-	chQuery = fmt.Sprintf(`
-	SELECT COUNT(1) AS count
-	FROM :main_sessions_table AS sessions
-	WHERE :sub_query_chart;
-	`)
-	chQuery = strings.Replace(chQuery, ":main_sessions_table", getMainSessionsTable(startTs), -1)
-	chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQuery, " AND "), -1)
-
-	var count uint64
-
-	preparedQuery, preparedArgs = replaceNamedParams(chQuery, params)
-	if err := s.chConn.QueryRow(context.Background(), preparedQuery, preparedArgs).Scan(&count); err != nil {
-		log.Fatalf("Error executing query: %v", err)
-	}
-
-	results["progress"] = progress(count, results["value"].(uint64))
-
-	// TODO: this should be returned in any case
-	results["unit"] = "COUNT"
-	fmt.Println(results)
-}
-
-// Users trend
-func (c *chartsImpl) getUniqueUsers(projectID uint64, startTs, endTs uint64, density uint64, args map[string]interface{}) {
-	chQuery := `
-	SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL :step_size second)) * 1000 AS timestamp,
-	       COUNT(DISTINCT sessions.user_id) AS value
-	FROM :main_sessions_table AS sessions
-	WHERE :sub_query_chart
-	GROUP BY timestamp
-	ORDER BY timestamp;
-	`
-	chSubQuery, chSubQueryChart, params := parse(projectID, startTs, endTs, density, args)
-	chSubQueryChart = append(chSubQueryChart, []string{"isNotNull(sessions.user_id)", "sessions.user_id!=''"}...)
-
-	chQuery = strings.Replace(chQuery, ":main_sessions_table", getMainSessionsTable(startTs), -1)
-	chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQueryChart, " AND "), -1)
-
-	preparedQuery, preparedArgs := replaceNamedParams(chQuery, params)
-	rows, err := c.chConn.Query(context.Background(), preparedQuery, preparedArgs)
-	if err != nil {
-		log.Fatalf("Error executing query: %v", err)
-	}
-	preparedRows := make([]map[string]interface{}, 0)
-	var sum uint64
-	for rows.Next() {
-		var timestamp, value uint64
-		if err := rows.Scan(&timestamp, &value); err != nil {
-			log.Fatalf("Error scanning row: %v", err)
-		}
-		fmt.Printf("Timestamp: %d, Value: %d\n", timestamp, value)
-		sum += value
-		preparedRows = append(preparedRows, map[string]interface{}{"timestamp": timestamp, "value": value})
-	}
-
-	results := map[string]interface{}{
-		"value": sum,
-		"chart": CompleteMissingSteps(startTs, endTs, int(density), map[string]interface{}{"value": 0}, preparedRows, "timestamp", 1000),
-	}
-
-	diff := endTs - startTs
-	endTs = startTs
-	startTs = endTs - diff
-
-	log.Println(results)
-
-	chQuery = fmt.Sprintf(`
-	SELECT COUNT(DISTINCT user_id) AS count
-	FROM :main_sessions_table AS sessions
-	WHERE :sub_query_chart;
-	`)
-	chQuery = strings.Replace(chQuery, ":main_sessions_table", getMainSessionsTable(startTs), -1)
-	chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQuery, " AND "), -1)
-
-	var count uint64
-
-	preparedQuery, preparedArgs = replaceNamedParams(chQuery, params)
-	if err := c.chConn.QueryRow(context.Background(), preparedQuery, preparedArgs).Scan(&count); err != nil {
-		log.Fatalf("Error executing query: %v", err)
-	}
-
-	results["progress"] = progress(count, results["value"].(uint64))
-
-	// TODO: this should be returned in any case
-	results["unit"] = "COUNT"
-	fmt.Println(results)
-
-	return
-}
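The removed trend counters above lean on `replaceNamedParams` ("Function to convert named parameters to positional parameters"); its implementation is not part of this diff. A sketch of the idea, with the caveat that a real version has to cope with `:name` tokens inside string literals; the function name here is an assumption:

```go
package charts

import "regexp"

// Sketch only: converts ":step_size"-style placeholders to positional "?"
// markers, collecting values in the order the placeholders appear in the
// query so the args line up with the markers.
func replaceNamedParamsSketch(query string, params map[string]interface{}) (string, []interface{}) {
	var args []interface{}
	re := regexp.MustCompile(`:[a-zA-Z_][a-zA-Z0-9_]*`)
	out := re.ReplaceAllStringFunc(query, func(m string) string {
		v, ok := params[m[1:]] // strip the leading ':'
		if !ok {
			return m // leave unknown placeholders untouched
		}
		args = append(args, v)
		return "?"
	})
	return out, args
}
```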
+//func (s *chartsImpl) getProcessedSessions(projectID int, startTs, endTs int64, density int, args map[string]interface{}) (interface{}, error) {
+//	chQuery := `
+//	SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL :step_size second)) * 1000 AS timestamp,
+//	       COUNT(DISTINCT sessions.session_id) AS value
+//	FROM :main_sessions_table AS sessions
+//	WHERE :sub_query_chart
+//	GROUP BY timestamp
+//	ORDER BY timestamp;
+//	`
+//	chSubQuery, chSubQueryChart, params := parse(projectID, startTs, endTs, density, args)
+//
+//	chQuery = strings.Replace(chQuery, ":main_sessions_table", getMainSessionsTable(startTs), -1)
+//	chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQueryChart, " AND "), -1)
+//
+//	preparedQuery, preparedArgs := replaceNamedParams(chQuery, params)
+//	rows, err := s.chConn.Query(context.Background(), preparedQuery, preparedArgs)
+//	if err != nil {
+//		log.Fatalf("Error executing query: %v", err)
+//	}
+//	preparedRows := make([]map[string]interface{}, 0)
+//	var sum uint64
+//	for rows.Next() {
+//		var timestamp, value uint64
+//		if err := rows.Scan(&timestamp, &value); err != nil {
+//			log.Fatalf("Error scanning row: %v", err)
+//		}
+//		fmt.Printf("Timestamp: %d, Value: %d\n", timestamp, value)
+//		sum += value
+//		preparedRows = append(preparedRows, map[string]interface{}{"timestamp": timestamp, "value": value})
+//	}
+//
+//	results := map[string]interface{}{
+//		"value": sum,
+//		"chart": CompleteMissingSteps(startTs, endTs, int(density), map[string]interface{}{"value": 0}, preparedRows, "timestamp", 1000),
+//	}
+//
+//	diff := endTs - startTs
+//	endTs = startTs
+//	startTs = endTs - diff
+//
+//	log.Println(results)
+//
+//	chQuery = fmt.Sprintf(`
+//	SELECT COUNT(1) AS count
+//	FROM :main_sessions_table AS sessions
+//	WHERE :sub_query_chart;
+//	`)
+//	chQuery = strings.Replace(chQuery, ":main_sessions_table", getMainSessionsTable(startTs), -1)
+//	chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQuery, " AND "), -1)
+//
+//	var count uint64
+//
+//	preparedQuery, preparedArgs = replaceNamedParams(chQuery, params)
+//	if err := s.chConn.QueryRow(context.Background(), preparedQuery, preparedArgs).Scan(&count); err != nil {
+//		log.Fatalf("Error executing query: %v", err)
+//	}
+//
+//	results["progress"] = progress(count, results["value"].(uint64))
+//
+//	// TODO: this should be returned in any case
+//	results["unit"] = "COUNT"
+//	fmt.Println(results)
+//
+//	return results, nil
+//}
+//
+//// Users trend
+//func (s *chartsImpl) getUniqueUsers(projectID int, startTs, endTs int64, density int, args map[string]interface{}) (interface{}, error) {
+//	chQuery := `
+//	SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL :step_size second)) * 1000 AS timestamp,
+//	       COUNT(DISTINCT sessions.user_id) AS value
+//	FROM :main_sessions_table AS sessions
+//	WHERE :sub_query_chart
+//	GROUP BY timestamp
+//	ORDER BY timestamp;
+//	`
+//	chSubQuery, chSubQueryChart, params := parse(projectID, startTs, endTs, density, args)
+//	chSubQueryChart = append(chSubQueryChart, []string{"isNotNull(sessions.user_id)", "sessions.user_id!=''"}...)
+//
+//	chQuery = strings.Replace(chQuery, ":main_sessions_table", getMainSessionsTable(startTs), -1)
+//	chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQueryChart, " AND "), -1)
+//
+//	preparedQuery, preparedArgs := replaceNamedParams(chQuery, params)
+//	rows, err := s.chConn.Query(context.Background(), preparedQuery, preparedArgs)
+//	if err != nil {
+//		log.Fatalf("Error executing query: %v", err)
+//	}
+//	preparedRows := make([]map[string]interface{}, 0)
+//	var sum uint64
+//	for rows.Next() {
+//		var timestamp, value uint64
+//		if err := rows.Scan(&timestamp, &value); err != nil {
+//			log.Fatalf("Error scanning row: %v", err)
+//		}
+//		fmt.Printf("Timestamp: %d, Value: %d\n", timestamp, value)
+//		sum += value
+//		preparedRows = append(preparedRows, map[string]interface{}{"timestamp": timestamp, "value": value})
+//	}
+//
+//	results := map[string]interface{}{
+//		"value": sum,
+//		"chart": CompleteMissingSteps(startTs, endTs, int(density), map[string]interface{}{"value": 0}, preparedRows, "timestamp", 1000),
+//	}
+//
+//	diff := endTs - startTs
+//	endTs = startTs
+//	startTs = endTs - diff
+//
+//	log.Println(results)
+//
+//	chQuery = fmt.Sprintf(`
+//	SELECT COUNT(DISTINCT user_id) AS count
+//	FROM :main_sessions_table AS sessions
+//	WHERE :sub_query_chart;
+//	`)
+//	chQuery = strings.Replace(chQuery, ":main_sessions_table", getMainSessionsTable(startTs), -1)
+//	chQuery = strings.Replace(chQuery, ":sub_query_chart", strings.Join(chSubQuery, " AND "), -1)
+//
+//	var count uint64
+//
+//	preparedQuery, preparedArgs = replaceNamedParams(chQuery, params)
+//	if err := s.chConn.QueryRow(context.Background(), preparedQuery, preparedArgs).Scan(&count); err != nil {
+//		log.Fatalf("Error executing query: %v", err)
+//	}
+//
+//	results["progress"] = progress(count, results["value"].(uint64))
+//
+//	// TODO: this should be returned in any case
+//	results["unit"] = "COUNT"
+//	fmt.Println(results)
+//
+//	return results, nil
+//}
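Both commented-out counters compute a `progress` value by shifting the query window back by its own length (`diff := endTs - startTs; endTs = startTs; startTs = endTs - diff`) and re-counting over the previous period. `progress` itself is outside this diff; one plausible implementation of the percentage change, guarding the zero denominator (the name is an assumption):

```go
// Plausible sketch of the progress helper used above: percentage change of
// the current window's count vs. the shifted-back previous window's count.
func progressSketch(oldVal, newVal uint64) float64 {
	if oldVal == 0 {
		if newVal == 0 {
			return 0
		}
		return 100 // no baseline: treat any growth as +100%
	}
	return (float64(newVal) - float64(oldVal)) / float64(oldVal) * 100
}
```

For example, 80 sessions in the previous window and 100 in the current one would yield +25.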
diff --git a/backend/pkg/analytics/charts/handlers.go b/backend/pkg/analytics/charts/handlers.go
index 2b35c5b3c..8e759ea91 100644
--- a/backend/pkg/analytics/charts/handlers.go
+++ b/backend/pkg/analytics/charts/handlers.go
@@ -74,7 +74,7 @@ func (e *handlersImpl) getCardChartData(w http.ResponseWriter, r *http.Request)
 	}
 	bodySize = len(bodyBytes)
 
-	req := &GetCardChartDataRequest{}
+	req := &MetricPayload{}
 	if err := json.Unmarshal(bodyBytes, req); err != nil {
 		e.responser.ResponseWithError(e.log, r.Context(), w, http.StatusBadRequest, err, startTime, r.URL.Path, bodySize)
 		return
diff --git a/backend/pkg/analytics/charts/metric_funnel.go b/backend/pkg/analytics/charts/metric_funnel.go
new file mode 100644
index 000000000..0f58c886d
--- /dev/null
+++ b/backend/pkg/analytics/charts/metric_funnel.go
@@ -0,0 +1,9 @@
+package charts
+
+import "openreplay/backend/pkg/analytics/db"
+
+type FunnelQueryBuilder struct{}
+
+func (f FunnelQueryBuilder) Execute(p *Payload, conn db.Connector) (interface{}, error) {
+	return "-- Funnel query placeholder", nil
+}
diff --git a/backend/pkg/analytics/charts/metric_table.go b/backend/pkg/analytics/charts/metric_table.go
new file mode 100644
index 000000000..46135fd55
--- /dev/null
+++ b/backend/pkg/analytics/charts/metric_table.go
@@ -0,0 +1,253 @@
+package charts
+
+import (
+	"fmt"
+	"openreplay/backend/pkg/analytics/db"
+	"strings"
+)
+
+type TableQueryBuilder struct{}
+
+func (t TableQueryBuilder) Execute(p *Payload, conn db.Connector) (interface{}, error) {
+	return t.buildQuery(p)
+}
+
+func (t 
TableQueryBuilder) buildQuery(r *Payload) (string, error) { + s := r.Series[0] + sessionFilters, eventFilters := partitionFilters(s.Filter.Filters) + sessionWhere := buildSessionWhere(sessionFilters) + eventWhere, seqHaving := buildEventsWhere(eventFilters, s.Filter.EventsOrder) + + subQuery := fmt.Sprintf( + "SELECT %s,\n"+ + " MIN(%s) AS first_event_ts,\n"+ + " MAX(%s) AS last_event_ts\n"+ + "FROM %s AS main\n"+ + "WHERE main.project_id = %%(project_id)s\n"+ + " AND %s >= toDateTime(%%(start_time)s/1000)\n"+ + " AND %s <= toDateTime(%%(end_time)s/1000)\n"+ + " AND (%s)\n"+ + "GROUP BY %s\n"+ + "HAVING %s", + ColEventSessionID, + ColEventTime, + ColEventTime, + TableEvents, + ColEventTime, + ColEventTime, + strings.Join(eventWhere, " OR "), + ColEventSessionID, + seqHaving, + ) + + joinQuery := fmt.Sprintf( + "SELECT *\n"+ + "FROM %s AS s\n"+ + "INNER JOIN (\n"+ + " SELECT DISTINCT ev.session_id, ev.`$current_url` AS url_path\n"+ + " FROM %s AS ev\n"+ + " WHERE ev.created_at >= toDateTime(%%(start_time)s/1000)\n"+ + " AND ev.created_at <= toDateTime(%%(end_time)s/1000)\n"+ + " AND ev.project_id = %%(project_id)s\n"+ + " AND ev.`$event_name` = 'LOCATION'\n"+ + ") AS extra_event USING (session_id)\n"+ + "WHERE s.project_id = %%(project_id)s\n"+ + " AND isNotNull(s.duration)\n"+ + " AND s.datetime >= toDateTime(%%(start_time)s/1000)\n"+ + " AND s.datetime <= toDateTime(%%(end_time)s/1000)\n", + TableSessions, + TableEvents, + ) + + if len(sessionWhere) > 0 { + joinQuery += " AND " + strings.Join(sessionWhere, " AND ") + "\n" + } + + main := fmt.Sprintf( + "SELECT s.session_id AS session_id, s.url_path\n"+ + "FROM (\n%s\n) AS f\n"+ + "INNER JOIN (\n%s) AS s\n"+ + " ON (s.session_id = f.session_id)\n", + subQuery, + joinQuery, + ) + + final := fmt.Sprintf( + "SELECT COUNT(DISTINCT url_path) OVER () AS main_count,\n"+ + " url_path AS name,\n"+ + " COUNT(DISTINCT session_id) AS total,\n"+ + " COALESCE(SUM(COUNT(DISTINCT session_id)) OVER (), 0) AS total_count\n"+ + "FROM (\n%s) AS filtered_sessions\n"+ + "GROUP BY url_path\n"+ + "ORDER BY total DESC\n"+ + "LIMIT 200 OFFSET 0;", + main, + ) + + return final, nil +} + +func partitionFilters(filters []Filter) (sessionFilters []Filter, eventFilters []Filter) { + for _, f := range filters { + if f.IsEvent { + eventFilters = append(eventFilters, f) + } else { + sessionFilters = append(sessionFilters, f) + } + } + return +} + +func buildSessionWhere(filters []Filter) []string { + var conds []string + for _, f := range filters { + switch f.Type { + case FilterUserCountry: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCountry, concatValues(f.Value))) + case FilterUserCity: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCity, concatValues(f.Value))) + case FilterUserState: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserState, concatValues(f.Value))) + case FilterUserId: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserID, concatValues(f.Value))) + case FilterUserAnonymousId: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserAnonymousID, concatValues(f.Value))) + case FilterUserOs: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserOS, concatValues(f.Value))) + case FilterUserBrowser: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserBrowser, concatValues(f.Value))) + case FilterUserDevice: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDevice, concatValues(f.Value))) + case FilterPlatform: + conds = append(conds, 
fmt.Sprintf("%s = toString('%s')", ColUserDeviceType, concatValues(f.Value)))
+		case FilterRevId:
+			conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColRevID, concatValues(f.Value)))
+		case FilterReferrer:
+			conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColBaseReferrer, concatValues(f.Value)))
+		case FilterDuration:
+			if len(f.Value) == 2 {
+				conds = append(conds, fmt.Sprintf("%s >= '%s'", ColDuration, f.Value[0]))
+				conds = append(conds, fmt.Sprintf("%s <= '%s'", ColDuration, f.Value[1]))
+			}
+		case FilterUtmSource:
+			conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmSource, concatValues(f.Value)))
+		case FilterUtmMedium:
+			conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmMedium, concatValues(f.Value)))
+		case FilterUtmCampaign:
+			conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmCampaign, concatValues(f.Value)))
+		case FilterMetadata:
+			conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColMetadata1, concatValues(f.Value)))
+		}
+	}
+	// appending "\n" to each condition for readability; can be removed.
+	for i := range conds {
+		conds[i] += "\n"
+	}
+	return conds
+}
+
+func concatValues(v []string) string {
+	return strings.Join(v, "")
+}
+
+func buildEventsWhere(filters []Filter, order EventOrder) (eventConditions []string, having string) {
+	basicEventTypes := "(" +
+		strings.Join([]string{
+			fmt.Sprintf("%s = 'CLICK'", ColEventName),
+			fmt.Sprintf("%s = 'INPUT'", ColEventName),
+			fmt.Sprintf("%s = 'LOCATION'", ColEventName),
+			fmt.Sprintf("%s = 'CUSTOM'", ColEventName),
+			fmt.Sprintf("%s = 'REQUEST'", ColEventName),
+		}, " OR ") + ")"
+
+	var seq []string
+	for _, f := range filters {
+		switch f.Type {
+		case FilterClick:
+			seq = append(seq, seqCond("CLICK", "selector", f))
+		case FilterInput:
+			seq = append(seq, seqCond("INPUT", "label", f))
+		case FilterLocation:
+			seq = append(seq, seqCond("LOCATION", "url_path", f))
+		case FilterCustom:
+			seq = append(seq, seqCond("CUSTOM", "name", f))
+		case FilterFetch:
+			seq = append(seq, seqFetchCond("REQUEST", f))
+		case FilterFetchStatusCode:
+			seq = append(seq, seqCond("REQUEST", "status", f))
+		default:
+			seq = append(seq, fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(string(f.Type))))
+		}
+	}
+	eventConditions = []string{basicEventTypes}
+
+	// then => sequenceMatch
+	// or   => OR
+	// and  => AND
+	switch order {
+	case EventOrderThen:
+		var pattern []string
+		for i := range seq {
+			pattern = append(pattern, fmt.Sprintf("(?%d)", i+1))
+		}
+		having = fmt.Sprintf("sequenceMatch('%s')(\n%s,\n%s)",
+			strings.Join(pattern, ""), fmt.Sprintf("toUnixTimestamp(%s)", ColEventTime), strings.Join(seq, ",\n"))
+	case EventOrderAnd:
+		// build AND
+		having = strings.Join(seq, " AND ")
+	default:
+		// default => OR
+		var orParts []string
+		for _, p := range seq {
+			orParts = append(orParts, "("+p+")")
+		}
+		having = strings.Join(orParts, " OR ")
+	}
+	return
+}
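How the `then` ordering turns into ClickHouse `sequenceMatch`: each event filter becomes one condition, and the pattern `(?1)(?2)…` requires them to occur in that order within a session. A sketch calling the functions above from inside package `charts`; `exampleThenHaving` is a hypothetical helper and the selector/path values are invented:

```go
// Two ordered event filters produce a sequenceMatch HAVING clause.
func exampleThenHaving() string {
	filters := []Filter{
		{Type: FilterLocation, IsEvent: true, Operator: "is", Value: []string{"/checkout"}},
		{Type: FilterClick, IsEvent: true, Operator: "is", Value: []string{"#pay"}},
	}
	_, having := buildEventsWhere(filters, EventOrderThen)
	// having is roughly:
	//   sequenceMatch('(?1)(?2)')(
	//     toUnixTimestamp(main.created_at),
	//     (main.`$event_name` = 'LOCATION' AND JSONExtractString(...) = '/checkout'),
	//     (main.`$event_name` = 'CLICK' AND JSONExtractString(...) = '#pay'))
	return having
}
```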
+func seqCond(eventName, key string, f Filter) string {
+	op := parseOperator(f.Operator)
+	return fmt.Sprintf("(%s = '%s' AND JSONExtractString(toString(%s), '%s') %s '%s')",
+		ColEventName, strings.ToUpper(eventName), ColEventProperties, key, op, concatValues(f.Value))
+}
+
+func seqFetchCond(eventName string, f Filter) string {
+	w := []string{fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(eventName))}
+	var extras []string
+	for _, c := range f.Filters {
+		switch c.Type {
+		case FilterFetch:
+			if len(c.Value) > 0 {
+				extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventURLPath, concatValues(c.Value)))
+			}
+		case FilterFetchStatusCode:
+			if len(c.Value) > 0 {
+				extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventStatus, concatValues(c.Value)))
+			}
+		default:
+			// placeholder if needed
+		}
+	}
+	if len(extras) > 0 {
+		w = append(w, strings.Join(extras, " AND "))
+	}
+	return "(" + strings.Join(w, " AND ") + ")"
+}
+
+func parseOperator(op string) string {
+	// TODO implement this properly
+	switch strings.ToLower(op) {
+	case OperatorStringContains:
+		return "LIKE"
+	case OperatorStringIs, OperatorStringOn, "=", OperatorStringOnAny:
+		return "="
+	case OperatorStringStartsWith:
+		return "LIKE"
+	case OperatorStringEndsWith:
+		// might interpret differently in real impl
+		return "="
+	default:
+		return "="
+	}
+}
diff --git a/backend/pkg/analytics/charts/metric_timeseries.go b/backend/pkg/analytics/charts/metric_timeseries.go
new file mode 100644
index 000000000..cc1df46c3
--- /dev/null
+++ b/backend/pkg/analytics/charts/metric_timeseries.go
@@ -0,0 +1,116 @@
+package charts
+
+import (
+	"fmt"
+	"openreplay/backend/pkg/analytics/db"
+)
+
+type TimeSeriesQueryBuilder struct{}
+
+func (t TimeSeriesQueryBuilder) Execute(p *Payload, conn db.Connector) (interface{}, error) {
+	// Errors are returned to the caller; log.Fatalf here would take down the
+	// whole service on a single bad request.
+	query, err := t.buildQuery(p)
+	if err != nil {
+		return nil, err
+	}
+
+	rows, err := conn.Query(query)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var results []DataPoint
+
+	for rows.Next() {
+		var res DataPoint
+		if err := rows.Scan(&res.Timestamp, &res.Count); err != nil {
+			return nil, err
+		}
+		results = append(results, res)
+	}
+
+	filled := FillMissingDataPoints(p.StartTimestamp, p.EndTimestamp, p.Density, DataPoint{}, results, 1000)
+	return filled, nil
+}
+
+func (t TimeSeriesQueryBuilder) buildQuery(p *Payload) (string, error) {
+	switch p.MetricOf {
+	case "sessionCount":
+		return t.buildSessionCountQuery(p), nil
+	case "userCount":
+		return t.buildUserCountQuery(p), nil
+	default:
+		return "", fmt.Errorf("unsupported metricOf: %q", p.MetricOf)
+	}
+}
+
+func (TimeSeriesQueryBuilder) buildSessionCountQuery(p *Payload) string {
+	stepSize := int(getStepSize(p.StartTimestamp, p.EndTimestamp, p.Density, false, 1000))
+	subquery := buildEventSubquery(p)
+	return fmt.Sprintf(`SELECT toUnixTimestamp(
+    toStartOfInterval(processed_sessions.datetime, INTERVAL %d second)
+) * 1000 AS timestamp,
+COUNT(processed_sessions.session_id) AS count
+FROM (
+    %s
+) AS processed_sessions
+GROUP BY timestamp
+ORDER BY timestamp;`, stepSize, subquery)
+}
+
+func (TimeSeriesQueryBuilder) buildUserCountQuery(p *Payload) string {
+	stepSize := int(getStepSize(p.StartTimestamp, p.EndTimestamp, p.Density, false, 1000))
+	subquery := buildEventSubquery(p)
+	return fmt.Sprintf(`SELECT toUnixTimestamp(
+    toStartOfInterval(processed_sessions.datetime, INTERVAL %d second)
+) * 1000 AS timestamp,
+COUNT(DISTINCT processed_sessions.user_id) AS count
+FROM (
+    %s
+) AS processed_sessions
+GROUP BY timestamp
+ORDER BY timestamp;`, stepSize, subquery)
+}
+
+func FillMissingDataPoints(
+	startTime, endTime int64,
+	density int,
+	neutral DataPoint,
+	rows []DataPoint,
+	timeCoefficient int64,
+) []DataPoint {
+	if density <= 1 {
+		return rows
+	}
+
+	stepSize := uint64(getStepSize(startTime, endTime, density, false, 1000))
+	bucketSize := stepSize * uint64(timeCoefficient)
+
+	lookup := make(map[uint64]DataPoint)
+	for _, dp := range rows {
+		if dp.Timestamp < uint64(startTime) {
+			continue
+		}
+		
bucket := uint64(startTime) + (((dp.Timestamp - uint64(startTime)) / bucketSize) * bucketSize) + lookup[bucket] = dp + } + + results := make([]DataPoint, 0, density) + for i := 0; i < density; i++ { + ts := uint64(startTime) + uint64(i)*bucketSize + if dp, ok := lookup[ts]; ok { + results = append(results, dp) + } else { + nd := neutral + nd.Timestamp = ts + results = append(results, nd) + } + } + return results +} diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go index 0ff630f2f..721008ea8 100644 --- a/backend/pkg/analytics/charts/model.go +++ b/backend/pkg/analytics/charts/model.go @@ -1,57 +1,155 @@ package charts -import "openreplay/backend/pkg/analytics/cards" - -type DataPoint struct { - Timestamp int64 `json:"timestamp"` - Series map[string]int64 `json:"series"` -} - -type GetCardChartDataRequest struct { - StartTimestamp int64 `json:"startTimestamp" validate:"required"` - EndTimestamp int64 `json:"endTimestamp" validate:"required"` - Density int `json:"density" validate:"required"` - MetricType string `json:"metricType" validate:"required,oneof=timeseries table funnel errors performance resources webVitals pathAnalysis retention stickiness heatMap"` - MetricOf string `json:"metricOf" validate:"required,oneof=sessionCount userCount"` - ViewType string `json:"viewType" validate:"required,oneof=lineChart areaChart barChart pieChart progressChart table metric"` - MetricFormat string `json:"metricFormat" validate:"required,oneof=default percentage"` - SessionID int64 `json:"sessionId"` - Series []cards.CardSeries `json:"series" validate:"required,dive"` -} - -type GetCardChartDataResponse struct { - Data []DataPoint `json:"data"` -} - +type Table string +type Column string type MetricType string -type MetricOfTimeseries string -type MetricOfTable string +type FilterType string +type EventType string +type EventOrder string const ( - MetricTypeTimeseries MetricType = "TIMESERIES" - MetricTypeTable MetricType = "TABLE" - - MetricOfTimeseriesSessionCount MetricOfTimeseries = "SESSION_COUNT" - MetricOfTimeseriesUserCount MetricOfTimeseries = "USER_COUNT" - - MetricOfTableVisitedURL MetricOfTable = "VISITED_URL" - MetricOfTableIssues MetricOfTable = "ISSUES" - MetricOfTableUserCountry MetricOfTable = "USER_COUNTRY" - MetricOfTableUserDevice MetricOfTable = "USER_DEVICE" - MetricOfTableUserBrowser MetricOfTable = "USER_BROWSER" + TableEvents Table = "product_analytics.events" + TableSessions Table = "experimental.sessions" ) -type SessionsSearchPayload struct { - StartTimestamp int64 - EndTimestamp int64 - Filters []SessionSearchFilter +const ( + ColEventTime Column = "main.created_at" + ColEventName Column = "main.`$event_name`" + ColEventProjectID Column = "main.project_id" + ColEventProperties Column = "main.`$properties`" + ColEventSessionID Column = "main.session_id" + ColEventURLPath Column = "main.url_path" + ColEventStatus Column = "main.status" +) + +const ( + ColSessionID Column = "s.session_id" + ColDuration Column = "s.duration" + ColUserCountry Column = "s.user_country" + ColUserCity Column = "s.user_city" + ColUserState Column = "s.user_state" + ColUserID Column = "s.user_id" + ColUserAnonymousID Column = "s.user_anonymous_id" + ColUserOS Column = "s.user_os" + ColUserBrowser Column = "s.user_browser" + ColUserDevice Column = "s.user_device" + ColUserDeviceType Column = "s.user_device_type" + ColRevID Column = "s.rev_id" + ColBaseReferrer Column = "s.base_referrer" + ColUtmSource Column = "s.utm_source" + ColUtmMedium Column = 
"s.utm_medium" + ColUtmCampaign Column = "s.utm_campaign" + ColMetadata1 Column = "s.metadata_1" + ColSessionProjectID Column = "s.project_id" + ColSessionIsNotNull Column = "isNotNull(s.duration)" +) + +const ( + MetricTypeTimeseries MetricType = "timeseries" + MetricTypeTable MetricType = "table" + MetricTypeFunnel MetricType = "funnel" +) + +const ( + EventOrderThen EventOrder = "then" + EventOrderOr EventOrder = "or" + EventOrderAnd EventOrder = "and" +) + +type MetricPayload struct { + StartTimestamp int64 `json:"startTimestamp"` + EndTimestamp int64 `json:"endTimestamp"` + Density int `json:"density"` + MetricOf string `json:"metricOf"` + MetricType MetricType `json:"metricType"` + MetricFormat string `json:"metricFormat"` + ViewType string `json:"viewType"` + Name string `json:"name"` + Series []Series `json:"series"` } -type SessionSearchFilter struct { - Type FilterType - Value interface{} - Operator SearchEventOperator +type Series struct { + Name string `json:"name"` + Filter struct { + Filters []Filter `json:"filters"` + EventsOrder EventOrder `json:"eventsOrder"` + } `json:"filter"` } -type SearchEventOperator string // Define constants as needed -type FilterType string // Define constants as needed +type Filter struct { + Type FilterType `json:"type"` + IsEvent bool `json:"isEvent"` + Value []string `json:"value"` + Operator string `json:"operator"` + Filters []Filter `json:"filters"` +} + +const ( + FilterUserId FilterType = "userId" + FilterUserAnonymousId FilterType = "userAnonymousId" + FilterReferrer FilterType = "referrer" + FilterDuration FilterType = "duration" + FilterUtmSource FilterType = "utmSource" + FilterUtmMedium FilterType = "utmMedium" + FilterUtmCampaign FilterType = "utmCampaign" + FilterUserCountry FilterType = "userCountry" + FilterUserCity FilterType = "userCity" + FilterUserState FilterType = "userState" + FilterUserOs FilterType = "userOs" + FilterUserBrowser FilterType = "userBrowser" + FilterUserDevice FilterType = "userDevice" + FilterPlatform FilterType = "platform" + FilterRevId FilterType = "revId" + FilterIssue FilterType = "issue" + FilterMetadata FilterType = "metadata" +) + +// Event filters +const ( + FilterClick FilterType = "click" + FilterInput FilterType = "input" + FilterLocation FilterType = "location" + FilterCustom FilterType = "customEvent" + FilterFetch FilterType = "fetch" + FilterFetchStatusCode FilterType = "status" + FilterTag FilterType = "tag" + FilterNetworkRequest FilterType = "fetch" + FilterGraphQLRequest FilterType = "graphql" + FilterStateAction FilterType = "stateAction" + FilterError FilterType = "error" + FilterAvgCpuLoad FilterType = "avgCpuLoad" + FilterAvgMemoryUsage FilterType = "avgMemoryUsage" +) + +// MOBILE FILTERS +const ( + FilterUserOsIos FilterType = "userOsIos" + FilterUserDeviceIos FilterType = "userDeviceIos" + FilterUserCountryIos FilterType = "userCountryIos" + FilterUserIdIos FilterType = "userIdIos" + FilterUserAnonymousIdIos FilterType = "userAnonymousIdIos" + FilterRevIdIos FilterType = "revIdIos" +) + +const ( + OperatorStringIs = "is" + OperatorStringIsAny = "isAny" + OperatorStringOn = "on" + OperatorStringOnAny = "onAny" + OperatorStringIsNot = "isNot" + OperatorStringIsUndefined = "isUndefined" + OperatorStringNotOn = "notOn" + OperatorStringContains = "contains" + OperatorStringNotContains = "notContains" + OperatorStringStartsWith = "startsWith" + OperatorStringEndsWith = "endsWith" +) + +type DataPoint struct { + Timestamp uint64 `json:"timestamp"` + Count uint64 `json:"count"` +} + 
+//type TimeseriesResponse struct { +// Data []DataPoint `json:"data"` +//} diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go new file mode 100644 index 000000000..1d82bd5da --- /dev/null +++ b/backend/pkg/analytics/charts/query.go @@ -0,0 +1,150 @@ +package charts + +import ( + "fmt" + "openreplay/backend/pkg/analytics/db" + "strings" +) + +type Payload struct { + *MetricPayload + ProjectId int + UserId uint64 +} + +type QueryBuilder interface { + Execute(p *Payload, conn db.Connector) (interface{}, error) +} + +func NewQueryBuilder(p *Payload) (QueryBuilder, error) { + switch p.MetricType { + case MetricTypeTimeseries: + return TimeSeriesQueryBuilder{}, nil + case MetricTypeFunnel: + return FunnelQueryBuilder{}, nil + case MetricTypeTable: + return TableQueryBuilder{}, nil + default: + return nil, fmt.Errorf("unknown metric type: %s", p.MetricType) + } +} + +func buildEventSubquery(p *Payload) string { + baseEventsWhere := buildBaseEventsWhere(p) + sequenceCond := buildSequenceCondition(p.Series) + sessionsWhere := buildSessionsWhere(p) + + if sequenceCond.seqPattern == "" { + return fmt.Sprintf(` +SELECT s.%[1]s AS %[1]s, + s.datetime AS datetime +FROM ( + SELECT main.session_id, + MIN(main.created_at) AS first_event_ts, + MAX(main.created_at) AS last_event_ts + FROM product_analytics.events AS main + WHERE %[2]s + GROUP BY session_id +) AS f +INNER JOIN ( + SELECT * + FROM experimental.sessions AS s + WHERE %[3]s +) AS s ON (s.session_id = f.session_id) +`, pickIDField(p), baseEventsWhere, sessionsWhere) + } + + return fmt.Sprintf(` +SELECT s.%[1]s AS %[1]s, + s.datetime AS datetime +FROM ( + SELECT main.session_id, + MIN(main.created_at) AS first_event_ts, + MAX(main.created_at) AS last_event_ts + FROM product_analytics.events AS main + WHERE %[2]s + GROUP BY session_id + HAVING sequenceMatch('%[3]s')(toDateTime(main.created_at), %[4]s) +) AS f +INNER JOIN ( + SELECT * + FROM experimental.sessions AS s + WHERE %[5]s +) AS s ON (s.session_id = f.session_id) +`, pickIDField(p), baseEventsWhere, sequenceCond.seqPattern, sequenceCond.seqEvents, sessionsWhere) +} + +func pickIDField(p *Payload) string { + if p.MetricOf == "userCount" { + return "user_id" + } + return "session_id" +} + +func buildBaseEventsWhere(p *Payload) string { + ts := fmt.Sprintf( + `(main.created_at >= toDateTime(%d / 1000) AND main.created_at <= toDateTime(%d / 1000))`, + p.StartTimestamp, + p.EndTimestamp, + ) + return fmt.Sprintf(`main.project_id = %d AND %s`, p.ProjectId, ts) +} + +func buildSessionsWhere(p *Payload) string { + ts := fmt.Sprintf( + `(s.datetime >= toDateTime(%d / 1000) AND s.datetime <= toDateTime(%d / 1000))`, + p.StartTimestamp, + p.EndTimestamp, + ) + return fmt.Sprintf(`s.project_id = %d AND isNotNull(s.duration) AND %s`, p.ProjectId, ts) +} + +type sequenceParts struct { + seqPattern string + seqEvents string +} + +func buildSequenceCondition(series []Series) sequenceParts { + var events []string + for _, s := range series { + if len(s.Filter.Filters) > 0 { + events = append(events, buildOneSeriesSequence(s.Filter.Filters)) + } + } + if len(events) < 2 { + return sequenceParts{"", ""} + } + pattern := "" + for i := 1; i <= len(events); i++ { + pattern += fmt.Sprintf("(?%d)", i) + } + return sequenceParts{ + seqPattern: pattern, + seqEvents: strings.Join(events, ", "), + } +} + +func buildOneSeriesSequence(filters []Filter) string { + return strings.Join(buildFilterConditions(filters), " AND ") +} + +func buildFilterConditions(filters []Filter) 
[]string { + var out []string + for _, f := range filters { + switch f.Type { + case FilterClick: + out = append(out, + fmt.Sprintf(`(main."$event_name" = 'CLICK' AND JSONExtractString(toString(main."$properties"), 'label') IN ('%s'))`, + strings.Join(f.Value, "','"))) + case FilterInput: + out = append(out, + fmt.Sprintf(`(main."$event_name" = 'INPUT' AND JSONExtractString(toString(main."$properties"), 'label') IN ('%s'))`, + strings.Join(f.Value, "','"))) + + default: + out = append(out, + fmt.Sprintf(`(main."$event_name" = '%s')`, strings.ToUpper(string(f.Type)))) + } + } + return out +} diff --git a/backend/pkg/analytics/db/connector.go b/backend/pkg/analytics/db/connector.go new file mode 100644 index 000000000..c06dfa998 --- /dev/null +++ b/backend/pkg/analytics/db/connector.go @@ -0,0 +1,64 @@ +package db + +import ( + "context" + "github.com/ClickHouse/clickhouse-go/v2" + "github.com/ClickHouse/clickhouse-go/v2/lib/driver" + "openreplay/backend/internal/config/common" + "time" +) + +type TableValue struct { + Name string `json:"name"` + Total uint64 `json:"total"` +} + +type TableResponse struct { + Total uint64 `json:"total"` + Count uint64 `json:"count"` + Values []TableValue `json:"values"` +} + +type Connector interface { + Stop() error + Query(query string) (driver.Rows, error) +} + +type connectorImpl struct { + conn driver.Conn +} + +func NewConnector(cfg common.Clickhouse) (Connector, error) { + conn, err := clickhouse.Open(&clickhouse.Options{ + Addr: []string{cfg.GetTrimmedURL()}, + Auth: clickhouse.Auth{ + Database: cfg.Database, + Username: cfg.LegacyUserName, + Password: cfg.LegacyPassword, + }, + MaxOpenConns: 20, + MaxIdleConns: 15, + ConnMaxLifetime: 3 * time.Minute, + Compression: &clickhouse.Compression{ + Method: clickhouse.CompressionLZ4, + }, + }) + if err != nil { + return nil, err + } + return &connectorImpl{conn: conn}, nil +} + +func (c *connectorImpl) Stop() error { + return c.conn.Close() +} + +func (c *connectorImpl) Query(query string) (driver.Rows, error) { + rows, err := c.conn.Query(context.Background(), query) + if err != nil { + return nil, err + } + //defer rows.Close() + + return rows, nil +} diff --git a/backend/pkg/analytics/query/chartQuery.go b/backend/pkg/analytics/query/chartQuery.go index 47724b53d..a9c69e0c0 100644 --- a/backend/pkg/analytics/query/chartQuery.go +++ b/backend/pkg/analytics/query/chartQuery.go @@ -1,649 +1,654 @@ -package main +package query -import ( - "encoding/json" - "fmt" - "strings" -) +//package main +// +//import ( +// "fmt" +// "strings" +//) +// +////func main() { +//// var r Root +//// //err := json.Unmarshal([]byte(jsonInput), &r) +//// //if err != nil { +//// // panic(err) +//// //} +//// // +//// ////fmt.Println("ARGS:", r) +//// //fmt.Println(buildQuery(r)) +//// ////fmt.Println("QUERY PART:", qp) +//// +//// builder := NewQueryBuilder() +//// query, err := builder.BuildQuery(r) +//// if err != nil { +//// fmt.Println("ERROR:", err) +//// } +//// +//// fmt.Println(query) +////} +// +//type Table string +//type Column string +//type FilterType string +//type EventOrder string +//type FetchFilterType string +// +//const ( +// UserOs FilterType = "userOs" +// UserBrowser FilterType = "userBrowser" +// UserDevice FilterType = "userDevice" +// UserCountry FilterType = "userCountry" +// UserCity FilterType = "userCity" +// UserState FilterType = "userState" +// UserId FilterType = "userId" +// UserAnonymousId FilterType = "userAnonymousId" +// Referrer FilterType = "referrer" +// RevId FilterType = "revId" +// 
UserOsIos FilterType = "userOsIos" +// UserDeviceIos FilterType = "userDeviceIos" +// UserCountryIos FilterType = "userCountryIos" +// UserIdIos FilterType = "userIdIos" +// UserAnonymousIdIos FilterType = "userAnonymousIdIos" +// RevIdIos FilterType = "revIdIos" +// Duration FilterType = "duration" +// Platform FilterType = "platform" +// Metadata FilterType = "metadata" +// Issue FilterType = "issue" +// EventsCount FilterType = "eventsCount" +// UtmSource FilterType = "utmSource" +// UtmMedium FilterType = "utmMedium" +// UtmCampaign FilterType = "utmCampaign" +// ThermalState FilterType = "thermalState" +// MainThreadCPU FilterType = "mainThreadCPU" +// ViewComponent FilterType = "viewComponent" +// LogEvent FilterType = "logEvent" +// ClickEvent FilterType = "clickEvent" +// MemoryUsage FilterType = "memoryUsage" +//) +// +//const ( +// Click FilterType = "click" +// Input FilterType = "input" +// Location FilterType = "location" +// Custom FilterType = "custom" +// Request FilterType = "request" +// Fetch FilterType = "fetch" +// GraphQL FilterType = "graphql" +// StateAction FilterType = "stateAction" +// Error FilterType = "error" +// Tag FilterType = "tag" +// ClickMobile FilterType = "clickMobile" +// InputMobile FilterType = "inputMobile" +// ViewMobile FilterType = "viewMobile" +// CustomMobile FilterType = "customMobile" +// RequestMobile FilterType = "requestMobile" +// ErrorMobile FilterType = "errorMobile" +// SwipeMobile FilterType = "swipeMobile" +//) +// +//const ( +// EventOrderThen EventOrder = "then" +// EventOrderOr EventOrder = "or" +// EventOrderAnd EventOrder = "and" +//) +// +//const ( +// FetchFilterTypeFetchUrl FilterType = "fetchUrl" +// FetchFilterTypeFetchStatusCode FilterType = "fetchStatusCode" +// FetchFilterTypeFetchMethod FilterType = "fetchMethod" +// FetchFilterTypeFetchDuration FilterType = "fetchDuration" +// FetchFilterTypeFetchRequestBody FilterType = "fetchRequestBody" +// FetchFilterTypeFetchResponseBody FilterType = "fetchResponseBody" +//) +// +//const ( +// OperatorStringIs = "is" +// OperatorStringIsAny = "isAny" +// OperatorStringOn = "on" +// OperatorStringOnAny = "onAny" +// OperatorStringIsNot = "isNot" +// OperatorStringIsUndefined = "isUndefined" +// OperatorStringNotOn = "notOn" +// OperatorStringContains = "contains" +// OperatorStringNotContains = "notContains" +// OperatorStringStartsWith = "startsWith" +// OperatorStringEndsWith = "endsWith" +//) +// +//const ( +// OperatorMathEq = "=" +// OperatorMathLt = "<" +// OperatorMathGt = ">" +// OperatorMathLe = "<=" +// OperatorMathGe = ">=" +//) +// +////-------------------------------------------------- +//// Constants for columns, tables, etc. 
+////-------------------------------------------------- +// +//const ( +// TableEvents Table = "product_analytics.events" +// TableSessions Table = "experimental.sessions" +// +// ColEventTime Column = "main.created_at" +// ColEventName Column = "main.`$event_name`" +// ColEventProjectID Column = "main.project_id" +// ColEventProperties Column = "main.`$properties`" +// ColEventSessionID Column = "main.session_id" +// ColEventURLPath Column = "main.url_path" +// ColEventStatus Column = "main.status" +// +// ColSessionID Column = "s.session_id" +// ColDuration Column = "s.duration" +// ColUserCountry Column = "s.user_country" +// ColUserCity Column = "s.user_city" +// ColUserState Column = "s.user_state" +// ColUserID Column = "s.user_id" +// ColUserAnonymousID Column = "s.user_anonymous_id" +// ColUserOS Column = "s.user_os" +// ColUserBrowser Column = "s.user_browser" +// ColUserDevice Column = "s.user_device" +// ColUserDeviceType Column = "s.user_device_type" +// ColRevID Column = "s.rev_id" +// ColBaseReferrer Column = "s.base_referrer" +// ColUtmSource Column = "s.utm_source" +// ColUtmMedium Column = "s.utm_medium" +// ColUtmCampaign Column = "s.utm_campaign" +// ColMetadata1 Column = "s.metadata_1" +// ColSessionProjectID Column = "s.project_id" +// ColSessionIsNotNull Column = "isNotNull(s.duration)" +//) +// +//type Root struct { +// StartTimestamp int64 `json:"startTimestamp"` +// EndTimestamp int64 `json:"endTimestamp"` +// Series []Series `json:"series"` +// ProjectID int64 `json:"projectId"` +//} +// +//type Series struct { +// SeriesID int64 `json:"seriesId"` +// Name string `json:"name"` +// Filter SeriesFilter `json:"filter"` +//} +// +//type SeriesFilter struct { +// Filters []FilterObj `json:"filters"` +// EventsOrder EventOrder `json:"eventsOrder"` +//} +// +//type FilterObj struct { +// Key string `json:"key"` +// Type FilterType `json:"type"` +// IsEvent bool `json:"isEvent"` +// Value []string `json:"value"` +// Operator string `json:"operator"` +// Source string `json:"source"` +// Filters []FilterObj `json:"filters"` +//} +// +//// -------------------------------------------------- +//func buildQuery(r Root) string { +// s := r.Series[0] +// +// // iterate over series and partition filters +// //for _, s := range r.Series { +// // sessionFilters, eventFilters := partitionFilters(s.Filter.Filters) +// // sessionWhere := buildSessionWhere(sessionFilters) +// // eventWhere, seqHaving := buildEventsWhere(eventFilters, s.Filter.EventsOrder) +// // fmt.Println("SESSION FILTERS:", sessionFilters) +// // fmt.Println("EVENT FILTERS:", eventFilters) +// // fmt.Println("SESSION WHERE:", sessionWhere) +// // fmt.Println("EVENT WHERE:", eventWhere) +// // fmt.Println("SEQ HAVING:", seqHaving) +// //} +// +// sessionFilters, eventFilters := partitionFilters(s.Filter.Filters) +// sessionWhere := buildSessionWhere(sessionFilters) +// eventWhere, seqHaving := buildEventsWhere(eventFilters, s.Filter.EventsOrder) +// +// subQuery := fmt.Sprintf( +// "SELECT %s,\n"+ +// " MIN(%s) AS first_event_ts,\n"+ +// " MAX(%s) AS last_event_ts\n"+ +// "FROM %s AS main\n"+ +// "WHERE main.project_id = %%(project_id)s\n"+ +// " AND %s >= toDateTime(%%(start_time)s/1000)\n"+ +// " AND %s <= toDateTime(%%(end_time)s/1000)\n"+ +// " AND (%s)\n"+ +// "GROUP BY %s\n"+ +// "HAVING %s", +// ColEventSessionID, +// ColEventTime, +// ColEventTime, +// TableEvents, +// ColEventTime, +// ColEventTime, +// strings.Join(eventWhere, " OR "), +// ColEventSessionID, +// seqHaving, +// ) +// +// joinQuery := 
fmt.Sprintf( +// "SELECT *\n"+ +// "FROM %s AS s\n"+ +// "INNER JOIN (\n"+ +// " SELECT DISTINCT ev.session_id, ev.`$current_url` AS url_path\n"+ +// " FROM %s AS ev\n"+ +// " WHERE ev.created_at >= toDateTime(%%(start_time)s/1000)\n"+ +// " AND ev.created_at <= toDateTime(%%(end_time)s/1000)\n"+ +// " AND ev.project_id = %%(project_id)s\n"+ +// " AND ev.`$event_name` = 'LOCATION'\n"+ +// ") AS extra_event USING (session_id)\n"+ +// "WHERE s.project_id = %%(project_id)s\n"+ +// " AND %s\n"+ +// " AND s.datetime >= toDateTime(%%(start_time)s/1000)\n"+ +// " AND s.datetime <= toDateTime(%%(end_time)s/1000)\n", +// TableSessions, +// TableEvents, +// ColSessionIsNotNull, +// ) +// +// if len(sessionWhere) > 0 { +// joinQuery += " AND " + strings.Join(sessionWhere, " AND ") + "\n" +// } +// +// main := fmt.Sprintf( +// "SELECT s.session_id AS session_id, s.url_path\n"+ +// "FROM (\n%s\n) AS f\n"+ +// "INNER JOIN (\n%s) AS s\n"+ +// " ON (s.session_id = f.session_id)\n", +// subQuery, +// joinQuery, +// ) +// +// final := fmt.Sprintf( +// "SELECT COUNT(DISTINCT url_path) OVER () AS main_count,\n"+ +// " url_path AS name,\n"+ +// " COUNT(DISTINCT session_id) AS total,\n"+ +// " COALESCE(SUM(COUNT(DISTINCT session_id)) OVER (), 0) AS total_count\n"+ +// "FROM (\n%s) AS filtered_sessions\n"+ +// "GROUP BY url_path\n"+ +// "ORDER BY total DESC\n"+ +// "LIMIT 200 OFFSET 0;", +// main, +// ) +// +// return final +//} +// +//func partitionFilters(filters []FilterObj) (sessionFilters, eventFilters []FilterObj) { +// for _, f := range filters { +// if f.IsEvent { +// eventFilters = append(eventFilters, f) +// } else { +// sessionFilters = append(sessionFilters, f) +// } +// } +// return +//} +// +//func buildSessionWhere(filters []FilterObj) []string { +// var conds []string +// for _, f := range filters { +// switch f.Type { +// case UserCountry: +// conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCountry, concatValues(f.Value))) +// case UserCity: +// conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCity, concatValues(f.Value))) +// case UserState: +// conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserState, concatValues(f.Value))) +// case UserId: +// conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserID, concatValues(f.Value))) +// case UserAnonymousId: +// conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserAnonymousID, concatValues(f.Value))) +// case UserOs: +// conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserOS, concatValues(f.Value))) +// case UserBrowser: +// conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserBrowser, concatValues(f.Value))) +// case UserDevice: +// conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDevice, concatValues(f.Value))) +// case Platform: +// conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDeviceType, concatValues(f.Value))) +// case RevId: +// conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColRevID, concatValues(f.Value))) +// case Referrer: +// conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColBaseReferrer, concatValues(f.Value))) +// case Duration: +// if len(f.Value) == 2 { +// conds = append(conds, fmt.Sprintf("%s >= '%s'", ColDuration, f.Value[0])) +// conds = append(conds, fmt.Sprintf("%s <= '%s'", ColDuration, f.Value[1])) +// } +// case UtmSource: +// conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmSource, concatValues(f.Value))) +// case UtmMedium: +// conds = append(conds, fmt.Sprintf("%s = 
toString('%s')", ColUtmMedium, concatValues(f.Value))) +// case UtmCampaign: +// conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmCampaign, concatValues(f.Value))) +// case Metadata: +// conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColMetadata1, concatValues(f.Value))) +// } +// } +// // add /n to each condition +// for i := range conds { +// conds[i] += "\n" +// } +// return conds +//} +// +//func parseOperator(op string) string { +// switch strings.ToLower(op) { +// case OperatorStringContains: +// return OperatorMathEq // interpret as "LIKE" if needed +// case OperatorStringIs, OperatorStringOn, "=", OperatorStringOnAny: +// return OperatorMathEq +// case OperatorStringStartsWith: +// // might interpret differently in real impl +// return OperatorMathEq +// case OperatorStringEndsWith: +// // might interpret differently in real impl +// return OperatorMathEq +// default: +// return OperatorMathEq +// } +//} +// +//func buildEventsWhere(filters []FilterObj, order EventOrder) (eventConditions []string, having string) { +// basicEventTypes := "(" + +// strings.Join([]string{ +// fmt.Sprintf("%s = 'CLICK'", ColEventName), +// fmt.Sprintf("%s = 'INPUT'", ColEventName), +// fmt.Sprintf("%s = 'LOCATION'", ColEventName), +// fmt.Sprintf("%s = 'CUSTOM'", ColEventName), +// fmt.Sprintf("%s = 'REQUEST'", ColEventName), +// }, " OR ") + ")" +// +// var seq []string +// for _, f := range filters { +// switch f.Type { +// case Click: +// seq = append(seq, seqCond("CLICK", "selector", f)) +// case Input: +// seq = append(seq, seqCond("INPUT", "label", f)) +// case Location: +// seq = append(seq, seqCond("LOCATION", "url_path", f)) +// case Custom: +// seq = append(seq, seqCond("CUSTOM", "name", f)) +// case Fetch: +// seq = append(seq, seqFetchCond("REQUEST", f)) +// case FetchFilterTypeFetchStatusCode: +// seq = append(seq, seqCond("REQUEST", "status", f)) +// default: +// seq = append(seq, fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(string(f.Type)))) +// } +// } +// eventConditions = []string{basicEventTypes} +// +// // then => sequenceMatch +// // or => OR +// // and => AND +// switch order { +// case EventOrderThen: +// var pattern []string +// for i := range seq { +// pattern = append(pattern, fmt.Sprintf("(?%d)", i+1)) +// } +// having = fmt.Sprintf("sequenceMatch('%s')(\n%s,\n%s)", +// strings.Join(pattern, ""), fmt.Sprintf("toUnixTimestamp(%s)", ColEventTime), strings.Join(seq, ",\n")) +// case EventOrderAnd: +// // build AND +// having = strings.Join(seq, " AND ") +// default: +// // default => OR +// var orParts []string +// for _, p := range seq { +// orParts = append(orParts, "("+p+")") +// } +// having = strings.Join(orParts, " OR ") +// } +// return +//} +// +//func seqCond(eventName, key string, f FilterObj) string { +// op := parseOperator(f.Operator) +// return fmt.Sprintf("(%s = '%s' AND JSONExtractString(toString(%s), '%s') %s '%s')", +// ColEventName, strings.ToUpper(eventName), ColEventProperties, key, op, concatValues(f.Value)) +//} +// +//func seqFetchCond(eventName string, f FilterObj) string { +// w := []string{fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(eventName))} +// var extras []string +// for _, c := range f.Filters { +// switch c.Type { +// case Fetch: +// if len(c.Value) > 0 { +// extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventURLPath, concatValues(c.Value))) +// } +// case FetchFilterTypeFetchStatusCode: +// if len(c.Value) > 0 { +// extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventStatus, 
concatValues(c.Value))) +// } +// default: +// // placeholder if needed +// } +// } +// if len(extras) > 0 { +// w = append(w, strings.Join(extras, " AND ")) +// } +// return "(" + strings.Join(w, " AND ") + ")" +//} +// +//func concatValues(v []string) string { +// return strings.Join(v, "") +//} -func main() { - var r Root - err := json.Unmarshal([]byte(jsonInput), &r) - if err != nil { - panic(err) - } - - //fmt.Println("ARGS:", r) - fmt.Println(buildQuery(r)) - //fmt.Println("QUERY PART:", qp) -} - -type Table string -type Column string -type FilterType string -type EventOrder string -type FetchFilterType string - -const ( - UserOs FilterType = "userOs" - UserBrowser FilterType = "userBrowser" - UserDevice FilterType = "userDevice" - UserCountry FilterType = "userCountry" - UserCity FilterType = "userCity" - UserState FilterType = "userState" - UserId FilterType = "userId" - UserAnonymousId FilterType = "userAnonymousId" - Referrer FilterType = "referrer" - RevId FilterType = "revId" - UserOsIos FilterType = "userOsIos" - UserDeviceIos FilterType = "userDeviceIos" - UserCountryIos FilterType = "userCountryIos" - UserIdIos FilterType = "userIdIos" - UserAnonymousIdIos FilterType = "userAnonymousIdIos" - RevIdIos FilterType = "revIdIos" - Duration FilterType = "duration" - Platform FilterType = "platform" - Metadata FilterType = "metadata" - Issue FilterType = "issue" - EventsCount FilterType = "eventsCount" - UtmSource FilterType = "utmSource" - UtmMedium FilterType = "utmMedium" - UtmCampaign FilterType = "utmCampaign" - ThermalState FilterType = "thermalState" - MainThreadCPU FilterType = "mainThreadCPU" - ViewComponent FilterType = "viewComponent" - LogEvent FilterType = "logEvent" - ClickEvent FilterType = "clickEvent" - MemoryUsage FilterType = "memoryUsage" -) - -const ( - Click FilterType = "click" - Input FilterType = "input" - Location FilterType = "location" - Custom FilterType = "custom" - Request FilterType = "request" - Fetch FilterType = "fetch" - GraphQL FilterType = "graphql" - StateAction FilterType = "stateAction" - Error FilterType = "error" - Tag FilterType = "tag" - ClickMobile FilterType = "clickMobile" - InputMobile FilterType = "inputMobile" - ViewMobile FilterType = "viewMobile" - CustomMobile FilterType = "customMobile" - RequestMobile FilterType = "requestMobile" - ErrorMobile FilterType = "errorMobile" - SwipeMobile FilterType = "swipeMobile" -) - -const ( - EventOrderThen EventOrder = "then" - EventOrderOr EventOrder = "or" - EventOrderAnd EventOrder = "and" -) - -const ( - FetchFilterTypeFetchUrl FilterType = "fetchUrl" - FetchFilterTypeFetchStatusCode FilterType = "fetchStatusCode" - FetchFilterTypeFetchMethod FilterType = "fetchMethod" - FetchFilterTypeFetchDuration FilterType = "fetchDuration" - FetchFilterTypeFetchRequestBody FilterType = "fetchRequestBody" - FetchFilterTypeFetchResponseBody FilterType = "fetchResponseBody" -) - -const ( - OperatorStringIs = "is" - OperatorStringIsAny = "isAny" - OperatorStringOn = "on" - OperatorStringOnAny = "onAny" - OperatorStringIsNot = "isNot" - OperatorStringIsUndefined = "isUndefined" - OperatorStringNotOn = "notOn" - OperatorStringContains = "contains" - OperatorStringNotContains = "notContains" - OperatorStringStartsWith = "startsWith" - OperatorStringEndsWith = "endsWith" -) - -const ( - OperatorMathEq = "=" - OperatorMathLt = "<" - OperatorMathGt = ">" - OperatorMathLe = "<=" - OperatorMathGe = ">=" -) - -//-------------------------------------------------- -// Constants for columns, tables, etc. 
-//-------------------------------------------------- - -const ( - TableEvents Table = "product_analytics.events" - TableSessions Table = "experimental.sessions" - - ColEventTime Column = "main.created_at" - ColEventName Column = "main.`$event_name`" - ColEventProjectID Column = "main.project_id" - ColEventProperties Column = "main.`$properties`" - ColEventSessionID Column = "main.session_id" - ColEventURLPath Column = "main.url_path" - ColEventStatus Column = "main.status" - - ColSessionID Column = "s.session_id" - ColDuration Column = "s.duration" - ColUserCountry Column = "s.user_country" - ColUserCity Column = "s.user_city" - ColUserState Column = "s.user_state" - ColUserID Column = "s.user_id" - ColUserAnonymousID Column = "s.user_anonymous_id" - ColUserOS Column = "s.user_os" - ColUserBrowser Column = "s.user_browser" - ColUserDevice Column = "s.user_device" - ColUserDeviceType Column = "s.user_device_type" - ColRevID Column = "s.rev_id" - ColBaseReferrer Column = "s.base_referrer" - ColUtmSource Column = "s.utm_source" - ColUtmMedium Column = "s.utm_medium" - ColUtmCampaign Column = "s.utm_campaign" - ColMetadata1 Column = "s.metadata_1" - ColSessionProjectID Column = "s.project_id" - ColSessionIsNotNull Column = "isNotNull(s.duration)" -) - -type Root struct { - StartTimestamp int64 `json:"startTimestamp"` - EndTimestamp int64 `json:"endTimestamp"` - Series []Series `json:"series"` -} - -type Series struct { - SeriesID int64 `json:"seriesId"` - Name string `json:"name"` - Filter SeriesFilter `json:"filter"` -} - -type SeriesFilter struct { - Filters []FilterObj `json:"filters"` - EventsOrder EventOrder `json:"eventsOrder"` -} - -type FilterObj struct { - Type FilterType `json:"type"` - IsEvent bool `json:"isEvent"` - Value []string `json:"value"` - Operator string `json:"operator"` - Source string `json:"source"` - Filters []FilterObj `json:"filters"` -} - -// -------------------------------------------------- -func buildQuery(r Root) string { - s := r.Series[0] - - // iterate over series and partition filters - //for _, s := range r.Series { - // sessionFilters, eventFilters := partitionFilters(s.Filter.Filters) - // sessionWhere := buildSessionWhere(sessionFilters) - // eventWhere, seqHaving := buildEventsWhere(eventFilters, s.Filter.EventsOrder) - // fmt.Println("SESSION FILTERS:", sessionFilters) - // fmt.Println("EVENT FILTERS:", eventFilters) - // fmt.Println("SESSION WHERE:", sessionWhere) - // fmt.Println("EVENT WHERE:", eventWhere) - // fmt.Println("SEQ HAVING:", seqHaving) - //} - - sessionFilters, eventFilters := partitionFilters(s.Filter.Filters) - sessionWhere := buildSessionWhere(sessionFilters) - eventWhere, seqHaving := buildEventsWhere(eventFilters, s.Filter.EventsOrder) - - subQuery := fmt.Sprintf( - "SELECT %s,\n"+ - " MIN(%s) AS first_event_ts,\n"+ - " MAX(%s) AS last_event_ts\n"+ - "FROM %s AS main\n"+ - "WHERE main.project_id = %%(project_id)s\n"+ - " AND %s >= toDateTime(%%(start_time)s/1000)\n"+ - " AND %s <= toDateTime(%%(end_time)s/1000)\n"+ - " AND (%s)\n"+ - "GROUP BY %s\n"+ - "HAVING %s", - ColEventSessionID, - ColEventTime, - ColEventTime, - TableEvents, - ColEventTime, - ColEventTime, - strings.Join(eventWhere, " OR "), - ColEventSessionID, - seqHaving, - ) - - joinQuery := fmt.Sprintf( - "SELECT *\n"+ - "FROM %s AS s\n"+ - "INNER JOIN (\n"+ - " SELECT DISTINCT ev.session_id, ev.`$current_url` AS url_path\n"+ - " FROM %s AS ev\n"+ - " WHERE ev.created_at >= toDateTime(%%(start_time)s/1000)\n"+ - " AND ev.created_at <= 
toDateTime(%%(end_time)s/1000)\n"+ - " AND ev.project_id = %%(project_id)s\n"+ - " AND ev.`$event_name` = 'LOCATION'\n"+ - ") AS extra_event USING (session_id)\n"+ - "WHERE s.project_id = %%(project_id)s\n"+ - " AND %s\n"+ - " AND s.datetime >= toDateTime(%%(start_time)s/1000)\n"+ - " AND s.datetime <= toDateTime(%%(end_time)s/1000)\n", - TableSessions, - TableEvents, - ColSessionIsNotNull, - ) - - if len(sessionWhere) > 0 { - joinQuery += " AND " + strings.Join(sessionWhere, " AND ") + "\n" - } - - main := fmt.Sprintf( - "SELECT s.session_id AS session_id, s.url_path\n"+ - "FROM (\n%s\n) AS f\n"+ - "INNER JOIN (\n%s) AS s\n"+ - " ON (s.session_id = f.session_id)\n", - subQuery, - joinQuery, - ) - - final := fmt.Sprintf( - "SELECT COUNT(DISTINCT url_path) OVER () AS main_count,\n"+ - " url_path AS name,\n"+ - " COUNT(DISTINCT session_id) AS total,\n"+ - " COALESCE(SUM(COUNT(DISTINCT session_id)) OVER (), 0) AS total_count\n"+ - "FROM (\n%s) AS filtered_sessions\n"+ - "GROUP BY url_path\n"+ - "ORDER BY total DESC\n"+ - "LIMIT 200 OFFSET 0;", - main, - ) - - return final -} - -func partitionFilters(filters []FilterObj) (sessionFilters, eventFilters []FilterObj) { - for _, f := range filters { - if f.IsEvent { - eventFilters = append(eventFilters, f) - } else { - sessionFilters = append(sessionFilters, f) - } - } - return -} - -func buildSessionWhere(filters []FilterObj) []string { - var conds []string - for _, f := range filters { - switch f.Type { - case UserCountry: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCountry, concatValues(f.Value))) - case UserCity: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCity, concatValues(f.Value))) - case UserState: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserState, concatValues(f.Value))) - case UserId: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserID, concatValues(f.Value))) - case UserAnonymousId: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserAnonymousID, concatValues(f.Value))) - case UserOs: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserOS, concatValues(f.Value))) - case UserBrowser: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserBrowser, concatValues(f.Value))) - case UserDevice: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDevice, concatValues(f.Value))) - case Platform: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDeviceType, concatValues(f.Value))) - case RevId: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColRevID, concatValues(f.Value))) - case Referrer: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColBaseReferrer, concatValues(f.Value))) - case Duration: - if len(f.Value) == 2 { - conds = append(conds, fmt.Sprintf("%s >= '%s'", ColDuration, f.Value[0])) - conds = append(conds, fmt.Sprintf("%s <= '%s'", ColDuration, f.Value[1])) - } - case UtmSource: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmSource, concatValues(f.Value))) - case UtmMedium: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmMedium, concatValues(f.Value))) - case UtmCampaign: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmCampaign, concatValues(f.Value))) - case Metadata: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColMetadata1, concatValues(f.Value))) - } - } - // add /n to each condition - for i := range conds { - conds[i] += "\n" - } - return conds -} - -func parseOperator(op string) string { - 
switch strings.ToLower(op) { - case OperatorStringContains: - return OperatorMathEq // interpret as "LIKE" if needed - case OperatorStringIs, OperatorStringOn, "=", OperatorStringOnAny: - return OperatorMathEq - case OperatorStringStartsWith: - // might interpret differently in real impl - return OperatorMathEq - case OperatorStringEndsWith: - // might interpret differently in real impl - return OperatorMathEq - default: - return OperatorMathEq - } -} - -func buildEventsWhere(filters []FilterObj, order EventOrder) (eventConditions []string, having string) { - basicEventTypes := "(" + - strings.Join([]string{ - fmt.Sprintf("%s = 'CLICK'", ColEventName), - fmt.Sprintf("%s = 'INPUT'", ColEventName), - fmt.Sprintf("%s = 'LOCATION'", ColEventName), - fmt.Sprintf("%s = 'CUSTOM'", ColEventName), - fmt.Sprintf("%s = 'REQUEST'", ColEventName), - }, " OR ") + ")" - - var seq []string - for _, f := range filters { - switch f.Type { - case Click: - seq = append(seq, seqCond("CLICK", "selector", f)) - case Input: - seq = append(seq, seqCond("INPUT", "label", f)) - case Location: - seq = append(seq, seqCond("LOCATION", "url_path", f)) - case Custom: - seq = append(seq, seqCond("CUSTOM", "name", f)) - case Fetch: - seq = append(seq, seqFetchCond("REQUEST", f)) - case FetchFilterTypeFetchStatusCode: - seq = append(seq, seqCond("REQUEST", "status", f)) - default: - seq = append(seq, fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(string(f.Type)))) - } - } - eventConditions = []string{basicEventTypes} - - // then => sequenceMatch - // or => OR - // and => AND - switch order { - case EventOrderThen: - var pattern []string - for i := range seq { - pattern = append(pattern, fmt.Sprintf("(?%d)", i+1)) - } - having = fmt.Sprintf("sequenceMatch('%s')(\n%s,\n%s)", - strings.Join(pattern, ""), fmt.Sprintf("toUnixTimestamp(%s)", ColEventTime), strings.Join(seq, ",\n")) - case EventOrderAnd: - // build AND - having = strings.Join(seq, " AND ") - default: - // default => OR - var orParts []string - for _, p := range seq { - orParts = append(orParts, "("+p+")") - } - having = strings.Join(orParts, " OR ") - } - return -} - -func seqCond(eventName, key string, f FilterObj) string { - op := parseOperator(f.Operator) - return fmt.Sprintf("(%s = '%s' AND JSONExtractString(toString(%s), '%s') %s '%s')", - ColEventName, strings.ToUpper(eventName), ColEventProperties, key, op, concatValues(f.Value)) -} - -func seqFetchCond(eventName string, f FilterObj) string { - w := []string{fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(eventName))} - var extras []string - for _, c := range f.Filters { - switch c.Type { - case Fetch: - if len(c.Value) > 0 { - extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventURLPath, concatValues(c.Value))) - } - case FetchFilterTypeFetchStatusCode: - if len(c.Value) > 0 { - extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventStatus, concatValues(c.Value))) - } - default: - // placeholder if needed - } - } - if len(extras) > 0 { - w = append(w, strings.Join(extras, " AND ")) - } - return "(" + strings.Join(w, " AND ") + ")" -} - -func concatValues(v []string) string { - return strings.Join(v, "") -} - -const jsonInput = ` -{ - "startTimestamp": 1737043724664, - "endTimestamp": 1737130124664, - "series": [ - { - "seriesId": 610, - "name": "Series 1", - "filter": { - "filters": [ - { - "type": "click", - "isEvent": true, - "value": ["DEPLOYMENT"], - "operator": "on", - "filters": [] - }, - { - "type": "input", - "isEvent": true, - "value": ["a"], - "operator": "contains", 
- "filters": [] - }, - { - "type": "location", - "isEvent": true, - "value": ["/en/using-or/"], - "operator": "is", - "filters": [] - }, - { - "type": "userCountry", - "isEvent": false, - "value": ["AD"], - "operator": "is", - "filters": [] - }, - { - "type": "userCity", - "isEvent": false, - "value": ["Mumbai"], - "operator": "is", - "filters": [] - }, - { - "type": "userState", - "isEvent": false, - "value": ["Maharashtra"], - "operator": "is", - "filters": [] - }, - { - "type": "userId", - "isEvent": false, - "value": ["test@test.com"], - "operator": "is", - "filters": [] - }, - { - "type": "userAnonymousId", - "isEvent": false, - "value": ["asd"], - "operator": "is", - "filters": [] - }, - { - "type": "userOs", - "isEvent": false, - "value": ["Mac OS X"], - "operator": "is", - "filters": [] - }, - { - "type": "userBrowser", - "isEvent": false, - "value": ["Chrome"], - "operator": "is", - "filters": [] - }, - { - "type": "userDevice", - "isEvent": false, - "value": ["iPhone"], - "operator": "is", - "filters": [] - }, - { - "type": "platform", - "isEvent": false, - "value": ["desktop"], - "operator": "is", - "filters": [] - }, - { - "type": "revId", - "isEvent": false, - "value": ["v1"], - "operator": "is", - "filters": [] - }, - { - "type": "referrer", - "isEvent": false, - "value": ["https://www.google.com/"], - "operator": "is", - "filters": [] - }, - { - "type": "duration", - "isEvent": false, - "value": ["60000", "6000000"], - "operator": "is", - "filters": [] - }, - { - "type": "tag", - "isEvent": true, - "value": ["8"], - "operator": "is", - "filters": [] - }, - { - "type": "utmSource", - "isEvent": false, - "value": ["aaa"], - "operator": "is", - "filters": [] - }, - { - "type": "utmMedium", - "isEvent": false, - "value": ["aa"], - "operator": "is", - "filters": [] - }, - { - "type": "utmCampaign", - "isEvent": false, - "value": ["aaa"], - "operator": "is", - "filters": [] - }, - { - "type": "metadata", - "isEvent": false, - "value": ["bbbb"], - "operator": "is", - "source": "userId", - "filters": [] - }, - { - "type": "custom", - "isEvent": true, - "value": ["test"], - "operator": "is", - "filters": [] - }, - { - "type": "fetch", - "isEvent": true, - "value": [], - "operator": "is", - "filters": [ - { - "type": "fetchUrl", - "isEvent": false, - "value": ["/ai/docs/chat"], - "operator": "is", - "filters": [] - }, - { - "type": "fetchStatusCode", - "isEvent": false, - "value": ["400"], - "operator": "=", - "filters": [] - }, - { - "type": "fetchMethod", - "isEvent": false, - "value": [], - "operator": "is", - "filters": [] - }, - { - "type": "fetchDuration", - "isEvent": false, - "value": [], - "operator": "=", - "filters": [] - }, - { - "type": "fetchRequestBody", - "isEvent": false, - "value": [], - "operator": "is", - "filters": [] - }, - { - "type": "fetchResponseBody", - "isEvent": false, - "value": [], - "operator": "is", - "filters": [] - } - ] - } - ], - "eventsOrder": "then" - } - } - ] -} -` +//const jsonInput = ` +//{ +// "startTimestamp": 1737043724664, +// "endTimestamp": 1737130124664, +// "projectId": 1, +// "series": [ +// { +// "seriesId": 610, +// "name": "Series 1", +// "filter": { +// "filters": [ +// { +// "type": "click", +// "isEvent": true, +// "value": ["DEPLOYMENT"], +// "operator": "on", +// "filters": [] +// }, +// { +// "type": "input", +// "isEvent": true, +// "value": ["a"], +// "operator": "contains", +// "filters": [] +// }, +// { +// "type": "location", +// "isEvent": true, +// "value": ["/en/using-or/"], +// "operator": "is", +// "filters": [] 
+// }, +// { +// "type": "userCountry", +// "isEvent": false, +// "value": ["AD"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "userCity", +// "isEvent": false, +// "value": ["Mumbai"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "userState", +// "isEvent": false, +// "value": ["Maharashtra"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "userId", +// "isEvent": false, +// "value": ["test@test.com"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "userAnonymousId", +// "isEvent": false, +// "value": ["asd"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "userOs", +// "isEvent": false, +// "value": ["Mac OS X"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "userBrowser", +// "isEvent": false, +// "value": ["Chrome"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "userDevice", +// "isEvent": false, +// "value": ["iPhone"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "platform", +// "isEvent": false, +// "value": ["desktop"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "revId", +// "isEvent": false, +// "value": ["v1"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "referrer", +// "isEvent": false, +// "value": ["https://www.google.com/"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "duration", +// "isEvent": false, +// "value": ["60000", "6000000"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "utmSource", +// "isEvent": false, +// "value": ["aaa"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "utmMedium", +// "isEvent": false, +// "value": ["aa"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "utmCampaign", +// "isEvent": false, +// "value": ["aaa"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "metadata", +// "isEvent": false, +// "value": ["bbbb"], +// "operator": "is", +// "source": "userId", +// "filters": [] +// }, +// { +// "type": "custom", +// "isEvent": true, +// "value": ["test"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "fetch", +// "isEvent": true, +// "value": [], +// "operator": "is", +// "filters": [ +// { +// "type": "fetchUrl", +// "isEvent": false, +// "value": ["/ai/docs/chat"], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "fetchStatusCode", +// "isEvent": false, +// "value": ["400"], +// "operator": "=", +// "filters": [] +// }, +// { +// "type": "fetchMethod", +// "isEvent": false, +// "value": [], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "fetchDuration", +// "isEvent": false, +// "value": [], +// "operator": "=", +// "filters": [] +// }, +// { +// "type": "fetchRequestBody", +// "isEvent": false, +// "value": [], +// "operator": "is", +// "filters": [] +// }, +// { +// "type": "fetchResponseBody", +// "isEvent": false, +// "value": [], +// "operator": "is", +// "filters": [] +// } +// ] +// } +// ], +// "eventsOrder": "then" +// } +// } +// ] +//} +//` diff --git a/backend/pkg/analytics/query/funnel.go b/backend/pkg/analytics/query/funnel.go new file mode 100644 index 000000000..c8632b401 --- /dev/null +++ b/backend/pkg/analytics/query/funnel.go @@ -0,0 +1,7 @@ +package query + +type FunnelQueryBuilder struct{} + +func (f FunnelQueryBuilder) Build(p MetricPayload) string { + return "-- Funnel query placeholder" +} diff --git a/backend/pkg/analytics/query/model.go b/backend/pkg/analytics/query/model.go new 
file mode 100644 index 000000000..9a315e0db --- /dev/null +++ b/backend/pkg/analytics/query/model.go @@ -0,0 +1,137 @@ +package query + +type Table string +type Column string +type MetricType string +type FilterType string +type EventOrder string + +const ( + TableEvents Table = "product_analytics.events" + TableSessions Table = "experimental.sessions" +) + +const ( + ColEventTime Column = "main.created_at" + ColEventName Column = "main.`$event_name`" + ColEventProjectID Column = "main.project_id" + ColEventProperties Column = "main.`$properties`" + ColEventSessionID Column = "main.session_id" + ColEventURLPath Column = "main.url_path" + ColEventStatus Column = "main.status" +) + +const ( + ColSessionID Column = "s.session_id" + ColDuration Column = "s.duration" + ColUserCountry Column = "s.user_country" + ColUserCity Column = "s.user_city" + ColUserState Column = "s.user_state" + ColUserID Column = "s.user_id" + ColUserAnonymousID Column = "s.user_anonymous_id" + ColUserOS Column = "s.user_os" + ColUserBrowser Column = "s.user_browser" + ColUserDevice Column = "s.user_device" + ColUserDeviceType Column = "s.user_device_type" + ColRevID Column = "s.rev_id" + ColBaseReferrer Column = "s.base_referrer" + ColUtmSource Column = "s.utm_source" + ColUtmMedium Column = "s.utm_medium" + ColUtmCampaign Column = "s.utm_campaign" + ColMetadata1 Column = "s.metadata_1" + ColSessionProjectID Column = "s.project_id" + ColSessionIsNotNull Column = "isNotNull(s.duration)" +) + +const ( + MetricTypeTimeseries MetricType = "timeseries" + MetricTypeTable MetricType = "table" + MetricTypeFunnel MetricType = "funnel" +) + +const ( + EventOrderThen EventOrder = "then" + EventOrderOr EventOrder = "or" + EventOrderAnd EventOrder = "and" +) + +type MetricPayload struct { + StartTimestamp int64 `json:"startTimestamp"` + EndTimestamp int64 `json:"endTimestamp"` + Density int `json:"density"` + MetricOf string `json:"metricOf"` + MetricType MetricType `json:"metricType"` + MetricFormat string `json:"metricFormat"` + ViewType string `json:"viewType"` + Name string `json:"name"` + Series []Series `json:"series"` + CompareTo *string `json:"compareTo"` +} + +type Series struct { + Name string `json:"name"` + Filter struct { + Filters []Filter `json:"filters"` + EventsOrder EventOrder `json:"eventsOrder"` + } `json:"filter"` +} + +type Filter struct { + Type FilterType `json:"type"` + IsEvent bool `json:"isEvent"` + Value []string `json:"value"` + Operator string `json:"operator"` + Filters []Filter `json:"filters"` +} + +const ( + FilterUserOs FilterType = "userOs" + FilterUserBrowser FilterType = "userBrowser" + FilterUserDevice FilterType = "userDevice" + FilterUserCountry FilterType = "userCountry" + FilterUserCity FilterType = "userCity" + FilterUserState FilterType = "userState" + FilterUserId FilterType = "userId" + FilterUserAnonymousId FilterType = "userAnonymousId" + FilterReferrer FilterType = "referrer" + FilterRevId FilterType = "revId" + FilterUserOsIos FilterType = "userOsIos" + FilterUserDeviceIos FilterType = "userDeviceIos" + FilterUserCountryIos FilterType = "userCountryIos" + FilterUserIdIos FilterType = "userIdIos" + FilterUserAnonymousIdIos FilterType = "userAnonymousIdIos" + FilterRevIdIos FilterType = "revIdIos" + FilterDuration FilterType = "duration" + FilterPlatform FilterType = "platform" + FilterMetadata FilterType = "metadata" + FilterIssue FilterType = "issue" + FilterEventsCount FilterType = "eventsCount" + FilterUtmSource FilterType = "utmSource" + FilterUtmMedium FilterType = "utmMedium" 
+ FilterUtmCampaign FilterType = "utmCampaign" + FilterThermalState FilterType = "thermalState" + FilterMainThreadCPU FilterType = "mainThreadCPU" + FilterViewComponent FilterType = "viewComponent" + FilterLogEvent FilterType = "logEvent" + FilterMemoryUsage FilterType = "memoryUsage" + FilterClick FilterType = "click" + FilterInput FilterType = "input" + FilterLocation FilterType = "location" + FilterCustom FilterType = "customEvent" + FilterFetch FilterType = "fetch" + FilterFetchStatusCode FilterType = "status" +) + +const ( + OperatorStringIs = "is" + OperatorStringIsAny = "isAny" + OperatorStringOn = "on" + OperatorStringOnAny = "onAny" + OperatorStringIsNot = "isNot" + OperatorStringIsUndefined = "isUndefined" + OperatorStringNotOn = "notOn" + OperatorStringContains = "contains" + OperatorStringNotContains = "notContains" + OperatorStringStartsWith = "startsWith" + OperatorStringEndsWith = "endsWith" +) diff --git a/backend/pkg/analytics/query/queryBuilder.go b/backend/pkg/analytics/query/queryBuilder.go new file mode 100644 index 000000000..b45359a04 --- /dev/null +++ b/backend/pkg/analytics/query/queryBuilder.go @@ -0,0 +1,253 @@ +package query + +import ( + "encoding/json" + "fmt" + "strings" +) + +type NewQueryBuilder interface { + Build(MetricPayload) string +} + +func buildEventSubquery(p MetricPayload) string { + baseEventsWhere := buildBaseEventsWhere(p) + sequenceCond := buildSequenceCondition(p.Series) + sessionsWhere := buildSessionsWhere(p) + + // If there's no sequence pattern, skip HAVING entirely. + if sequenceCond.seqPattern == "" { + return fmt.Sprintf(` +SELECT s.%[1]s AS %[1]s, + s.datetime AS datetime +FROM ( + SELECT main.session_id, + MIN(main.created_at) AS first_event_ts, + MAX(main.created_at) AS last_event_ts + FROM product_analytics.events AS main + WHERE %[2]s + GROUP BY session_id +) AS f +INNER JOIN ( + SELECT * + FROM experimental.sessions AS s + WHERE %[3]s +) AS s ON (s.session_id = f.session_id) +`, pickIDField(p), baseEventsWhere, sessionsWhere) + } + + return fmt.Sprintf(` +SELECT s.%[1]s AS %[1]s, + s.datetime AS datetime +FROM ( + SELECT main.session_id, + MIN(main.created_at) AS first_event_ts, + MAX(main.created_at) AS last_event_ts + FROM product_analytics.events AS main + WHERE %[2]s + GROUP BY session_id + HAVING sequenceMatch('%[3]s')(toDateTime(main.created_at), %[4]s) +) AS f +INNER JOIN ( + SELECT * + FROM experimental.sessions AS s + WHERE %[5]s +) AS s ON (s.session_id = f.session_id) +`, pickIDField(p), baseEventsWhere, sequenceCond.seqPattern, sequenceCond.seqEvents, sessionsWhere) +} + +func pickIDField(p MetricPayload) string { + if p.MetricOf == "userCount" { + return "user_id" + } + return "session_id" +} + +func buildBaseEventsWhere(p MetricPayload) string { + projectID := 5 + ts := fmt.Sprintf( + `(main.created_at >= toDateTime(%d / 1000) AND main.created_at <= toDateTime(%d / 1000))`, + p.StartTimestamp, + p.EndTimestamp, + ) + return fmt.Sprintf(`main.project_id = %d AND %s`, projectID, ts) +} + +func buildSessionsWhere(p MetricPayload) string { + projectID := 5 + ts := fmt.Sprintf( + `(s.datetime >= toDateTime(%d / 1000) AND s.datetime <= toDateTime(%d / 1000))`, + p.StartTimestamp, + p.EndTimestamp, + ) + return fmt.Sprintf(`s.project_id = %d AND isNotNull(s.duration) AND %s`, projectID, ts) +} + +type sequenceParts struct { + seqPattern string + seqEvents string +} + +func buildSequenceCondition(series []Series) sequenceParts { + var events []string + for _, s := range series { + if len(s.Filter.Filters) > 0 { + events = 
append(events, buildOneSeriesSequence(s.Filter.Filters)) + } + } + + if len(events) == 0 { + return sequenceParts{"", ""} + } + + // For n events, we need a pattern like `(?1)(?2)(?3)...( ?n )`. + pattern := "" + for i := 1; i <= len(events); i++ { + pattern += fmt.Sprintf("(?%d)", i) + } + + return sequenceParts{ + seqPattern: pattern, + seqEvents: strings.Join(events, ", "), + } +} + +func buildOneSeriesSequence(filters []Filter) string { + return strings.Join(buildFilterConditions(filters), " AND ") +} + +func buildFilterConditions(filters []Filter) []string { + var out []string + for _, f := range filters { + switch f.Type { + case FilterClick: + out = append(out, + fmt.Sprintf(`(main."$event_name" = 'CLICK' AND JSONExtractString(toString(main."$properties"), 'label') IN ('%s'))`, + strings.Join(f.Value, "','"))) + case FilterInput: + out = append(out, + fmt.Sprintf(`(main."$event_name" = 'INPUT' AND JSONExtractString(toString(main."$properties"), 'label') IN ('%s'))`, + strings.Join(f.Value, "','"))) + // TODO add more cases to cover all the events + default: + out = append(out, + fmt.Sprintf(`(main."$event_name" = '%s')`, strings.ToUpper(string(f.Type)))) + } + } + return out +} + +func main() { + //input := GetPayload(MetricTypeTimeseries) + input := GetPayload(MetricTypeTable) + + var payload MetricPayload + err := json.Unmarshal([]byte(input), &payload) + if err != nil { + return + } + + var qb NewQueryBuilder + switch payload.MetricType { + case MetricTypeTimeseries: + qb = TimeSeriesQueryBuilder{} + case MetricTypeFunnel: + qb = FunnelQueryBuilder{} + case MetricTypeTable: + qb = TableQueryBuilder{} + default: + qb = TimeSeriesQueryBuilder{} + } + + query := qb.Build(payload) + fmt.Println(query) +} + +func GetPayload(metricType MetricType) string { + switch metricType { + case MetricTypeTimeseries: + return `{ + "startTimestamp": 1738796399999, + "endTimestamp": 1739401199999, + "density": 7, + "metricOf": "sessionCount", + "metricValue": [], + "metricType": "timeseries", + "metricFormat": "sessionCount", + "viewType": "lineChart", + "name": "Untitled Trend", + "series": [ + { + "name": "Series 1", + "filter": { + "filters": [ + { + "type": "userId", + "isEvent": false, + "value": [ + "test@test.com" + ], + "operator": "is", + "filters": [] + } + ], + "eventsOrder": "then" + } + } + ] + }` + case MetricTypeFunnel: + return `{}` + case MetricTypeTable: + return `{ + "startTimestamp": 1737586800000, + "endTimestamp": 1738277999999, + "density": 7, + "metricOf": "userDevice", + "metricType": "table", + "metricFormat": "sessionCount", + "viewType": "table", + "name": "Untitled Trend", + "series": [ + { + "name": "Series 1", + "filter": { + "filters": [ + { + "type": "click", + "isEvent": true, + "value": ["Manuscripts"], + "operator": "on", + "filters": [] + } + ], + "eventsOrder": "then" + } + }, + { + "name": "Series 2", + "filter": { + "filters": [ + { + "type": "input", + "isEvent": true, + "value": ["test"], + "operator": "is", + "filters": [] + } + ], + "eventsOrder": "then" + } + } + ], + "page": 1, + "limit": 20, + "compareTo": null, + "config": { + "col": 2 + } + }` + default: + return `{}` + } +} diff --git a/backend/pkg/analytics/query/table.go b/backend/pkg/analytics/query/table.go new file mode 100644 index 000000000..616f25f65 --- /dev/null +++ b/backend/pkg/analytics/query/table.go @@ -0,0 +1,252 @@ +package query + +import ( + "fmt" + "strings" +) + +type TableQueryBuilder struct{} + +func (t TableQueryBuilder) Build(p MetricPayload) string { + return 
t.buildQuery(p) +} + +func (t TableQueryBuilder) buildQuery(r MetricPayload) string { + s := r.Series[0] + sessionFilters, eventFilters := partitionFilters(s.Filter.Filters) + sessionWhere := buildSessionWhere(sessionFilters) + eventWhere, seqHaving := buildEventsWhere(eventFilters, s.Filter.EventsOrder) + + subQuery := fmt.Sprintf( + "SELECT %s,\n"+ + " MIN(%s) AS first_event_ts,\n"+ + " MAX(%s) AS last_event_ts\n"+ + "FROM %s AS main\n"+ + "WHERE main.project_id = %%(project_id)s\n"+ + " AND %s >= toDateTime(%%(start_time)s/1000)\n"+ + " AND %s <= toDateTime(%%(end_time)s/1000)\n"+ + " AND (%s)\n"+ + "GROUP BY %s\n"+ + "HAVING %s", + ColEventSessionID, + ColEventTime, + ColEventTime, + TableEvents, + ColEventTime, + ColEventTime, + strings.Join(eventWhere, " OR "), + ColEventSessionID, + seqHaving, + ) + + joinQuery := fmt.Sprintf( + "SELECT *\n"+ + "FROM %s AS s\n"+ + "INNER JOIN (\n"+ + " SELECT DISTINCT ev.session_id, ev.`$current_url` AS url_path\n"+ + " FROM %s AS ev\n"+ + " WHERE ev.created_at >= toDateTime(%%(start_time)s/1000)\n"+ + " AND ev.created_at <= toDateTime(%%(end_time)s/1000)\n"+ + " AND ev.project_id = %%(project_id)s\n"+ + " AND ev.`$event_name` = 'LOCATION'\n"+ + ") AS extra_event USING (session_id)\n"+ + "WHERE s.project_id = %%(project_id)s\n"+ + " AND isNotNull(s.duration)\n"+ + " AND s.datetime >= toDateTime(%%(start_time)s/1000)\n"+ + " AND s.datetime <= toDateTime(%%(end_time)s/1000)\n", + TableSessions, + TableEvents, + ) + + if len(sessionWhere) > 0 { + joinQuery += " AND " + strings.Join(sessionWhere, " AND ") + "\n" + } + + main := fmt.Sprintf( + "SELECT s.session_id AS session_id, s.url_path\n"+ + "FROM (\n%s\n) AS f\n"+ + "INNER JOIN (\n%s) AS s\n"+ + " ON (s.session_id = f.session_id)\n", + subQuery, + joinQuery, + ) + + final := fmt.Sprintf( + "SELECT COUNT(DISTINCT url_path) OVER () AS main_count,\n"+ + " url_path AS name,\n"+ + " COUNT(DISTINCT session_id) AS total,\n"+ + " COALESCE(SUM(COUNT(DISTINCT session_id)) OVER (), 0) AS total_count\n"+ + "FROM (\n%s) AS filtered_sessions\n"+ + "GROUP BY url_path\n"+ + "ORDER BY total DESC\n"+ + "LIMIT 200 OFFSET 0;", + main, + ) + + return final +} + +func partitionFilters(filters []Filter) (sessionFilters []Filter, eventFilters []Filter) { + for _, f := range filters { + if f.IsEvent { + eventFilters = append(eventFilters, f) + } else { + sessionFilters = append(sessionFilters, f) + } + } + return +} + +func buildSessionWhere(filters []Filter) []string { + var conds []string + for _, f := range filters { + switch f.Type { + case FilterUserCountry: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCountry, concatValues(f.Value))) + case FilterUserCity: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCity, concatValues(f.Value))) + case FilterUserState: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserState, concatValues(f.Value))) + case FilterUserId: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserID, concatValues(f.Value))) + case FilterUserAnonymousId: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserAnonymousID, concatValues(f.Value))) + case FilterUserOs: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserOS, concatValues(f.Value))) + case FilterUserBrowser: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserBrowser, concatValues(f.Value))) + case FilterUserDevice: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDevice, concatValues(f.Value))) + case FilterPlatform: + 
conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDeviceType, concatValues(f.Value)))
+		case FilterRevId:
+			conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColRevID, concatValues(f.Value)))
+		case FilterReferrer:
+			conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColBaseReferrer, concatValues(f.Value)))
+		case FilterDuration:
+			if len(f.Value) == 2 {
+				conds = append(conds, fmt.Sprintf("%s >= '%s'", ColDuration, f.Value[0]))
+				conds = append(conds, fmt.Sprintf("%s <= '%s'", ColDuration, f.Value[1]))
+			}
+		case FilterUtmSource:
+			conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmSource, concatValues(f.Value)))
+		case FilterUtmMedium:
+			conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmMedium, concatValues(f.Value)))
+		case FilterUtmCampaign:
+			conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmCampaign, concatValues(f.Value)))
+		case FilterMetadata:
+			conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColMetadata1, concatValues(f.Value)))
+		}
+	}
+	// append "\n" to each condition so the generated SQL is easier to read; can be removed.
+	for i := range conds {
+		conds[i] += "\n"
+	}
+	return conds
+}
+
+func concatValues(v []string) string {
+	return strings.Join(v, "")
+}
+
+func buildEventsWhere(filters []Filter, order EventOrder) (eventConditions []string, having string) {
+	basicEventTypes := "(" +
+		strings.Join([]string{
+			fmt.Sprintf("%s = 'CLICK'", ColEventName),
+			fmt.Sprintf("%s = 'INPUT'", ColEventName),
+			fmt.Sprintf("%s = 'LOCATION'", ColEventName),
+			fmt.Sprintf("%s = 'CUSTOM'", ColEventName),
+			fmt.Sprintf("%s = 'REQUEST'", ColEventName),
+		}, " OR ") + ")"
+
+	var seq []string
+	for _, f := range filters {
+		switch f.Type {
+		case FilterClick:
+			seq = append(seq, seqCond("CLICK", "selector", f))
+		case FilterInput:
+			seq = append(seq, seqCond("INPUT", "label", f))
+		case FilterLocation:
+			seq = append(seq, seqCond("LOCATION", "url_path", f))
+		case FilterCustom:
+			seq = append(seq, seqCond("CUSTOM", "name", f))
+		case FilterFetch:
+			seq = append(seq, seqFetchCond("REQUEST", f))
+		case FilterFetchStatusCode:
+			seq = append(seq, seqCond("REQUEST", "status", f))
+		default:
+			seq = append(seq, fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(string(f.Type))))
+		}
+	}
+	eventConditions = []string{basicEventTypes}
+
+	// then => sequenceMatch
+	// or => OR
+	// and => AND
+	switch order {
+	case EventOrderThen:
+		var pattern []string
+		for i := range seq {
+			pattern = append(pattern, fmt.Sprintf("(?%d)", i+1))
+		}
+		having = fmt.Sprintf("sequenceMatch('%s')(\n%s,\n%s)",
+			strings.Join(pattern, ""), fmt.Sprintf("toUnixTimestamp(%s)", ColEventTime), strings.Join(seq, ",\n"))
+	case EventOrderAnd:
+		// build AND
+		having = strings.Join(seq, " AND ")
+	default:
+		// default => OR
+		var orParts []string
+		for _, p := range seq {
+			orParts = append(orParts, "("+p+")")
+		}
+		having = strings.Join(orParts, " OR ")
+	}
+	return
+}
+
+func seqCond(eventName, key string, f Filter) string {
+	op := parseOperator(f.Operator)
+	return fmt.Sprintf("(%s = '%s' AND JSONExtractString(toString(%s), '%s') %s '%s')",
+		ColEventName, strings.ToUpper(eventName), ColEventProperties, key, op, concatValues(f.Value))
+}
+
+func seqFetchCond(eventName string, f Filter) string {
+	w := []string{fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(eventName))}
+	var extras []string
+	for _, c := range f.Filters {
+		switch c.Type {
+		case FilterFetch:
+			if len(c.Value) > 0 {
+				extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventURLPath, 
concatValues(c.Value))) + } + case FilterFetchStatusCode: + if len(c.Value) > 0 { + extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventStatus, concatValues(c.Value))) + } + default: + // placeholder if needed + } + } + if len(extras) > 0 { + w = append(w, strings.Join(extras, " AND ")) + } + return "(" + strings.Join(w, " AND ") + ")" +} + +func parseOperator(op string) string { + // TODO implement this properly + switch strings.ToLower(op) { + case OperatorStringContains: + return "LIKE" + case OperatorStringIs, OperatorStringOn, "=", OperatorStringOnAny: + return "=" + case OperatorStringStartsWith: + return "LIKE" + case OperatorStringEndsWith: + // might interpret differently in real impl + return "=" + default: + return "=" + } +} diff --git a/backend/pkg/analytics/query/timeseries.go b/backend/pkg/analytics/query/timeseries.go new file mode 100644 index 000000000..73d49f6cb --- /dev/null +++ b/backend/pkg/analytics/query/timeseries.go @@ -0,0 +1,42 @@ +package query + +import "fmt" + +type TimeSeriesQueryBuilder struct{} + +func (t TimeSeriesQueryBuilder) Build(p MetricPayload) string { + switch p.MetricOf { + case "sessionCount": + return t.buildSessionCountQuery(p) + case "userCount": + return t.buildUserCountQuery(p) + default: + return "" + } +} + +func (TimeSeriesQueryBuilder) buildSessionCountQuery(p MetricPayload) string { + subquery := buildEventSubquery(p) + return fmt.Sprintf(`SELECT toUnixTimestamp( + toStartOfInterval(processed_sessions.datetime, INTERVAL 115199 second) +) * 1000 AS timestamp, +COUNT(processed_sessions.session_id) AS count +FROM ( + %s +) AS processed_sessions +GROUP BY timestamp +ORDER BY timestamp;`, subquery) +} + +func (TimeSeriesQueryBuilder) buildUserCountQuery(p MetricPayload) string { + subquery := buildEventSubquery(p) + return fmt.Sprintf(`SELECT toUnixTimestamp( + toStartOfInterval(processed_sessions.datetime, INTERVAL 115199 second) +) * 1000 AS timestamp, +COUNT(DISTINCT processed_sessions.user_id) AS count +FROM ( + %s +) AS processed_sessions +GROUP BY timestamp +ORDER BY timestamp;`, subquery) +} From 5c0139b66c869a953f4ba93c6dd163403a2f4137 Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Tue, 18 Feb 2025 17:49:47 +0100 Subject: [PATCH 05/30] feat(analytics): timeseries queries with filters and events --- backend/pkg/analytics/charts/charts.go | 2 +- backend/pkg/analytics/charts/metric_funnel.go | 2 +- backend/pkg/analytics/charts/metric_table.go | 169 +---- .../pkg/analytics/charts/metric_timeseries.go | 153 +++-- backend/pkg/analytics/charts/model.go | 20 +- backend/pkg/analytics/charts/query.go | 578 ++++++++++++++---- 6 files changed, 585 insertions(+), 339 deletions(-) diff --git a/backend/pkg/analytics/charts/charts.go b/backend/pkg/analytics/charts/charts.go index 46a40d364..c460974cc 100644 --- a/backend/pkg/analytics/charts/charts.go +++ b/backend/pkg/analytics/charts/charts.go @@ -32,7 +32,7 @@ func (s *chartsImpl) GetData(projectId int, userID uint64, req *MetricPayload) ( return nil, fmt.Errorf("request is empty") } - payload := &Payload{ + payload := Payload{ ProjectId: projectId, UserId: userID, MetricPayload: req, diff --git a/backend/pkg/analytics/charts/metric_funnel.go b/backend/pkg/analytics/charts/metric_funnel.go index 0f58c886d..9de3a9dee 100644 --- a/backend/pkg/analytics/charts/metric_funnel.go +++ b/backend/pkg/analytics/charts/metric_funnel.go @@ -4,6 +4,6 @@ import "openreplay/backend/pkg/analytics/db" type FunnelQueryBuilder struct{} -func (f FunnelQueryBuilder) Execute(p *Payload, conn db.Connector) 
(interface{}, error) { +func (f FunnelQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { return "-- Funnel query placeholder", nil } diff --git a/backend/pkg/analytics/charts/metric_table.go b/backend/pkg/analytics/charts/metric_table.go index 46135fd55..bf0fe279c 100644 --- a/backend/pkg/analytics/charts/metric_table.go +++ b/backend/pkg/analytics/charts/metric_table.go @@ -8,11 +8,11 @@ import ( type TableQueryBuilder struct{} -func (t TableQueryBuilder) Execute(p *Payload, conn db.Connector) (interface{}, error) { +func (t TableQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { return t.buildQuery(p) } -func (t TableQueryBuilder) buildQuery(r *Payload) (string, error) { +func (t TableQueryBuilder) buildQuery(r Payload) (string, error) { s := r.Series[0] sessionFilters, eventFilters := partitionFilters(s.Filter.Filters) sessionWhere := buildSessionWhere(sessionFilters) @@ -86,168 +86,3 @@ func (t TableQueryBuilder) buildQuery(r *Payload) (string, error) { return final, nil } - -func partitionFilters(filters []Filter) (sessionFilters []Filter, eventFilters []Filter) { - for _, f := range filters { - if f.IsEvent { - eventFilters = append(eventFilters, f) - } else { - sessionFilters = append(sessionFilters, f) - } - } - return -} - -func buildSessionWhere(filters []Filter) []string { - var conds []string - for _, f := range filters { - switch f.Type { - case FilterUserCountry: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCountry, concatValues(f.Value))) - case FilterUserCity: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCity, concatValues(f.Value))) - case FilterUserState: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserState, concatValues(f.Value))) - case FilterUserId: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserID, concatValues(f.Value))) - case FilterUserAnonymousId: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserAnonymousID, concatValues(f.Value))) - case FilterUserOs: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserOS, concatValues(f.Value))) - case FilterUserBrowser: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserBrowser, concatValues(f.Value))) - case FilterUserDevice: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDevice, concatValues(f.Value))) - case FilterPlatform: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDeviceType, concatValues(f.Value))) - case FilterRevId: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColRevID, concatValues(f.Value))) - case FilterReferrer: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColBaseReferrer, concatValues(f.Value))) - case FilterDuration: - if len(f.Value) == 2 { - conds = append(conds, fmt.Sprintf("%s >= '%s'", ColDuration, f.Value[0])) - conds = append(conds, fmt.Sprintf("%s <= '%s'", ColDuration, f.Value[1])) - } - case FilterUtmSource: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmSource, concatValues(f.Value))) - case FilterUtmMedium: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmMedium, concatValues(f.Value))) - case FilterUtmCampaign: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmCampaign, concatValues(f.Value))) - case FilterMetadata: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColMetadata1, concatValues(f.Value))) - } - } - // adding /n to each condition for better readability, can be removed. 
- for i := range conds { - conds[i] += "\n" - } - return conds -} - -func concatValues(v []string) string { - return strings.Join(v, "") -} - -func buildEventsWhere(filters []Filter, order EventOrder) (eventConditions []string, having string) { - basicEventTypes := "(" + - strings.Join([]string{ - fmt.Sprintf("%s = 'CLICK'", ColEventName), - fmt.Sprintf("%s = 'INPUT'", ColEventName), - fmt.Sprintf("%s = 'LOCATION'", ColEventName), - fmt.Sprintf("%s = 'CUSTOM'", ColEventName), - fmt.Sprintf("%s = 'REQUEST'", ColEventName), - }, " OR ") + ")" - - var seq []string - for _, f := range filters { - switch f.Type { - case FilterClick: - seq = append(seq, seqCond("CLICK", "selector", f)) - case FilterInput: - seq = append(seq, seqCond("INPUT", "label", f)) - case FilterLocation: - seq = append(seq, seqCond("LOCATION", "url_path", f)) - case FilterCustom: - seq = append(seq, seqCond("CUSTOM", "name", f)) - case FilterFetch: - seq = append(seq, seqFetchCond("REQUEST", f)) - case FilterFetchStatusCode: - seq = append(seq, seqCond("REQUEST", "status", f)) - default: - seq = append(seq, fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(string(f.Type)))) - } - } - eventConditions = []string{basicEventTypes} - - // then => sequenceMatch - // or => OR - // and => AND - switch order { - case EventOrderThen: - var pattern []string - for i := range seq { - pattern = append(pattern, fmt.Sprintf("(?%d)", i+1)) - } - having = fmt.Sprintf("sequenceMatch('%s')(\n%s,\n%s)", - strings.Join(pattern, ""), fmt.Sprintf("toUnixTimestamp(%s)", ColEventTime), strings.Join(seq, ",\n")) - case EventOrderAnd: - // build AND - having = strings.Join(seq, " AND ") - default: - // default => OR - var orParts []string - for _, p := range seq { - orParts = append(orParts, "("+p+")") - } - having = strings.Join(orParts, " OR ") - } - return -} - -func seqCond(eventName, key string, f Filter) string { - op := parseOperator(f.Operator) - return fmt.Sprintf("(%s = '%s' AND JSONExtractString(toString(%s), '%s') %s '%s')", - ColEventName, strings.ToUpper(eventName), ColEventProperties, key, op, concatValues(f.Value)) -} - -func seqFetchCond(eventName string, f Filter) string { - w := []string{fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(eventName))} - var extras []string - for _, c := range f.Filters { - switch c.Type { - case FilterFetch: - if len(c.Value) > 0 { - extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventURLPath, concatValues(c.Value))) - } - case FilterFetchStatusCode: - if len(c.Value) > 0 { - extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventStatus, concatValues(c.Value))) - } - default: - // placeholder if needed - } - } - if len(extras) > 0 { - w = append(w, strings.Join(extras, " AND ")) - } - return "(" + strings.Join(w, " AND ") + ")" -} - -func parseOperator(op string) string { - // TODO implement this properly - switch strings.ToLower(op) { - case OperatorStringContains: - return "LIKE" - case OperatorStringIs, OperatorStringOn, "=", OperatorStringOnAny: - return "=" - case OperatorStringStartsWith: - return "LIKE" - case OperatorStringEndsWith: - // might interpret differently in real impl - return "=" - default: - return "=" - } -} diff --git a/backend/pkg/analytics/charts/metric_timeseries.go b/backend/pkg/analytics/charts/metric_timeseries.go index cc1df46c3..976b42b1b 100644 --- a/backend/pkg/analytics/charts/metric_timeseries.go +++ b/backend/pkg/analytics/charts/metric_timeseries.go @@ -4,11 +4,12 @@ import ( "fmt" "log" "openreplay/backend/pkg/analytics/db" + "strings" ) 
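// NOTE: the builders below size their time buckets with getStepSize, whose
// definition is not part of this patch. Judging by the call sites
// (getStepSize(p.StartTimestamp, p.EndTimestamp, p.Density, false, 1000)),
// it is assumed to split the [start, end] millisecond range into `density`
// steps, scaled down by the last argument; a minimal sketch under that
// assumption:
//
//	func getStepSize(startMs, endMs int64, density int, decimal bool, factor int64) int64 {
//		if density <= 1 {
//			return (endMs - startMs) / factor
//		}
//		// e.g. a 7-day range with density 7 and factor 1000 yields 86400 (one day, in seconds)
//		return (endMs - startMs) / int64(density) / factor
//	}
//
// Only the argument count and types are taken from the call sites; the name
// of the bool parameter and the rounding behavior are guesses.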
type TimeSeriesQueryBuilder struct{} -func (t TimeSeriesQueryBuilder) Execute(p *Payload, conn db.Connector) (interface{}, error) { +func (t TimeSeriesQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { query, err := t.buildQuery(p) if err != nil { log.Fatalf("Error building query: %v", err) @@ -23,7 +24,6 @@ func (t TimeSeriesQueryBuilder) Execute(p *Payload, conn db.Connector) (interfac defer rows.Close() var results []DataPoint - for rows.Next() { var res DataPoint if err := rows.Scan(&res.Timestamp, &res.Count); err != nil { @@ -37,7 +37,7 @@ func (t TimeSeriesQueryBuilder) Execute(p *Payload, conn db.Connector) (interfac return filled, nil } -func (t TimeSeriesQueryBuilder) buildQuery(p *Payload) (string, error) { +func (t TimeSeriesQueryBuilder) buildQuery(p Payload) (string, error) { query := "" switch p.MetricOf { case "sessionCount": @@ -50,67 +50,102 @@ func (t TimeSeriesQueryBuilder) buildQuery(p *Payload) (string, error) { return query, nil } -func (TimeSeriesQueryBuilder) buildSessionCountQuery(p *Payload) string { - stepSize := int(getStepSize(p.StartTimestamp, p.EndTimestamp, p.Density, false, 1000)) - subquery := buildEventSubquery(p) - return fmt.Sprintf(`SELECT toUnixTimestamp( - toStartOfInterval(processed_sessions.datetime, INTERVAL %d second) -) * 1000 AS timestamp, -COUNT(processed_sessions.session_id) AS count -FROM ( - %s -) AS processed_sessions -GROUP BY timestamp -ORDER BY timestamp;`, stepSize, subquery) +func (TimeSeriesQueryBuilder) buildSessionCountQuery(p Payload) string { + eventConds, eventNames := buildEventConditions(p.Series[0].Filter.Filters) + sessionConds := buildSessionConditions(p.Series[0].Filter.Filters) + staticEvt := buildStaticEventWhere(p) + sessWhere, sessJoin := buildStaticSessionWhere(p, sessionConds) + eventsSubQuery := buildEventsSubQuery(eventConds, eventNames, staticEvt, sessWhere, sessJoin) + mainQuery := buildMainQuery(p, eventsSubQuery) + return mainQuery } -func (TimeSeriesQueryBuilder) buildUserCountQuery(p *Payload) string { - stepSize := int(getStepSize(p.StartTimestamp, p.EndTimestamp, p.Density, false, 1000)) - subquery := buildEventSubquery(p) - return fmt.Sprintf(`SELECT toUnixTimestamp( - toStartOfInterval(processed_sessions.datetime, INTERVAL %d second) -) * 1000 AS timestamp, -COUNT(DISTINCT processed_sessions.user_id) AS count -FROM ( - %s -) AS processed_sessions -GROUP BY timestamp -ORDER BY timestamp;`, stepSize, subquery) +func (TimeSeriesQueryBuilder) buildUserCountQuery(p Payload) string { + eventConds, eventNames := buildEventConditions(p.Series[0].Filter.Filters) + sessionConds := buildSessionConditions(p.Series[0].Filter.Filters) + staticEvt := buildStaticEventWhere(p) + sessWhere, sessJoin := buildStaticSessionWhere(p, sessionConds) + eventsSubQuery := buildEventsSubQuery(eventConds, eventNames, staticEvt, sessWhere, sessJoin) + mainQuery := buildMainQuery(p, eventsSubQuery) + return mainQuery } -func FillMissingDataPoints( - startTime, endTime int64, - density int, - neutral DataPoint, - rows []DataPoint, - timeCoefficient int64, -) []DataPoint { - if density <= 1 { - return rows +func buildEventsSubQuery(eventConds, eventNames []string, staticEvt, sessWhere, sessJoin string) string { + if len(eventConds) == 0 && len(eventNames) == 0 { + return fmt.Sprintf(noEventSubQueryTpl, sessJoin) } - - stepSize := uint64(getStepSize(startTime, endTime, density, false, 1000)) - bucketSize := stepSize * uint64(timeCoefficient) - - lookup := make(map[uint64]DataPoint) - for _, dp := range rows { - if 
dp.Timestamp < uint64(startTime) { - continue - } - bucket := uint64(startTime) + (((dp.Timestamp - uint64(startTime)) / bucketSize) * bucketSize) - lookup[bucket] = dp - } - - results := make([]DataPoint, 0, density) - for i := 0; i < density; i++ { - ts := uint64(startTime) + uint64(i)*bucketSize - if dp, ok := lookup[ts]; ok { - results = append(results, dp) - } else { - nd := neutral - nd.Timestamp = ts - results = append(results, nd) + var evtNameClause string + var unique []string + for _, name := range eventNames { + if !contains(unique, name) { + unique = append(unique, name) } } - return results + if len(unique) > 0 { + evtNameClause = fmt.Sprintf("AND main.`$event_name` IN (%s)", buildInClause(unique)) + } + having := "" + if len(eventConds) > 0 { + having = buildHavingClause(eventConds) + } + evtWhere := staticEvt + if len(eventConds) > 0 { + evtWhere += " AND " + strings.Join(eventConds, " AND ") + } + return fmt.Sprintf(eventsSubQueryTpl, evtWhere, sessWhere, evtNameClause, having, sessJoin) } + +func buildMainQuery(p Payload, subQuery string) string { + step := int(getStepSize(p.StartTimestamp, p.EndTimestamp, p.Density, false, 1000)) + return fmt.Sprintf(mainQueryTpl, p.StartTimestamp, p.EndTimestamp, step, subQuery, step) +} + +var eventsSubQueryTpl = ` +SELECT multiIf( + s.user_id IS NOT NULL AND s.user_id != '', s.user_id, + s.user_anonymous_id IS NOT NULL AND s.user_anonymous_id != '', s.user_anonymous_id, + toString(s.user_uuid)) AS user_id, + s.datetime AS datetime +FROM ( + SELECT main.session_id, + MIN(main.created_at) AS first_event_ts, + MAX(main.created_at) AS last_event_ts + FROM product_analytics.events AS main + WHERE %s + AND main.session_id IN ( + SELECT s.session_id + FROM experimental.sessions AS s + WHERE %s + ) + %s + GROUP BY session_id + %s + INNER JOIN ( + SELECT * + FROM experimental.sessions AS s + WHERE %s + ) AS s ON (s.session_id = f.session_id) +` + +var noEventSubQueryTpl = ` +SELECT multiIf( + s.user_id IS NOT NULL AND s.user_id != '', s.user_id, + s.user_anonymous_id IS NOT NULL AND s.user_anonymous_id != '', s.user_anonymous_id, + toString(s.user_uuid)) AS user_id, + s.datetime AS datetime +FROM experimental.sessions AS s +WHERE %s +` + +var mainQueryTpl = ` +SELECT gs.generate_series AS timestamp, + COALESCE(COUNT(DISTINCT processed_sessions.user_id), 0) AS count +FROM generate_series(%d, %d, %d) AS gs +LEFT JOIN ( + %s +) AS processed_sessions ON (TRUE) +WHERE processed_sessions.datetime >= toDateTime(timestamp / 1000) + AND processed_sessions.datetime < toDateTime((timestamp + %d) / 1000) +GROUP BY timestamp +ORDER BY timestamp; +` diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go index 721008ea8..b83790578 100644 --- a/backend/pkg/analytics/charts/model.go +++ b/backend/pkg/analytics/charts/model.go @@ -68,12 +68,14 @@ type MetricPayload struct { Series []Series `json:"series"` } +type FilterGroup struct { + Filters []Filter `json:"filters"` + EventsOrder EventOrder `json:"eventsOrder"` +} + type Series struct { - Name string `json:"name"` - Filter struct { - Filters []Filter `json:"filters"` - EventsOrder EventOrder `json:"eventsOrder"` - } `json:"filter"` + Name string `json:"name"` + Filter FilterGroup `json:"filter"` } type Filter struct { @@ -81,6 +83,7 @@ type Filter struct { IsEvent bool `json:"isEvent"` Value []string `json:"value"` Operator string `json:"operator"` + Source string `json:"source,omitempty"` Filters []Filter `json:"filters"` } @@ -109,11 +112,10 @@ const ( FilterClick 
FilterType = "click" FilterInput FilterType = "input" FilterLocation FilterType = "location" + FilterTag FilterType = "tag" FilterCustom FilterType = "customEvent" FilterFetch FilterType = "fetch" - FilterFetchStatusCode FilterType = "status" - FilterTag FilterType = "tag" - FilterNetworkRequest FilterType = "fetch" + FilterFetchStatusCode FilterType = "fetchStatusCode" // Subfilter FilterGraphQLRequest FilterType = "graphql" FilterStateAction FilterType = "stateAction" FilterError FilterType = "error" @@ -139,7 +141,7 @@ const ( OperatorStringIsNot = "isNot" OperatorStringIsUndefined = "isUndefined" OperatorStringNotOn = "notOn" - OperatorStringContains = "contains" + OperatorContains = "contains" OperatorStringNotContains = "notContains" OperatorStringStartsWith = "startsWith" OperatorStringEndsWith = "endsWith" diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go index 1d82bd5da..b018d2ce9 100644 --- a/backend/pkg/analytics/charts/query.go +++ b/backend/pkg/analytics/charts/query.go @@ -13,10 +13,10 @@ type Payload struct { } type QueryBuilder interface { - Execute(p *Payload, conn db.Connector) (interface{}, error) + Execute(p Payload, conn db.Connector) (interface{}, error) } -func NewQueryBuilder(p *Payload) (QueryBuilder, error) { +func NewQueryBuilder(p Payload) (QueryBuilder, error) { switch p.MetricType { case MetricTypeTimeseries: return TimeSeriesQueryBuilder{}, nil @@ -29,122 +29,496 @@ func NewQueryBuilder(p *Payload) (QueryBuilder, error) { } } -func buildEventSubquery(p *Payload) string { - baseEventsWhere := buildBaseEventsWhere(p) - sequenceCond := buildSequenceCondition(p.Series) - sessionsWhere := buildSessionsWhere(p) +//func pickIDField(p Payload) string { +// if p.MetricOf == "userCount" { +// return "user_id" +// } +// return "session_id" +//} - if sequenceCond.seqPattern == "" { - return fmt.Sprintf(` -SELECT s.%[1]s AS %[1]s, - s.datetime AS datetime -FROM ( - SELECT main.session_id, - MIN(main.created_at) AS first_event_ts, - MAX(main.created_at) AS last_event_ts - FROM product_analytics.events AS main - WHERE %[2]s - GROUP BY session_id -) AS f -INNER JOIN ( - SELECT * - FROM experimental.sessions AS s - WHERE %[3]s -) AS s ON (s.session_id = f.session_id) -`, pickIDField(p), baseEventsWhere, sessionsWhere) - } +//func buildBaseEventsWhere(p Payload) string { +// ts := fmt.Sprintf( +// `(main.created_at >= toDateTime(%d / 1000) AND main.created_at <= toDateTime(%d / 1000))`, +// p.StartTimestamp, +// p.EndTimestamp, +// ) +// return fmt.Sprintf(`main.project_id = %d AND %s`, p.ProjectId, ts) +//} - return fmt.Sprintf(` -SELECT s.%[1]s AS %[1]s, - s.datetime AS datetime -FROM ( - SELECT main.session_id, - MIN(main.created_at) AS first_event_ts, - MAX(main.created_at) AS last_event_ts - FROM product_analytics.events AS main - WHERE %[2]s - GROUP BY session_id - HAVING sequenceMatch('%[3]s')(toDateTime(main.created_at), %[4]s) -) AS f -INNER JOIN ( - SELECT * - FROM experimental.sessions AS s - WHERE %[5]s -) AS s ON (s.session_id = f.session_id) -`, pickIDField(p), baseEventsWhere, sequenceCond.seqPattern, sequenceCond.seqEvents, sessionsWhere) -} +//func buildSessionsWhere(p Payload) string { +// ts := fmt.Sprintf( +// `(s.datetime >= toDateTime(%d / 1000) AND s.datetime <= toDateTime(%d / 1000))`, +// p.StartTimestamp, +// p.EndTimestamp, +// ) +// return fmt.Sprintf(`s.project_id = %d AND isNotNull(s.duration) AND %s`, p.ProjectId, ts) +//} -func pickIDField(p *Payload) string { - if p.MetricOf == "userCount" { - return 
"user_id" - } - return "session_id" -} +//type sequenceParts struct { +// seqPattern string +// seqEvents string +//} -func buildBaseEventsWhere(p *Payload) string { - ts := fmt.Sprintf( - `(main.created_at >= toDateTime(%d / 1000) AND main.created_at <= toDateTime(%d / 1000))`, - p.StartTimestamp, - p.EndTimestamp, - ) - return fmt.Sprintf(`main.project_id = %d AND %s`, p.ProjectId, ts) -} +//func buildSequenceCondition(series []Series) sequenceParts { +// var events []string +// for _, s := range series { +// if len(s.Filter.Filters) > 0 { +// events = append(events, buildOneSeriesSequence(s.Filter.Filters)) +// } +// } +// if len(events) < 2 { +// return sequenceParts{"", ""} +// } +// pattern := "" +// for i := 1; i <= len(events); i++ { +// pattern += fmt.Sprintf("(?%d)", i) +// } +// return sequenceParts{ +// seqPattern: pattern, +// seqEvents: strings.Join(events, ", "), +// } +//} -func buildSessionsWhere(p *Payload) string { - ts := fmt.Sprintf( - `(s.datetime >= toDateTime(%d / 1000) AND s.datetime <= toDateTime(%d / 1000))`, - p.StartTimestamp, - p.EndTimestamp, - ) - return fmt.Sprintf(`s.project_id = %d AND isNotNull(s.duration) AND %s`, p.ProjectId, ts) -} +//func buildOneSeriesSequence(filters []Filter) string { +// return strings.Join(buildFilterConditions(filters), " AND ") +//} +// +//func buildFilterConditions(filters []Filter) []string { +// var out []string +// for _, f := range filters { +// switch f.Type { +// case FilterClick: +// out = append(out, +// fmt.Sprintf(`(main."$event_name" = 'CLICK' AND JSONExtractString(toString(main."$properties"), 'label') IN ('%s'))`, +// strings.Join(f.Value, "','"))) +// case FilterInput: +// out = append(out, +// fmt.Sprintf(`(main."$event_name" = 'INPUT' AND JSONExtractString(toString(main."$properties"), 'label') IN ('%s'))`, +// strings.Join(f.Value, "','"))) +// +// default: +// out = append(out, +// fmt.Sprintf(`(main."$event_name" = '%s')`, strings.ToUpper(string(f.Type)))) +// } +// } +// return out +//} -type sequenceParts struct { - seqPattern string - seqEvents string -} - -func buildSequenceCondition(series []Series) sequenceParts { - var events []string - for _, s := range series { - if len(s.Filter.Filters) > 0 { - events = append(events, buildOneSeriesSequence(s.Filter.Filters)) +func partitionFilters(filters []Filter) (sessionFilters []Filter, eventFilters []Filter) { + for _, f := range filters { + if f.IsEvent { + eventFilters = append(eventFilters, f) + } else { + sessionFilters = append(sessionFilters, f) } } - if len(events) < 2 { - return sequenceParts{"", ""} - } - pattern := "" - for i := 1; i <= len(events); i++ { - pattern += fmt.Sprintf("(?%d)", i) - } - return sequenceParts{ - seqPattern: pattern, - seqEvents: strings.Join(events, ", "), - } + return } -func buildOneSeriesSequence(filters []Filter) string { - return strings.Join(buildFilterConditions(filters), " AND ") -} +func buildEventsWhere(filters []Filter, order EventOrder) (eventConditions []string, having string) { + basicEventTypes := "(" + + strings.Join([]string{ + fmt.Sprintf("%s = 'CLICK'", ColEventName), + fmt.Sprintf("%s = 'INPUT'", ColEventName), + fmt.Sprintf("%s = 'LOCATION'", ColEventName), + fmt.Sprintf("%s = 'CUSTOM'", ColEventName), + fmt.Sprintf("%s = 'REQUEST'", ColEventName), + }, " OR ") + ")" -func buildFilterConditions(filters []Filter) []string { - var out []string + var seq []string for _, f := range filters { switch f.Type { case FilterClick: - out = append(out, - fmt.Sprintf(`(main."$event_name" = 'CLICK' AND 
JSONExtractString(toString(main."$properties"), 'label') IN ('%s'))`, - strings.Join(f.Value, "','"))) + seq = append(seq, seqCond("CLICK", "selector", f)) case FilterInput: - out = append(out, - fmt.Sprintf(`(main."$event_name" = 'INPUT' AND JSONExtractString(toString(main."$properties"), 'label') IN ('%s'))`, - strings.Join(f.Value, "','"))) - + seq = append(seq, seqCond("INPUT", "label", f)) + case FilterLocation: + seq = append(seq, seqCond("LOCATION", "url_path", f)) + case FilterCustom: + seq = append(seq, seqCond("CUSTOM", "name", f)) + case FilterFetch: + seq = append(seq, seqFetchCond("REQUEST", f)) + case FilterFetchStatusCode: + seq = append(seq, seqCond("REQUEST", "status", f)) default: - out = append(out, - fmt.Sprintf(`(main."$event_name" = '%s')`, strings.ToUpper(string(f.Type)))) + seq = append(seq, fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(string(f.Type)))) } } - return out + eventConditions = []string{basicEventTypes} + + // then => sequenceMatch + // or => OR + // and => AND + switch order { + case EventOrderThen: + var pattern []string + for i := range seq { + pattern = append(pattern, fmt.Sprintf("(?%d)", i+1)) + } + having = fmt.Sprintf("sequenceMatch('%s')(\n%s,\n%s)", + strings.Join(pattern, ""), fmt.Sprintf("toUnixTimestamp(%s)", ColEventTime), strings.Join(seq, ",\n")) + case EventOrderAnd: + // build AND + having = strings.Join(seq, " AND ") + case EventOrderOr: + default: + // default => OR + var orParts []string + for _, p := range seq { + orParts = append(orParts, "("+p+")") + } + having = strings.Join(orParts, " OR ") + } + return eventConditions, having +} + +func buildSessionWhere(filters []Filter) []string { + var conds []string + for _, f := range filters { + switch f.Type { + case FilterUserCountry: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCountry, concatValues(f.Value))) + case FilterUserCity: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCity, concatValues(f.Value))) + case FilterUserState: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserState, concatValues(f.Value))) + case FilterUserId: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserID, concatValues(f.Value))) + case FilterUserAnonymousId: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserAnonymousID, concatValues(f.Value))) + case FilterUserOs: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserOS, concatValues(f.Value))) + case FilterUserBrowser: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserBrowser, concatValues(f.Value))) + case FilterUserDevice: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDevice, concatValues(f.Value))) + case FilterPlatform: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDeviceType, concatValues(f.Value))) + case FilterRevId: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColRevID, concatValues(f.Value))) + case FilterReferrer: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColBaseReferrer, concatValues(f.Value))) + case FilterDuration: + if len(f.Value) == 2 { + conds = append(conds, fmt.Sprintf("%s >= '%s'", ColDuration, f.Value[0])) + conds = append(conds, fmt.Sprintf("%s <= '%s'", ColDuration, f.Value[1])) + } + case FilterUtmSource: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmSource, concatValues(f.Value))) + case FilterUtmMedium: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmMedium, concatValues(f.Value))) + case 
FilterUtmCampaign: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmCampaign, concatValues(f.Value))) + case FilterMetadata: + conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColMetadata1, concatValues(f.Value))) + } + } + // adding /n to each condition for better readability, can be removed. + for i := range conds { + conds[i] += "\n" + } + return conds +} + +func concatValues(v []string) string { + return strings.Join(v, "") +} + +func seqCond(eventName, key string, f Filter) string { + op := parseOperator(f.Operator) + return fmt.Sprintf("(%s = '%s' AND JSONExtractString(toString(%s), '%s') %s '%s')", + ColEventName, strings.ToUpper(eventName), ColEventProperties, key, op, concatValues(f.Value)) +} + +func seqFetchCond(eventName string, f Filter) string { + w := []string{fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(eventName))} + var extras []string + for _, c := range f.Filters { + switch c.Type { + case FilterFetch: + if len(c.Value) > 0 { + extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventURLPath, concatValues(c.Value))) + } + case FilterFetchStatusCode: + if len(c.Value) > 0 { + extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventStatus, concatValues(c.Value))) + } + default: + // placeholder if needed + } + } + if len(extras) > 0 { + w = append(w, strings.Join(extras, " AND ")) + } + return "(" + strings.Join(w, " AND ") + ")" +} + +func parseOperator(op string) string { + // TODO implement this properly + switch strings.ToLower(op) { + case OperatorContains: + return "LIKE" + case OperatorStringIs, OperatorStringOn, "=", OperatorStringOnAny: + return "=" + case OperatorStringStartsWith: + return "LIKE" + case OperatorStringEndsWith: + // might interpret differently in real impl + return "=" + default: + return "=" + } +} + +func buildEventConditions(filters []Filter) (conds, names []string) { + for _, f := range filters { + if f.IsEvent { + switch f.Type { + case FilterClick: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'label')", f.Value, "is") + if c != "" { + conds = append(conds, c) + } + names = append(names, "CLICK") + case FilterInput: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'label')", f.Value, f.Operator) + if c != "" { + conds = append(conds, c) + } + names = append(names, "INPUT") + case FilterLocation: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'url_path')", f.Value, f.Operator) + if c != "" { + conds = append(conds, c) + } + names = append(names, "LOCATION") + case FilterCustom: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'name')", f.Value, f.Operator) + if c != "" { + conds = append(conds, c) + } + names = append(names, "CUSTOM") + case FilterFetch: + var fetchConds []string + for _, nf := range f.Filters { + switch nf.Type { + case "fetchUrl": + c := buildCond("JSONExtractString(toString(main.`$properties`), 'url_path')", nf.Value, f.Operator) + if c != "" { + fetchConds = append(fetchConds, c) + } + case "fetchStatusCode": + c := buildCond("JSONExtractFloat(toString(main.`$properties`), 'status')", nf.Value, f.Operator) + if c != "" { + fetchConds = append(fetchConds, c) + } + } + } + if len(fetchConds) > 0 { + conds = append(conds, strings.Join(fetchConds, " AND ")) + } + names = append(names, "REQUEST") + case FilterTag: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'tag')", f.Value, f.Operator) + if c != "" { + conds = append(conds, c) + } + names = append(names, "TAG") + } + } + } + return +} + 
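
// A minimal usage sketch (illustrative only; the filter value "Buy Now" is
// hypothetical): one CLICK event filter yields a single JSON-property
// condition plus its event name. Callers splice the conditions into the
// events subquery WHERE clause and the names into a `$event_name` IN (...)
// clause.
//
//	filters := []Filter{{Type: FilterClick, IsEvent: true, Value: []string{"Buy Now"}}}
//	conds, names := buildEventConditions(filters)
//	// conds[0]: JSONExtractString(toString(main.`$properties`), 'label') = 'Buy Now'
//	// names[0]: CLICK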
+func buildSessionConditions(filters []Filter) []string { + var conds []string + for _, f := range filters { + if !f.IsEvent { + switch f.Type { + case FilterUserCountry: + conds = append(conds, buildCond("s.user_country", f.Value, f.Operator)) + case FilterUserCity: + conds = append(conds, buildCond("s.user_city", f.Value, f.Operator)) + case FilterUserState: + conds = append(conds, buildCond("s.user_state", f.Value, f.Operator)) + case FilterUserId: + conds = append(conds, buildCond("s.user_id", f.Value, f.Operator)) + case FilterUserAnonymousId: + conds = append(conds, buildCond("s.user_anonymous_id", f.Value, f.Operator)) + case FilterUserOs: + conds = append(conds, buildCond("s.user_os", f.Value, f.Operator)) + case FilterUserBrowser: + conds = append(conds, buildCond("s.user_browser", f.Value, f.Operator)) + case FilterUserDevice: + conds = append(conds, buildCond("s.user_device", f.Value, f.Operator)) + case FilterPlatform: + conds = append(conds, buildCond("s.user_device_type", f.Value, f.Operator)) + case FilterRevId: + conds = append(conds, buildCond("s.rev_id", f.Value, f.Operator)) + case FilterReferrer: + conds = append(conds, buildCond("s.base_referrer", f.Value, f.Operator)) + case FilterDuration: + if len(f.Value) == 2 { + conds = append(conds, fmt.Sprintf("s.duration >= '%s'", f.Value[0])) + conds = append(conds, fmt.Sprintf("s.duration <= '%s'", f.Value[1])) + } + case FilterUtmSource: + conds = append(conds, buildCond("s.utm_source", f.Value, f.Operator)) + case FilterUtmMedium: + conds = append(conds, buildCond("s.utm_medium", f.Value, f.Operator)) + case FilterUtmCampaign: + conds = append(conds, buildCond("s.utm_campaign", f.Value, f.Operator)) + case FilterMetadata: + if f.Source != "" { + conds = append(conds, buildCond(fmt.Sprintf("s.%s", f.Source), f.Value, f.Operator)) + } + } + } + } + return conds +} + +func buildCond(expr string, values []string, operator string) string { + if len(values) == 0 { + return "" + } + switch operator { + case "contains": + var conds []string + for _, v := range values { + conds = append(conds, fmt.Sprintf("%s ILIKE '%%%s%%'", expr, v)) + } + if len(conds) > 1 { + return "(" + strings.Join(conds, " OR ") + ")" + } + return conds[0] + case "notContains": + var conds []string + for _, v := range values { + conds = append(conds, fmt.Sprintf("NOT (%s ILIKE '%%%s%%')", expr, v)) + } + if len(conds) > 1 { + return "(" + strings.Join(conds, " OR ") + ")" + } + return conds[0] + case "startsWith": + var conds []string + for _, v := range values { + conds = append(conds, fmt.Sprintf("%s ILIKE '%s%%'", expr, v)) + } + if len(conds) > 1 { + return "(" + strings.Join(conds, " OR ") + ")" + } + return conds[0] + case "endsWith": + var conds []string + for _, v := range values { + conds = append(conds, fmt.Sprintf("%s ILIKE '%%%s'", expr, v)) + } + if len(conds) > 1 { + return "(" + strings.Join(conds, " OR ") + ")" + } + return conds[0] + default: + if len(values) > 1 { + var quoted []string + for _, v := range values { + quoted = append(quoted, fmt.Sprintf("'%s'", v)) + } + return fmt.Sprintf("%s IN (%s)", expr, strings.Join(quoted, ",")) + } + return fmt.Sprintf("%s = '%s'", expr, values[0]) + } +} + +func buildInClause(values []string) string { + var quoted []string + for _, v := range values { + quoted = append(quoted, fmt.Sprintf("'%s'", v)) + } + return strings.Join(quoted, ",") +} + +func buildStaticEventWhere(p Payload) string { + return strings.Join([]string{ + fmt.Sprintf("main.project_id = %d", p.ProjectId), + 
fmt.Sprintf("main.created_at >= toDateTime(%d / 1000)", p.StartTimestamp), + fmt.Sprintf("main.created_at <= toDateTime(%d / 1000)", p.EndTimestamp), + }, " AND ") +} + +func buildStaticSessionWhere(p Payload, sessionConds []string) (string, string) { + static := []string{fmt.Sprintf("s.project_id = %d", p.ProjectId)} + sessWhere := strings.Join(static, " AND ") + if len(sessionConds) > 0 { + sessWhere += " AND " + strings.Join(sessionConds, " AND ") + } + sessJoin := strings.Join(append(static, append(sessionConds, + fmt.Sprintf("s.datetime >= toDateTime(%d / 1000)", p.StartTimestamp), + fmt.Sprintf("s.datetime <= toDateTime(%d / 1000)", p.EndTimestamp))...), " AND ") + return sessWhere, sessJoin +} + +func buildHavingClause(conds []string) string { + seqConds := append([]string{}, conds...) + if len(seqConds) == 1 { + seqConds = append(seqConds, "1") + } + if len(seqConds) == 0 { + return "" + } + var parts []string + for i := range seqConds { + parts = append(parts, fmt.Sprintf("(?%d)", i+1)) + } + pattern := strings.Join(parts, "") + args := []string{"toDateTime(main.created_at)"} + args = append(args, seqConds...) + return fmt.Sprintf("HAVING sequenceMatch('%s')(%s)) AS f", pattern, strings.Join(args, ",\n ")) +} + +func contains(slice []string, s string) bool { + for _, v := range slice { + if v == s { + return true + } + } + return false +} + +func FillMissingDataPoints( + startTime, endTime int64, + density int, + neutral DataPoint, + rows []DataPoint, + timeCoefficient int64, +) []DataPoint { + if density <= 1 { + return rows + } + + stepSize := uint64(getStepSize(startTime, endTime, density, false, 1000)) + bucketSize := stepSize * uint64(timeCoefficient) + + lookup := make(map[uint64]DataPoint) + for _, dp := range rows { + if dp.Timestamp < uint64(startTime) { + continue + } + bucket := uint64(startTime) + (((dp.Timestamp - uint64(startTime)) / bucketSize) * bucketSize) + lookup[bucket] = dp + } + + results := make([]DataPoint, 0, density) + for i := 0; i < density; i++ { + ts := uint64(startTime) + uint64(i)*bucketSize + if dp, ok := lookup[ts]; ok { + results = append(results, dp) + } else { + nd := neutral + nd.Timestamp = ts + results = append(results, nd) + } + } + return results } From e1cd23063324d54caad207b32308e79352b4805d Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Tue, 18 Feb 2025 18:10:14 +0100 Subject: [PATCH 06/30] feat(analytics): filter operators --- backend/pkg/analytics/charts/query.go | 147 +++++++++++--------------- 1 file changed, 62 insertions(+), 85 deletions(-) diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go index b018d2ce9..6ea789c8c 100644 --- a/backend/pkg/analytics/charts/query.go +++ b/backend/pkg/analytics/charts/query.go @@ -29,81 +29,6 @@ func NewQueryBuilder(p Payload) (QueryBuilder, error) { } } -//func pickIDField(p Payload) string { -// if p.MetricOf == "userCount" { -// return "user_id" -// } -// return "session_id" -//} - -//func buildBaseEventsWhere(p Payload) string { -// ts := fmt.Sprintf( -// `(main.created_at >= toDateTime(%d / 1000) AND main.created_at <= toDateTime(%d / 1000))`, -// p.StartTimestamp, -// p.EndTimestamp, -// ) -// return fmt.Sprintf(`main.project_id = %d AND %s`, p.ProjectId, ts) -//} - -//func buildSessionsWhere(p Payload) string { -// ts := fmt.Sprintf( -// `(s.datetime >= toDateTime(%d / 1000) AND s.datetime <= toDateTime(%d / 1000))`, -// p.StartTimestamp, -// p.EndTimestamp, -// ) -// return fmt.Sprintf(`s.project_id = %d AND isNotNull(s.duration) AND %s`, 
p.ProjectId, ts) -//} - -//type sequenceParts struct { -// seqPattern string -// seqEvents string -//} - -//func buildSequenceCondition(series []Series) sequenceParts { -// var events []string -// for _, s := range series { -// if len(s.Filter.Filters) > 0 { -// events = append(events, buildOneSeriesSequence(s.Filter.Filters)) -// } -// } -// if len(events) < 2 { -// return sequenceParts{"", ""} -// } -// pattern := "" -// for i := 1; i <= len(events); i++ { -// pattern += fmt.Sprintf("(?%d)", i) -// } -// return sequenceParts{ -// seqPattern: pattern, -// seqEvents: strings.Join(events, ", "), -// } -//} - -//func buildOneSeriesSequence(filters []Filter) string { -// return strings.Join(buildFilterConditions(filters), " AND ") -//} -// -//func buildFilterConditions(filters []Filter) []string { -// var out []string -// for _, f := range filters { -// switch f.Type { -// case FilterClick: -// out = append(out, -// fmt.Sprintf(`(main."$event_name" = 'CLICK' AND JSONExtractString(toString(main."$properties"), 'label') IN ('%s'))`, -// strings.Join(f.Value, "','"))) -// case FilterInput: -// out = append(out, -// fmt.Sprintf(`(main."$event_name" = 'INPUT' AND JSONExtractString(toString(main."$properties"), 'label') IN ('%s'))`, -// strings.Join(f.Value, "','"))) -// -// default: -// out = append(out, -// fmt.Sprintf(`(main."$event_name" = '%s')`, strings.ToUpper(string(f.Type)))) -// } -// } -// return out -//} - func partitionFilters(filters []Filter) (sessionFilters []Filter, eventFilters []Filter) { for _, f := range filters { if f.IsEvent { @@ -357,17 +282,17 @@ func buildSessionConditions(filters []Filter) []string { conds = append(conds, buildCond("s.rev_id", f.Value, f.Operator)) case FilterReferrer: conds = append(conds, buildCond("s.base_referrer", f.Value, f.Operator)) - case FilterDuration: - if len(f.Value) == 2 { - conds = append(conds, fmt.Sprintf("s.duration >= '%s'", f.Value[0])) - conds = append(conds, fmt.Sprintf("s.duration <= '%s'", f.Value[1])) - } case FilterUtmSource: conds = append(conds, buildCond("s.utm_source", f.Value, f.Operator)) case FilterUtmMedium: conds = append(conds, buildCond("s.utm_medium", f.Value, f.Operator)) case FilterUtmCampaign: conds = append(conds, buildCond("s.utm_campaign", f.Value, f.Operator)) + case FilterDuration: + if len(f.Value) == 2 { + conds = append(conds, fmt.Sprintf("s.duration >= '%s'", f.Value[0])) + conds = append(conds, fmt.Sprintf("s.duration <= '%s'", f.Value[1])) + } case FilterMetadata: if f.Source != "" { conds = append(conds, buildCond(fmt.Sprintf("s.%s", f.Source), f.Value, f.Operator)) @@ -419,13 +344,65 @@ func buildCond(expr string, values []string, operator string) string { return "(" + strings.Join(conds, " OR ") + ")" } return conds[0] + case "notEquals": + if len(values) > 1 { + return fmt.Sprintf("%s NOT IN (%s)", expr, buildInClause(values)) + } + return fmt.Sprintf("%s <> '%s'", expr, values[0]) + case "greaterThan": + var conds []string + for _, v := range values { + conds = append(conds, fmt.Sprintf("%s > '%s'", expr, v)) + } + if len(conds) > 1 { + return "(" + strings.Join(conds, " OR ") + ")" + } + return conds[0] + case "greaterThanOrEqual": + var conds []string + for _, v := range values { + conds = append(conds, fmt.Sprintf("%s >= '%s'", expr, v)) + } + if len(conds) > 1 { + return "(" + strings.Join(conds, " OR ") + ")" + } + return conds[0] + case "lessThan": + var conds []string + for _, v := range values { + conds = append(conds, fmt.Sprintf("%s < '%s'", expr, v)) + } + if len(conds) > 1 { + return 
"(" + strings.Join(conds, " OR ") + ")" + } + return conds[0] + case "lessThanOrEqual": + var conds []string + for _, v := range values { + conds = append(conds, fmt.Sprintf("%s <= '%s'", expr, v)) + } + if len(conds) > 1 { + return "(" + strings.Join(conds, " OR ") + ")" + } + return conds[0] + case "in": + if len(values) > 1 { + return fmt.Sprintf("%s IN (%s)", expr, buildInClause(values)) + } + return fmt.Sprintf("%s = '%s'", expr, values[0]) + case "notIn": + if len(values) > 1 { + return fmt.Sprintf("%s NOT IN (%s)", expr, buildInClause(values)) + } + return fmt.Sprintf("%s <> '%s'", expr, values[0]) + case "equals", "is": + if len(values) > 1 { + return fmt.Sprintf("%s IN (%s)", expr, buildInClause(values)) + } + return fmt.Sprintf("%s = '%s'", expr, values[0]) default: if len(values) > 1 { - var quoted []string - for _, v := range values { - quoted = append(quoted, fmt.Sprintf("'%s'", v)) - } - return fmt.Sprintf("%s IN (%s)", expr, strings.Join(quoted, ",")) + return fmt.Sprintf("%s IN (%s)", expr, buildInClause(values)) } return fmt.Sprintf("%s = '%s'", expr, values[0]) } From 6ad249bf6e71823e9840cd8e91eb722f61b776da Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Wed, 19 Feb 2025 16:11:13 +0100 Subject: [PATCH 07/30] feat(analytics): multi series results --- .../pkg/analytics/charts/metric_timeseries.go | 164 +++++++++++++----- 1 file changed, 124 insertions(+), 40 deletions(-) diff --git a/backend/pkg/analytics/charts/metric_timeseries.go b/backend/pkg/analytics/charts/metric_timeseries.go index 976b42b1b..44aee857c 100644 --- a/backend/pkg/analytics/charts/metric_timeseries.go +++ b/backend/pkg/analytics/charts/metric_timeseries.go @@ -4,75 +4,109 @@ import ( "fmt" "log" "openreplay/backend/pkg/analytics/db" + "sort" "strings" ) type TimeSeriesQueryBuilder struct{} func (t TimeSeriesQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { - query, err := t.buildQuery(p) - if err != nil { - log.Fatalf("Error building query: %v", err) - return nil, err - } + consolidated := map[uint64]map[string]uint64{} - rows, err := conn.Query(query) - if err != nil { - log.Fatalf("Error executing query: %v", err) - return nil, err - } - defer rows.Close() - - var results []DataPoint - for rows.Next() { - var res DataPoint - if err := rows.Scan(&res.Timestamp, &res.Count); err != nil { + for _, s := range p.Series { + query, err := t.buildQuery(p, s) + if err != nil { + log.Fatalf("Error building query for series %s: %v", s.Name, err) return nil, err } - //sum += res.Count - results = append(results, res) + + rows, err := conn.Query(query) + if err != nil { + log.Fatalf("Error executing query for series %s: %v", s.Name, err) + return nil, err + } + + var results []DataPoint + for rows.Next() { + var res DataPoint + if err := rows.Scan(&res.Timestamp, &res.Count); err != nil { + rows.Close() + return nil, err + } + results = append(results, res) + } + rows.Close() + + filled := FillMissingDataPoints(p.StartTimestamp, p.EndTimestamp, p.Density, DataPoint{}, results, 1000) + for _, dp := range filled { + if _, ok := consolidated[dp.Timestamp]; !ok { + consolidated[dp.Timestamp] = map[string]uint64{} + } + consolidated[dp.Timestamp][s.Name] = dp.Count + } } - filled := FillMissingDataPoints(p.StartTimestamp, p.EndTimestamp, p.Density, DataPoint{}, results, 1000) - return filled, nil + var timestamps []uint64 + for ts := range consolidated { + timestamps = append(timestamps, ts) + } + sort.Slice(timestamps, func(i, j int) bool { return timestamps[i] < timestamps[j] }) + + var 
finalResults []map[string]interface{} + for _, ts := range timestamps { + row := map[string]interface{}{"timestamp": ts} + for _, s := range p.Series { + if count, ok := consolidated[ts][s.Name]; ok { + row[s.Name] = count + } else { + row[s.Name] = uint64(0) + } + } + finalResults = append(finalResults, row) + } + + return finalResults, nil } -func (t TimeSeriesQueryBuilder) buildQuery(p Payload) (string, error) { - query := "" +func (t TimeSeriesQueryBuilder) buildQuery(p Payload, s Series) (string, error) { + var query string switch p.MetricOf { case "sessionCount": - query = t.buildSessionCountQuery(p) + query = t.buildSessionCountQuery(p, s) case "userCount": - query = t.buildUserCountQuery(p) + query = t.buildUserCountQuery(p, s) default: query = "" } return query, nil } -func (TimeSeriesQueryBuilder) buildSessionCountQuery(p Payload) string { - eventConds, eventNames := buildEventConditions(p.Series[0].Filter.Filters) - sessionConds := buildSessionConditions(p.Series[0].Filter.Filters) +func (TimeSeriesQueryBuilder) buildSessionCountQuery(p Payload, s Series) string { + eventConds, eventNames := buildEventConditions(s.Filter.Filters) + sessionConds := buildSessionConditions(s.Filter.Filters) staticEvt := buildStaticEventWhere(p) sessWhere, sessJoin := buildStaticSessionWhere(p, sessionConds) - eventsSubQuery := buildEventsSubQuery(eventConds, eventNames, staticEvt, sessWhere, sessJoin) - mainQuery := buildMainQuery(p, eventsSubQuery) + eventsSubQuery := buildEventsSubQuery("sessionCount", eventConds, eventNames, staticEvt, sessWhere, sessJoin) + mainQuery := buildMainQuery(p, eventsSubQuery, "sessionCount") return mainQuery } -func (TimeSeriesQueryBuilder) buildUserCountQuery(p Payload) string { - eventConds, eventNames := buildEventConditions(p.Series[0].Filter.Filters) - sessionConds := buildSessionConditions(p.Series[0].Filter.Filters) +func (TimeSeriesQueryBuilder) buildUserCountQuery(p Payload, s Series) string { + eventConds, eventNames := buildEventConditions(s.Filter.Filters) + sessionConds := buildSessionConditions(s.Filter.Filters) staticEvt := buildStaticEventWhere(p) sessWhere, sessJoin := buildStaticSessionWhere(p, sessionConds) - eventsSubQuery := buildEventsSubQuery(eventConds, eventNames, staticEvt, sessWhere, sessJoin) - mainQuery := buildMainQuery(p, eventsSubQuery) + eventsSubQuery := buildEventsSubQuery("userCount", eventConds, eventNames, staticEvt, sessWhere, sessJoin) + mainQuery := buildMainQuery(p, eventsSubQuery, "userCount") return mainQuery } -func buildEventsSubQuery(eventConds, eventNames []string, staticEvt, sessWhere, sessJoin string) string { +func buildEventsSubQuery(metric string, eventConds, eventNames []string, staticEvt, sessWhere, sessJoin string) string { if len(eventConds) == 0 && len(eventNames) == 0 { - return fmt.Sprintf(noEventSubQueryTpl, sessJoin) + if metric == "sessionCount" { + return fmt.Sprintf(sessionNoFiltersSubQueryTpl, sessJoin) + } + return fmt.Sprintf(noFiltersSubQueryTpl, sessJoin) } var evtNameClause string var unique []string @@ -92,15 +126,21 @@ func buildEventsSubQuery(eventConds, eventNames []string, staticEvt, sessWhere, if len(eventConds) > 0 { evtWhere += " AND " + strings.Join(eventConds, " AND ") } - return fmt.Sprintf(eventsSubQueryTpl, evtWhere, sessWhere, evtNameClause, having, sessJoin) + if metric == "sessionCount" { + return fmt.Sprintf(sessionSubQueryTpl, evtWhere, sessWhere, evtNameClause, having, sessJoin) + } + return fmt.Sprintf(subQueryTpl, evtWhere, sessWhere, evtNameClause, having, sessJoin) } -func 
buildMainQuery(p Payload, subQuery string) string { +func buildMainQuery(p Payload, subQuery, metric string) string { step := int(getStepSize(p.StartTimestamp, p.EndTimestamp, p.Density, false, 1000)) + if metric == "sessionCount" { + return fmt.Sprintf(sessionMainQueryTpl, p.StartTimestamp, p.EndTimestamp, step, subQuery, step) + } return fmt.Sprintf(mainQueryTpl, p.StartTimestamp, p.EndTimestamp, step, subQuery, step) } -var eventsSubQueryTpl = ` +var subQueryTpl = ` SELECT multiIf( s.user_id IS NOT NULL AND s.user_id != '', s.user_id, s.user_anonymous_id IS NOT NULL AND s.user_anonymous_id != '', s.user_anonymous_id, @@ -127,7 +167,7 @@ FROM ( ) AS s ON (s.session_id = f.session_id) ` -var noEventSubQueryTpl = ` +var noFiltersSubQueryTpl = ` SELECT multiIf( s.user_id IS NOT NULL AND s.user_id != '', s.user_id, s.user_anonymous_id IS NOT NULL AND s.user_anonymous_id != '', s.user_anonymous_id, @@ -137,6 +177,37 @@ FROM experimental.sessions AS s WHERE %s ` +var sessionSubQueryTpl = ` +SELECT s.session_id AS session_id, + s.datetime AS datetime +FROM ( + SELECT main.session_id, + MIN(main.created_at) AS first_event_ts, + MAX(main.created_at) AS last_event_ts + FROM product_analytics.events AS main + WHERE %s + AND main.session_id IN ( + SELECT s.session_id + FROM experimental.sessions AS s + WHERE %s + ) + %s + GROUP BY session_id + %s + INNER JOIN ( + SELECT * + FROM experimental.sessions AS s + WHERE %s + ) AS s ON (s.session_id = f.session_id) +` + +var sessionNoFiltersSubQueryTpl = ` +SELECT s.session_id AS session_id, + s.datetime AS datetime +FROM experimental.sessions AS s +WHERE %s +` + var mainQueryTpl = ` SELECT gs.generate_series AS timestamp, COALESCE(COUNT(DISTINCT processed_sessions.user_id), 0) AS count @@ -149,3 +220,16 @@ WHERE processed_sessions.datetime >= toDateTime(timestamp / 1000) GROUP BY timestamp ORDER BY timestamp; ` + +var sessionMainQueryTpl = ` +SELECT gs.generate_series AS timestamp, + COALESCE(COUNT(DISTINCT processed_sessions.session_id), 0) AS count +FROM generate_series(%d, %d, %d) AS gs +LEFT JOIN ( + %s +) AS processed_sessions ON (TRUE) +WHERE processed_sessions.datetime >= toDateTime(timestamp / 1000) + AND processed_sessions.datetime < toDateTime((timestamp + %d) / 1000) +GROUP BY timestamp +ORDER BY timestamp; +` From 8711648ac7f80d0fbe5af99de7c6de56b0ce2b79 Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Wed, 26 Feb 2025 12:43:54 +0100 Subject: [PATCH 08/30] feat(analytics): table charts wip --- backend/pkg/analytics/charts/metric_table.go | 222 +++++++++++++------ backend/pkg/analytics/charts/model.go | 15 ++ backend/pkg/analytics/charts/query.go | 160 +------------ backend/pkg/analytics/db/connector.go | 11 + 4 files changed, 186 insertions(+), 222 deletions(-) diff --git a/backend/pkg/analytics/charts/metric_table.go b/backend/pkg/analytics/charts/metric_table.go index bf0fe279c..5db49cccd 100644 --- a/backend/pkg/analytics/charts/metric_table.go +++ b/backend/pkg/analytics/charts/metric_table.go @@ -8,81 +8,173 @@ import ( type TableQueryBuilder struct{} +type TableValue struct { + Name string `json:"name"` + Total uint64 `json:"total"` +} + +type TableResponse struct { + Total uint64 `json:"total"` + Count uint64 `json:"count"` + Values []TableValue `json:"values"` +} + func (t TableQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { - return t.buildQuery(p) + // validate metricOf with MetricOfTable return error if empty or not supported + if p.MetricOf == "" { + return nil, fmt.Errorf("MetricOf is empty") + } + + // 
Validate that p.MetricOf is one of the supported MetricOfTable types + isValidMetricOf := false + switch MetricOfTable(p.MetricOf) { + case MetricOfTableBrowser, MetricOfTableDevice, MetricOfTableCountry, + MetricOfTableUserId, MetricOfTableIssues, MetricOfTableLocation, + MetricOfTableSessions, MetricOfTableErrors, MetricOfTableReferrer, + MetricOfTableFetch: + isValidMetricOf = true + } + + if !isValidMetricOf { + return nil, fmt.Errorf("unsupported MetricOf type: %s", p.MetricOf) + } + + query, err := t.buildQuery(p) + if err != nil { + return nil, err + } + rows, err := conn.Query(query) + if err != nil { + return nil, err + } + defer rows.Close() + + var ( + totalCount uint64 + rowsCount uint64 + values []TableValue + ) + + for rows.Next() { + var ( + total uint64 + name string + ) + if err := rows.Scan(&totalCount, &name, &total, &rowsCount); err != nil { + return nil, err + } + values = append(values, TableValue{Name: name, Total: total}) + } + + return &TableResponse{ + Total: totalCount, + Count: rowsCount, + Values: values, + }, nil } func (t TableQueryBuilder) buildQuery(r Payload) (string, error) { s := r.Series[0] - sessionFilters, eventFilters := partitionFilters(s.Filter.Filters) - sessionWhere := buildSessionWhere(sessionFilters) - eventWhere, seqHaving := buildEventsWhere(eventFilters, s.Filter.EventsOrder) - subQuery := fmt.Sprintf( - "SELECT %s,\n"+ - " MIN(%s) AS first_event_ts,\n"+ - " MAX(%s) AS last_event_ts\n"+ - "FROM %s AS main\n"+ - "WHERE main.project_id = %%(project_id)s\n"+ - " AND %s >= toDateTime(%%(start_time)s/1000)\n"+ - " AND %s <= toDateTime(%%(end_time)s/1000)\n"+ - " AND (%s)\n"+ - "GROUP BY %s\n"+ - "HAVING %s", - ColEventSessionID, - ColEventTime, - ColEventTime, - TableEvents, - ColEventTime, - ColEventTime, - strings.Join(eventWhere, " OR "), - ColEventSessionID, - seqHaving, - ) - - joinQuery := fmt.Sprintf( - "SELECT *\n"+ - "FROM %s AS s\n"+ - "INNER JOIN (\n"+ - " SELECT DISTINCT ev.session_id, ev.`$current_url` AS url_path\n"+ - " FROM %s AS ev\n"+ - " WHERE ev.created_at >= toDateTime(%%(start_time)s/1000)\n"+ - " AND ev.created_at <= toDateTime(%%(end_time)s/1000)\n"+ - " AND ev.project_id = %%(project_id)s\n"+ - " AND ev.`$event_name` = 'LOCATION'\n"+ - ") AS extra_event USING (session_id)\n"+ - "WHERE s.project_id = %%(project_id)s\n"+ - " AND isNotNull(s.duration)\n"+ - " AND s.datetime >= toDateTime(%%(start_time)s/1000)\n"+ - " AND s.datetime <= toDateTime(%%(end_time)s/1000)\n", - TableSessions, - TableEvents, - ) - - if len(sessionWhere) > 0 { - joinQuery += " AND " + strings.Join(sessionWhere, " AND ") + "\n" + groupByColumn := r.MetricOf + if groupByColumn == "" { + return "", fmt.Errorf("MetricOf is empty") } - main := fmt.Sprintf( - "SELECT s.session_id AS session_id, s.url_path\n"+ - "FROM (\n%s\n) AS f\n"+ - "INNER JOIN (\n%s) AS s\n"+ - " ON (s.session_id = f.session_id)\n", - subQuery, - joinQuery, + sessionFilters, eventFilters := partitionFilters(s.Filter.Filters) + eventConds, eventNames := buildEventConditions(eventFilters) + eventWhere := buildStaticEventWhere(r) + if len(eventConds) > 0 { + eventWhere += " AND " + strings.Join(eventConds, " AND ") + } + if len(eventNames) > 0 { + eventWhere += " AND main.`$event_name` IN (" + buildInClause(eventNames) + ")" + } + + sessionConds := buildSessionConditions(sessionFilters) + sessWhere, _ := buildStaticSessionWhere(r, sessionConds) + + // Build event subquery + var eventSubQuery string + if len(eventConds) > 0 { + // With HAVING clause + var pattern strings.Builder + for i 
:= 0; i < len(eventConds); i++ { + fmt.Fprintf(&pattern, "(?%d)", i+1) + } + + var args strings.Builder + args.WriteString("toDateTime(main.created_at)") + for _, cond := range eventConds { + args.WriteString(",\n ") + args.WriteString(cond) + } + + eventSubQuery = fmt.Sprintf( + "SELECT main.session_id, MIN(main.created_at) AS first_event_ts, MAX(main.created_at) AS last_event_ts "+ + "FROM %s AS main "+ + "WHERE %s "+ + "AND main.session_id IN (SELECT s.session_id FROM %s AS s WHERE %s) "+ + "GROUP BY main.session_id "+ + "HAVING sequenceMatch('%s')(%s)", + TableEvents, + eventWhere, + TableSessions, + sessWhere, + pattern.String(), + args.String(), + ) + } else { + // No HAVING clause needed + eventSubQuery = fmt.Sprintf( + "SELECT main.session_id, MIN(main.created_at) AS first_event_ts, MAX(main.created_at) AS last_event_ts "+ + "FROM %s AS main "+ + "WHERE %s "+ + "AND main.session_id IN (SELECT s.session_id FROM %s AS s WHERE %s) "+ + "GROUP BY main.session_id", + TableEvents, + eventWhere, + TableSessions, + sessWhere, + ) + } + + sessionsQuery := fmt.Sprintf( + "SELECT * FROM %s AS s WHERE s.project_id = %d AND isNotNull(s.duration)%s AND s.datetime >= toDateTime(%d/1000) AND s.datetime <= toDateTime(%d/1000)", + TableSessions, + r.ProjectId, + func() string { + if sessWhere != "" { + return " AND " + sessWhere + } + return "" + }(), + r.StartTimestamp, + r.EndTimestamp, ) - final := fmt.Sprintf( - "SELECT COUNT(DISTINCT url_path) OVER () AS main_count,\n"+ - " url_path AS name,\n"+ - " COUNT(DISTINCT session_id) AS total,\n"+ - " COALESCE(SUM(COUNT(DISTINCT session_id)) OVER (), 0) AS total_count\n"+ - "FROM (\n%s) AS filtered_sessions\n"+ - "GROUP BY url_path\n"+ - "ORDER BY total DESC\n"+ - "LIMIT 200 OFFSET 0;", - main, + mainQuery := fmt.Sprintf( + "SELECT s.session_id AS session_id, s.%s AS %s FROM (%s) AS f INNER JOIN (%s) AS s ON s.session_id = f.session_id", + groupByColumn, groupByColumn, + eventSubQuery, + sessionsQuery, ) - return final, nil + finalQuery := fmt.Sprintf( + "SELECT COUNT(DISTINCT filtered_sessions.%s) OVER () AS main_count, "+ + "filtered_sessions.%s AS name, "+ + "COUNT(DISTINCT filtered_sessions.session_id) AS total, "+ + "(SELECT COUNT(DISTINCT session_id) FROM (%s) AS all_sessions) AS total_count "+ + "FROM (%s) AS filtered_sessions "+ + "GROUP BY filtered_sessions.%s "+ + "ORDER BY total DESC "+ + "LIMIT 0, 200;", + groupByColumn, + groupByColumn, + mainQuery, + mainQuery, + groupByColumn, + ) + + return finalQuery, nil } diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go index b83790578..0e6aa3da4 100644 --- a/backend/pkg/analytics/charts/model.go +++ b/backend/pkg/analytics/charts/model.go @@ -68,6 +68,21 @@ type MetricPayload struct { Series []Series `json:"series"` } +type MetricOfTable string + +const ( + MetricOfTableBrowser MetricOfTable = "browser" + MetricOfTableDevice MetricOfTable = "device" + MetricOfTableCountry MetricOfTable = "country" + MetricOfTableUserId MetricOfTable = "userId" + MetricOfTableIssues MetricOfTable = "issues" + MetricOfTableLocation MetricOfTable = "location" + MetricOfTableSessions MetricOfTable = "sessions" + MetricOfTableErrors MetricOfTable = "errors" + MetricOfTableReferrer MetricOfTable = "referrer" + MetricOfTableFetch MetricOfTable = "fetch" +) + type FilterGroup struct { Filters []Filter `json:"filters"` EventsOrder EventOrder `json:"eventsOrder"` diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go index 6ea789c8c..be8e2fe11 
100644 --- a/backend/pkg/analytics/charts/query.go +++ b/backend/pkg/analytics/charts/query.go @@ -8,8 +8,9 @@ import ( type Payload struct { *MetricPayload - ProjectId int - UserId uint64 + GroupByColumn string // TODO remove this field + ProjectId int + UserId uint64 } type QueryBuilder interface { @@ -40,161 +41,6 @@ func partitionFilters(filters []Filter) (sessionFilters []Filter, eventFilters [ return } -func buildEventsWhere(filters []Filter, order EventOrder) (eventConditions []string, having string) { - basicEventTypes := "(" + - strings.Join([]string{ - fmt.Sprintf("%s = 'CLICK'", ColEventName), - fmt.Sprintf("%s = 'INPUT'", ColEventName), - fmt.Sprintf("%s = 'LOCATION'", ColEventName), - fmt.Sprintf("%s = 'CUSTOM'", ColEventName), - fmt.Sprintf("%s = 'REQUEST'", ColEventName), - }, " OR ") + ")" - - var seq []string - for _, f := range filters { - switch f.Type { - case FilterClick: - seq = append(seq, seqCond("CLICK", "selector", f)) - case FilterInput: - seq = append(seq, seqCond("INPUT", "label", f)) - case FilterLocation: - seq = append(seq, seqCond("LOCATION", "url_path", f)) - case FilterCustom: - seq = append(seq, seqCond("CUSTOM", "name", f)) - case FilterFetch: - seq = append(seq, seqFetchCond("REQUEST", f)) - case FilterFetchStatusCode: - seq = append(seq, seqCond("REQUEST", "status", f)) - default: - seq = append(seq, fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(string(f.Type)))) - } - } - eventConditions = []string{basicEventTypes} - - // then => sequenceMatch - // or => OR - // and => AND - switch order { - case EventOrderThen: - var pattern []string - for i := range seq { - pattern = append(pattern, fmt.Sprintf("(?%d)", i+1)) - } - having = fmt.Sprintf("sequenceMatch('%s')(\n%s,\n%s)", - strings.Join(pattern, ""), fmt.Sprintf("toUnixTimestamp(%s)", ColEventTime), strings.Join(seq, ",\n")) - case EventOrderAnd: - // build AND - having = strings.Join(seq, " AND ") - case EventOrderOr: - default: - // default => OR - var orParts []string - for _, p := range seq { - orParts = append(orParts, "("+p+")") - } - having = strings.Join(orParts, " OR ") - } - return eventConditions, having -} - -func buildSessionWhere(filters []Filter) []string { - var conds []string - for _, f := range filters { - switch f.Type { - case FilterUserCountry: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCountry, concatValues(f.Value))) - case FilterUserCity: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserCity, concatValues(f.Value))) - case FilterUserState: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserState, concatValues(f.Value))) - case FilterUserId: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserID, concatValues(f.Value))) - case FilterUserAnonymousId: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserAnonymousID, concatValues(f.Value))) - case FilterUserOs: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserOS, concatValues(f.Value))) - case FilterUserBrowser: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserBrowser, concatValues(f.Value))) - case FilterUserDevice: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDevice, concatValues(f.Value))) - case FilterPlatform: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUserDeviceType, concatValues(f.Value))) - case FilterRevId: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColRevID, concatValues(f.Value))) - case FilterReferrer: - conds = append(conds, 
fmt.Sprintf("%s = toString('%s')", ColBaseReferrer, concatValues(f.Value))) - case FilterDuration: - if len(f.Value) == 2 { - conds = append(conds, fmt.Sprintf("%s >= '%s'", ColDuration, f.Value[0])) - conds = append(conds, fmt.Sprintf("%s <= '%s'", ColDuration, f.Value[1])) - } - case FilterUtmSource: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmSource, concatValues(f.Value))) - case FilterUtmMedium: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmMedium, concatValues(f.Value))) - case FilterUtmCampaign: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColUtmCampaign, concatValues(f.Value))) - case FilterMetadata: - conds = append(conds, fmt.Sprintf("%s = toString('%s')", ColMetadata1, concatValues(f.Value))) - } - } - // adding /n to each condition for better readability, can be removed. - for i := range conds { - conds[i] += "\n" - } - return conds -} - -func concatValues(v []string) string { - return strings.Join(v, "") -} - -func seqCond(eventName, key string, f Filter) string { - op := parseOperator(f.Operator) - return fmt.Sprintf("(%s = '%s' AND JSONExtractString(toString(%s), '%s') %s '%s')", - ColEventName, strings.ToUpper(eventName), ColEventProperties, key, op, concatValues(f.Value)) -} - -func seqFetchCond(eventName string, f Filter) string { - w := []string{fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(eventName))} - var extras []string - for _, c := range f.Filters { - switch c.Type { - case FilterFetch: - if len(c.Value) > 0 { - extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventURLPath, concatValues(c.Value))) - } - case FilterFetchStatusCode: - if len(c.Value) > 0 { - extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventStatus, concatValues(c.Value))) - } - default: - // placeholder if needed - } - } - if len(extras) > 0 { - w = append(w, strings.Join(extras, " AND ")) - } - return "(" + strings.Join(w, " AND ") + ")" -} - -func parseOperator(op string) string { - // TODO implement this properly - switch strings.ToLower(op) { - case OperatorContains: - return "LIKE" - case OperatorStringIs, OperatorStringOn, "=", OperatorStringOnAny: - return "=" - case OperatorStringStartsWith: - return "LIKE" - case OperatorStringEndsWith: - // might interpret differently in real impl - return "=" - default: - return "=" - } -} - func buildEventConditions(filters []Filter) (conds, names []string) { for _, f := range filters { if f.IsEvent { diff --git a/backend/pkg/analytics/db/connector.go b/backend/pkg/analytics/db/connector.go index c06dfa998..62b31068c 100644 --- a/backend/pkg/analytics/db/connector.go +++ b/backend/pkg/analytics/db/connector.go @@ -22,6 +22,7 @@ type TableResponse struct { type Connector interface { Stop() error Query(query string) (driver.Rows, error) + QueryArgs(query string, args map[string]interface{}) (driver.Rows, error) } type connectorImpl struct { @@ -62,3 +63,13 @@ func (c *connectorImpl) Query(query string) (driver.Rows, error) { return rows, nil } + +func (c *connectorImpl) QueryArgs(query string, args map[string]interface{}) (driver.Rows, error) { + rows, err := c.conn.Query(context.Background(), query, args) + if err != nil { + return nil, err + } + //defer rows.Close() + + return rows, nil +} From c077841b4e84ab0442d8ed6d7ec108096b9e0a8d Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Fri, 18 Apr 2025 14:39:19 +0200 Subject: [PATCH 09/30] feat(api): dev rebase --- backend/cmd/analytics/main.go | 12 +++++++----- backend/pkg/analytics/cards/cards.go | 19 +++++++++---------- 
.../pkg/analytics/dashboards/dashboards.go | 17 +++++++---------- backend/pkg/db/postgres/pool/pool.go | 9 +++++++++ 4 files changed, 32 insertions(+), 25 deletions(-) diff --git a/backend/cmd/analytics/main.go b/backend/cmd/analytics/main.go index 1a4b099dd..cb53cb338 100644 --- a/backend/cmd/analytics/main.go +++ b/backend/cmd/analytics/main.go @@ -8,8 +8,9 @@ import ( "openreplay/backend/pkg/db/postgres/pool" "openreplay/backend/pkg/logger" "openreplay/backend/pkg/metrics" - analyticsMetrics "openreplay/backend/pkg/metrics/analytics" - databaseMetrics "openreplay/backend/pkg/metrics/database" + //analyticsMetrics "openreplay/backend/pkg/metrics/analytics" + //databaseMetrics "openreplay/backend/pkg/metrics/database" + "openreplay/backend/pkg/metrics/database" "openreplay/backend/pkg/metrics/web" "openreplay/backend/pkg/server" "openreplay/backend/pkg/server/api" @@ -20,9 +21,10 @@ func main() { log := logger.New() cfg := analyticsConfig.New(log) webMetrics := web.New("analytics") - metrics.New(log, append(webMetrics.List(), append(analyticsMetrics.List(), databaseMetrics.List()...)...)) + dbMetrics := database.New("analytics") + metrics.New(log, append(webMetrics.List(), dbMetrics.List()...)) - pgConn, err := pool.New(cfg.Postgres.String()) + pgConn, err := pool.New(dbMetrics, cfg.Postgres.String()) if err != nil { log.Fatal(ctx, "can't init postgres connection: %s", err) } @@ -34,7 +36,7 @@ func main() { } defer chConn.Stop() - builder, err := analytics.NewServiceBuilder(log, cfg, webMetrics, pgConn, chConn) + builder, err := analytics.NewServiceBuilder(log, cfg, webMetrics, dbMetrics, pgConn, chConn) if err != nil { log.Fatal(ctx, "can't init services: %s", err) } diff --git a/backend/pkg/analytics/cards/cards.go b/backend/pkg/analytics/cards/cards.go index 1425a61f0..794db1c98 100644 --- a/backend/pkg/analytics/cards/cards.go +++ b/backend/pkg/analytics/cards/cards.go @@ -6,7 +6,6 @@ import ( "fmt" "strings" - "github.com/jackc/pgx/v4" "github.com/lib/pq" "openreplay/backend/pkg/db/postgres/pool" @@ -48,12 +47,12 @@ func (s *cardsImpl) Create(projectId int, userID uint64, req *CardCreateRequest) ctx := context.Background() defer func() { if err != nil { - tx.Rollback(ctx) + err := tx.TxRollback() if err != nil { return } } else { - err := tx.Commit(ctx) + err := tx.TxCommit() if err != nil { return } @@ -67,8 +66,8 @@ func (s *cardsImpl) Create(projectId int, userID uint64, req *CardCreateRequest) RETURNING metric_id, project_id, user_id, name, metric_type, view_type, metric_of, metric_value, metric_format, is_public, created_at, edited_at` card := &CardGetResponse{} - err = tx.QueryRow( - ctx, sql, + err = tx.TxQueryRow( + sql, projectId, userID, req.Name, req.MetricType, req.ViewType, req.MetricOf, req.MetricValue, req.MetricFormat, req.IsPublic, ).Scan( &card.CardID, @@ -98,7 +97,7 @@ func (s *cardsImpl) Create(projectId int, userID uint64, req *CardCreateRequest) return card, nil } -func (s *cardsImpl) CreateSeries(ctx context.Context, tx pgx.Tx, metricId int64, series []CardSeriesBase) []CardSeries { +func (s *cardsImpl) CreateSeries(ctx context.Context, tx *pool.Tx, metricId int64, series []CardSeriesBase) []CardSeries { if len(series) == 0 { return nil // No series to create } @@ -126,7 +125,7 @@ func (s *cardsImpl) CreateSeries(ctx context.Context, tx pgx.Tx, metricId int64, query := fmt.Sprintf(sql, strings.Join(values, ",")) s.log.Info(ctx, "Executing query: %s with args: %v", query, args) - rows, err := tx.Query(ctx, query, args...) 
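+	// Run the batched multi-row INSERT inside the open transaction; TxQuery
+	// also records per-query duration/count metrics before returning the rows.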
+ rows, err := tx.TxQuery(query, args...) if err != nil { s.log.Error(ctx, "failed to execute batch insert for series: %v", err) return nil @@ -359,12 +358,12 @@ func (s *cardsImpl) Update(projectId int, cardID int64, userID uint64, req *Card ctx := context.Background() defer func() { if err != nil { - tx.Rollback(ctx) + err := tx.TxRollback() if err != nil { return } } else { - err := tx.Commit(ctx) + err := tx.TxCommit() if err != nil { return } @@ -379,7 +378,7 @@ func (s *cardsImpl) Update(projectId int, cardID int64, userID uint64, req *Card RETURNING metric_id, project_id, user_id, name, metric_type, view_type, metric_of, metric_value, metric_format, is_public, created_at, edited_at` card := &CardGetResponse{} - err = tx.QueryRow(ctx, sql, + err = tx.TxQueryRow(sql, req.Name, req.MetricType, req.ViewType, req.MetricOf, req.MetricValue, req.MetricFormat, req.IsPublic, cardID, projectId, ).Scan( &card.CardID, &card.ProjectID, &card.UserID, &card.Name, &card.MetricType, &card.ViewType, &card.MetricOf, diff --git a/backend/pkg/analytics/dashboards/dashboards.go b/backend/pkg/analytics/dashboards/dashboards.go index cb260f572..057c9c830 100644 --- a/backend/pkg/analytics/dashboards/dashboards.go +++ b/backend/pkg/analytics/dashboards/dashboards.go @@ -1,7 +1,6 @@ package dashboards import ( - "context" "encoding/json" "errors" "fmt" @@ -336,15 +335,14 @@ func (s *dashboardsImpl) AddCards(projectId int, dashboardId int, userId uint64, return fmt.Errorf("failed to start transaction: %w", err) } - ctx := context.Background() defer func() { if err != nil { - tx.Rollback(ctx) + err := tx.TxRollback() if err != nil { return } } else { - err := tx.Commit(ctx) + err := tx.TxCommit() if err != nil { return } @@ -356,7 +354,7 @@ func (s *dashboardsImpl) AddCards(projectId int, dashboardId int, userId uint64, for _, metricID := range req.MetricIDs { // Check if the widget already exists var exists bool - err := tx.QueryRow(ctx, ` + err := tx.TxQueryRow(` SELECT EXISTS ( SELECT 1 FROM public.dashboard_widgets WHERE dashboard_id = $1 AND metric_id = $2 @@ -371,10 +369,9 @@ func (s *dashboardsImpl) AddCards(projectId int, dashboardId int, userId uint64, } // Insert new widget - _, err = tx.Exec(ctx, ` - INSERT INTO public.dashboard_widgets (dashboard_id, metric_id, user_id, config) - VALUES ($1, $2, $3, $4) - `, dashboardId, metricID, userId, req.Config) + query := `INSERT INTO public.dashboard_widgets (dashboard_id, metric_id, user_id, config) + VALUES ($1, $2, $3, $4)` + err = tx.TxExec(query, dashboardId, metricID, userId, req.Config) if err != nil { return fmt.Errorf("failed to insert widget: %w", err) } @@ -382,7 +379,7 @@ func (s *dashboardsImpl) AddCards(projectId int, dashboardId int, userId uint64, } // Commit transaction - if err := tx.Commit(ctx); err != nil { + if err := tx.TxCommit(); err != nil { return fmt.Errorf("failed to commit transaction: %w", err) } diff --git a/backend/pkg/db/postgres/pool/pool.go b/backend/pkg/db/postgres/pool/pool.go index f6d82e6c3..747654695 100644 --- a/backend/pkg/db/postgres/pool/pool.go +++ b/backend/pkg/db/postgres/pool/pool.go @@ -110,6 +110,15 @@ func (tx *Tx) TxExec(sql string, args ...interface{}) error { return err } +func (tx *Tx) TxQuery(sql string, args ...interface{}) (pgx.Rows, error) { + start := time.Now() + res, err := tx.origTx.Query(getTimeoutContext(), sql, args...) 
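+	// Label the query metrics with the method/table parsed from the SQL text,
+	// consistent with the surrounding Tx helpers.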
+ method, table := methodName(sql) + tx.metrics.RecordRequestDuration(float64(time.Now().Sub(start).Milliseconds()), method, table) + tx.metrics.IncreaseTotalRequests(method, table) + return res, err +} + func (tx *Tx) TxQueryRow(sql string, args ...interface{}) pgx.Row { start := time.Now() res := tx.origTx.QueryRow(context.Background(), sql, args...) From 3c5844e4ad8768a5c439e54955200895d4b59105 Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Tue, 22 Apr 2025 12:54:43 +0200 Subject: [PATCH 10/30] feat(product_analytics): table of cards --- backend/pkg/analytics/charts/charts.go | 5 +- backend/pkg/analytics/charts/metric_table.go | 258 +++++++++++-------- backend/pkg/analytics/charts/model.go | 19 +- backend/pkg/analytics/charts/query.go | 100 +++---- 4 files changed, 207 insertions(+), 175 deletions(-) diff --git a/backend/pkg/analytics/charts/charts.go b/backend/pkg/analytics/charts/charts.go index c460974cc..49477b574 100644 --- a/backend/pkg/analytics/charts/charts.go +++ b/backend/pkg/analytics/charts/charts.go @@ -2,7 +2,6 @@ package charts import ( "fmt" - "log" "openreplay/backend/pkg/analytics/db" "openreplay/backend/pkg/db/postgres/pool" "openreplay/backend/pkg/logger" @@ -39,12 +38,12 @@ func (s *chartsImpl) GetData(projectId int, userID uint64, req *MetricPayload) ( } qb, err := NewQueryBuilder(payload) if err != nil { - log.Fatalf("Error creating query builder: %v", err) + return nil, fmt.Errorf("error creating query builder: %v", err) } resp, err := qb.Execute(payload, s.chConn) if err != nil { - log.Fatalf("Error building query: %v", err) + return nil, fmt.Errorf("error executing query: %v", err) } return resp, nil diff --git a/backend/pkg/analytics/charts/metric_table.go b/backend/pkg/analytics/charts/metric_table.go index 5db49cccd..7a48e6e4c 100644 --- a/backend/pkg/analytics/charts/metric_table.go +++ b/backend/pkg/analytics/charts/metric_table.go @@ -2,10 +2,21 @@ package charts import ( "fmt" + "log" "openreplay/backend/pkg/analytics/db" "strings" ) +var validMetricOfValues = map[MetricOfTable]struct{}{ + MetricOfTableBrowser: {}, + MetricOfTableDevice: {}, + MetricOfTableCountry: {}, + MetricOfTableUserId: {}, + MetricOfTableLocation: {}, + MetricOfTableReferrer: {}, + MetricOfTableFetch: {}, +} + type TableQueryBuilder struct{} type TableValue struct { @@ -19,162 +30,181 @@ type TableResponse struct { Values []TableValue `json:"values"` } +const ( + MetricFormatSessionCount = "sessionCount" + MetricFormatUserCount = "userCount" + nilUUIDString = "00000000-0000-0000-0000-000000000000" +) + +var propertySelectorMap = map[string]string{ + string(MetricOfTableBrowser): "main.$browser AS metric_value", + string(MetricOfTableDevice): "main.$device AS metric_value", + string(MetricOfTableCountry): "main.$country AS metric_value", + string(MetricOfTableReferrer): "main.$referrer AS metric_value", +} + func (t TableQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { - // validate metricOf with MetricOfTable return error if empty or not supported if p.MetricOf == "" { return nil, fmt.Errorf("MetricOf is empty") } - // Validate that p.MetricOf is one of the supported MetricOfTable types - isValidMetricOf := false - switch MetricOfTable(p.MetricOf) { - case MetricOfTableBrowser, MetricOfTableDevice, MetricOfTableCountry, - MetricOfTableUserId, MetricOfTableIssues, MetricOfTableLocation, - MetricOfTableSessions, MetricOfTableErrors, MetricOfTableReferrer, - MetricOfTableFetch: - isValidMetricOf = true + if _, ok := 
validMetricOfValues[MetricOfTable(p.MetricOf)]; !ok {
		return nil, fmt.Errorf("invalid MetricOf value: %s", p.MetricOf)
	}

	metricFormat := p.MetricFormat
	if metricFormat != MetricFormatSessionCount && metricFormat != MetricFormatUserCount {
		metricFormat = MetricFormatSessionCount
	}

	query, err := t.buildQuery(p, metricFormat)
	if err != nil {
		return nil, fmt.Errorf("error building query: %w", err)
	}

	rows, err := conn.Query(query)
	if err != nil {
		log.Printf("Error executing query: %s\nQuery: %s", err, query)
		return nil, fmt.Errorf("error executing query: %w", err)
	}
	defer rows.Close()

	var (
		overallTotalMetricValues uint64
		overallCount             uint64
		values                   []TableValue
		firstRow                 = true
	)

	for rows.Next() {
		var (
			name                         string
			valueSpecificCount           uint64
			tempOverallTotalMetricValues uint64
			tempOverallCount             uint64
		)

		if err := rows.Scan(&tempOverallTotalMetricValues, &name, &valueSpecificCount, &tempOverallCount); err != nil {
			return nil, fmt.Errorf("error scanning row: %w", err)
		}

		if firstRow {
			overallTotalMetricValues = tempOverallTotalMetricValues
			overallCount = tempOverallCount
			firstRow = false
		}
		values = append(values, TableValue{Name: name, Total: valueSpecificCount})
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("error iterating rows: %w", err)
	}

	return &TableResponse{
		Total:  overallTotalMetricValues,
		Count:  overallCount,
		Values: values,
	}, nil
}

func (t TableQueryBuilder) buildQuery(r Payload, metricFormat string) (string, error) {
	if len(r.Series) == 0 {
		return "", fmt.Errorf("payload Series cannot be empty")
	}
	s := r.Series[0]

	var propertyName string
	if r.MetricOf == "" {
		return "", fmt.Errorf("MetricOf is empty")
	}
	originalMetricOf := r.MetricOf
	propertyName = originalMetricOf

	eventFilters := s.Filter.Filters
	eventConds, eventNames := buildEventConditions(eventFilters)

	baseWhereConditions := []string{
		fmt.Sprintf("main.created_at >= toDateTime(%d/1000)", r.StartTimestamp),
		fmt.Sprintf("main.created_at <= toDateTime(%d/1000)", r.EndTimestamp),
		"sessions.duration > 0",
	}

	if r.ProjectId > 0 {
		baseWhereConditions = append(baseWhereConditions, fmt.Sprintf("main.project_id = %d", r.ProjectId))
	}

	var aggregationExpression string
	var aggregationAlias = "aggregation_id"
	var specificWhereConditions []string

	if metricFormat == MetricFormatUserCount {
		aggregationExpression = "if(empty(sessions.user_id), toString(sessions.user_uuid), sessions.user_id)"
		userExclusionCondition := fmt.Sprintf("NOT (empty(sessions.user_id) AND (sessions.user_uuid IS NULL OR sessions.user_uuid = '%s'))", nilUUIDString)
		specificWhereConditions = append(specificWhereConditions, userExclusionCondition)
	} else {
		aggregationExpression = "main.session_id"
	}

	var propertySelector string
	var ok bool
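	// Prefer a dedicated column selector when the metric maps to a known
	// field; otherwise fall back to extracting the value from the event's
	// JSON `$properties` by key.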
propertySelector, ok = propertySelectorMap[originalMetricOf] + if !ok { + propertySelector = fmt.Sprintf("JSONExtractString(toString(main.$properties), '%s') AS metric_value", propertyName) + } + + allWhereConditions := baseWhereConditions if len(eventConds) > 0 { - eventWhere += " AND " + strings.Join(eventConds, " AND ") + allWhereConditions = append(allWhereConditions, eventConds...) } if len(eventNames) > 0 { - eventWhere += " AND main.`$event_name` IN (" + buildInClause(eventNames) + ")" + allWhereConditions = append(allWhereConditions, "main.`$event_name` IN ("+buildInClause(eventNames)+")") } + allWhereConditions = append(allWhereConditions, specificWhereConditions...) + whereClause := strings.Join(allWhereConditions, " AND ") - sessionConds := buildSessionConditions(sessionFilters) - sessWhere, _ := buildStaticSessionWhere(r, sessionConds) - - // Build event subquery - var eventSubQuery string - if len(eventConds) > 0 { - // With HAVING clause - var pattern strings.Builder - for i := 0; i < len(eventConds); i++ { - fmt.Fprintf(&pattern, "(?%d)", i+1) - } - - var args strings.Builder - args.WriteString("toDateTime(main.created_at)") - for _, cond := range eventConds { - args.WriteString(",\n ") - args.WriteString(cond) - } - - eventSubQuery = fmt.Sprintf( - "SELECT main.session_id, MIN(main.created_at) AS first_event_ts, MAX(main.created_at) AS last_event_ts "+ - "FROM %s AS main "+ - "WHERE %s "+ - "AND main.session_id IN (SELECT s.session_id FROM %s AS s WHERE %s) "+ - "GROUP BY main.session_id "+ - "HAVING sequenceMatch('%s')(%s)", - TableEvents, - eventWhere, - TableSessions, - sessWhere, - pattern.String(), - args.String(), - ) - } else { - // No HAVING clause needed - eventSubQuery = fmt.Sprintf( - "SELECT main.session_id, MIN(main.created_at) AS first_event_ts, MAX(main.created_at) AS last_event_ts "+ - "FROM %s AS main "+ - "WHERE %s "+ - "AND main.session_id IN (SELECT s.session_id FROM %s AS s WHERE %s) "+ - "GROUP BY main.session_id", - TableEvents, - eventWhere, - TableSessions, - sessWhere, - ) + limit := r.Limit + if limit <= 0 { + limit = 10 } + page := r.Page + if page <= 0 { + page = 1 + } + offset := (page - 1) * limit + limitClause := fmt.Sprintf("LIMIT %d OFFSET %d", limit, offset) - sessionsQuery := fmt.Sprintf( - "SELECT * FROM %s AS s WHERE s.project_id = %d AND isNotNull(s.duration)%s AND s.datetime >= toDateTime(%d/1000) AND s.datetime <= toDateTime(%d/1000)", - TableSessions, - r.ProjectId, - func() string { - if sessWhere != "" { - return " AND " + sessWhere - } - return "" - }(), - r.StartTimestamp, - r.EndTimestamp, - ) + query := fmt.Sprintf(` + WITH filtered_data AS ( + SELECT DISTINCT + %s, + %s AS %s + FROM product_analytics.events AS main + INNER JOIN experimental.sessions AS sessions ON main.session_id = sessions.session_id + WHERE %s + ), + grouped_values AS ( + SELECT + metric_value AS name, + countDistinct(%s) AS value_count + FROM filtered_data + WHERE name IS NOT NULL AND name != '' + GROUP BY name + ) + SELECT + (SELECT count() FROM grouped_values) AS overall_total_metric_values, + name, + value_count, + (SELECT countDistinct(%s) FROM filtered_data) AS overall_total_count + FROM grouped_values + ORDER BY value_count DESC + %s + `, + propertySelector, + aggregationExpression, + aggregationAlias, + whereClause, + aggregationAlias, + aggregationAlias, + limitClause) - mainQuery := fmt.Sprintf( - "SELECT s.session_id AS session_id, s.%s AS %s FROM (%s) AS f INNER JOIN (%s) AS s ON s.session_id = f.session_id", - groupByColumn, groupByColumn, - 
eventSubQuery, - sessionsQuery, - ) - - finalQuery := fmt.Sprintf( - "SELECT COUNT(DISTINCT filtered_sessions.%s) OVER () AS main_count, "+ - "filtered_sessions.%s AS name, "+ - "COUNT(DISTINCT filtered_sessions.session_id) AS total, "+ - "(SELECT COUNT(DISTINCT session_id) FROM (%s) AS all_sessions) AS total_count "+ - "FROM (%s) AS filtered_sessions "+ - "GROUP BY filtered_sessions.%s "+ - "ORDER BY total DESC "+ - "LIMIT 0, 200;", - groupByColumn, - groupByColumn, - mainQuery, - mainQuery, - groupByColumn, - ) - - return finalQuery, nil + return query, nil } diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go index 0e6aa3da4..e22abcbe5 100644 --- a/backend/pkg/analytics/charts/model.go +++ b/backend/pkg/analytics/charts/model.go @@ -66,21 +66,24 @@ type MetricPayload struct { ViewType string `json:"viewType"` Name string `json:"name"` Series []Series `json:"series"` + Limit int `json:"limit"` + Page int `json:"page"` } type MetricOfTable string const ( - MetricOfTableBrowser MetricOfTable = "browser" - MetricOfTableDevice MetricOfTable = "device" - MetricOfTableCountry MetricOfTable = "country" - MetricOfTableUserId MetricOfTable = "userId" - MetricOfTableIssues MetricOfTable = "issues" - MetricOfTableLocation MetricOfTable = "location" - MetricOfTableSessions MetricOfTable = "sessions" - MetricOfTableErrors MetricOfTable = "errors" + MetricOfTableLocation MetricOfTable = "url_path" // TOP Pages + MetricOfTableBrowser MetricOfTable = "user_browser" MetricOfTableReferrer MetricOfTable = "referrer" + MetricOfTableUserId MetricOfTable = "user_id" + MetricOfTableCountry MetricOfTable = "user_country" + MetricOfTableDevice MetricOfTable = "user_device" MetricOfTableFetch MetricOfTable = "fetch" + + //MetricOfTableIssues MetricOfTable = "issues" + //MetricOfTableSessions MetricOfTable = "sessions" + //MetricOfTableErrors MetricOfTable = "errors" ) type FilterGroup struct { diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go index be8e2fe11..fe9018131 100644 --- a/backend/pkg/analytics/charts/query.go +++ b/backend/pkg/analytics/charts/query.go @@ -43,60 +43,60 @@ func partitionFilters(filters []Filter) (sessionFilters []Filter, eventFilters [ func buildEventConditions(filters []Filter) (conds, names []string) { for _, f := range filters { - if f.IsEvent { - switch f.Type { - case FilterClick: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'label')", f.Value, "is") - if c != "" { - conds = append(conds, c) - } - names = append(names, "CLICK") - case FilterInput: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'label')", f.Value, f.Operator) - if c != "" { - conds = append(conds, c) - } - names = append(names, "INPUT") - case FilterLocation: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'url_path')", f.Value, f.Operator) - if c != "" { - conds = append(conds, c) - } - names = append(names, "LOCATION") - case FilterCustom: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'name')", f.Value, f.Operator) - if c != "" { - conds = append(conds, c) - } - names = append(names, "CUSTOM") - case FilterFetch: - var fetchConds []string - for _, nf := range f.Filters { - switch nf.Type { - case "fetchUrl": - c := buildCond("JSONExtractString(toString(main.`$properties`), 'url_path')", nf.Value, f.Operator) - if c != "" { - fetchConds = append(fetchConds, c) - } - case "fetchStatusCode": - c := buildCond("JSONExtractFloat(toString(main.`$properties`), 
'status')", nf.Value, f.Operator) - if c != "" { - fetchConds = append(fetchConds, c) - } + //if f.IsEvent { + switch f.Type { + case FilterClick: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'label')", f.Value, "is") + if c != "" { + conds = append(conds, c) + } + names = append(names, "CLICK") + case FilterInput: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'label')", f.Value, f.Operator) + if c != "" { + conds = append(conds, c) + } + names = append(names, "INPUT") + case FilterLocation: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'url_path')", f.Value, f.Operator) + if c != "" { + conds = append(conds, c) + } + names = append(names, "LOCATION") + case FilterCustom: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'name')", f.Value, f.Operator) + if c != "" { + conds = append(conds, c) + } + names = append(names, "CUSTOM") + case FilterFetch: + var fetchConds []string + for _, nf := range f.Filters { + switch nf.Type { + case "fetchUrl": + c := buildCond("JSONExtractString(toString(main.`$properties`), 'url_path')", nf.Value, f.Operator) + if c != "" { + fetchConds = append(fetchConds, c) + } + case "fetchStatusCode": + c := buildCond("JSONExtractFloat(toString(main.`$properties`), 'status')", nf.Value, f.Operator) + if c != "" { + fetchConds = append(fetchConds, c) } } - if len(fetchConds) > 0 { - conds = append(conds, strings.Join(fetchConds, " AND ")) - } - names = append(names, "REQUEST") - case FilterTag: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'tag')", f.Value, f.Operator) - if c != "" { - conds = append(conds, c) - } - names = append(names, "TAG") } + if len(fetchConds) > 0 { + conds = append(conds, strings.Join(fetchConds, " AND ")) + } + names = append(names, "REQUEST") + case FilterTag: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'tag')", f.Value, f.Operator) + if c != "" { + conds = append(conds, c) + } + names = append(names, "TAG") } + //} } return } From 942dcbbd8debab02d2ea69cffc3be5f34d47d688 Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Tue, 22 Apr 2025 13:23:19 +0200 Subject: [PATCH 11/30] feat(product_analytics): timeseries error message --- backend/pkg/analytics/charts/metric_timeseries.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/pkg/analytics/charts/metric_timeseries.go b/backend/pkg/analytics/charts/metric_timeseries.go index 44aee857c..419cb322b 100644 --- a/backend/pkg/analytics/charts/metric_timeseries.go +++ b/backend/pkg/analytics/charts/metric_timeseries.go @@ -16,14 +16,14 @@ func (t TimeSeriesQueryBuilder) Execute(p Payload, conn db.Connector) (interface for _, s := range p.Series { query, err := t.buildQuery(p, s) if err != nil { - log.Fatalf("Error building query for series %s: %v", s.Name, err) - return nil, err + log.Printf("Error building query for series %s: %v", s.Name, err) + return nil, fmt.Errorf("error building query for series %s: %v", s.Name, err) } rows, err := conn.Query(query) if err != nil { - log.Fatalf("Error executing query for series %s: %v", s.Name, err) - return nil, err + log.Printf("Error executing query for series %s: %v", s.Name, err) + return nil, fmt.Errorf("error executing query for series %s: %v", s.Name, err) } var results []DataPoint From 6e57d2105d9eb29e65364eb62ba827aceafc514f Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Tue, 22 Apr 2025 15:09:39 +0200 Subject: [PATCH 12/30] feat(product_analytics): handle filters dynamically --- 
backend/pkg/analytics/charts/metric_table.go | 11 +- backend/pkg/analytics/charts/query.go | 212 +++++++++++++++---- 2 files changed, 178 insertions(+), 45 deletions(-) diff --git a/backend/pkg/analytics/charts/metric_table.go b/backend/pkg/analytics/charts/metric_table.go index 7a48e6e4c..0c4eb0f68 100644 --- a/backend/pkg/analytics/charts/metric_table.go +++ b/backend/pkg/analytics/charts/metric_table.go @@ -43,6 +43,13 @@ var propertySelectorMap = map[string]string{ string(MetricOfTableReferrer): "main.$referrer AS metric_value", } +var mainColumns = map[string]string{ + "user_browser": "$browser", + "user_device": "$device_type", + "user_country": "$country", + "referrer": "$referrer", +} + func (t TableQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { if p.MetricOf == "" { return nil, fmt.Errorf("MetricOf is empty") @@ -120,7 +127,9 @@ func (t TableQueryBuilder) buildQuery(r Payload, metricFormat string) (string, e propertyName = originalMetricOf eventFilters := s.Filter.Filters - eventConds, eventNames := buildEventConditions(eventFilters) + eventConds, eventNames := buildEventConditions(eventFilters, BuildConditionsOptions{ + DefinedColumns: mainColumns, + }) baseWhereConditions := []string{ fmt.Sprintf("main.created_at >= toDateTime(%d/1000)", r.StartTimestamp), diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go index fe9018131..c40cc70ea 100644 --- a/backend/pkg/analytics/charts/query.go +++ b/backend/pkg/analytics/charts/query.go @@ -41,62 +41,186 @@ func partitionFilters(filters []Filter) (sessionFilters []Filter, eventFilters [ return } -func buildEventConditions(filters []Filter) (conds, names []string) { +var validFilterTypes = map[FilterType]struct{}{ + FilterClick: {}, + FilterInput: {}, + FilterLocation: {}, + FilterCustom: {}, + FilterFetch: {}, + FilterTag: {}, + FilterUserCountry: {}, + FilterUserCity: {}, + FilterUserState: {}, + FilterUserId: {}, + FilterUserAnonymousId: {}, + FilterUserOs: {}, + FilterUserBrowser: {}, + FilterUserDevice: {}, + FilterPlatform: {}, + FilterRevId: {}, + FilterReferrer: {}, + FilterUtmSource: {}, + FilterUtmMedium: {}, + FilterUtmCampaign: {}, + FilterDuration: {}, + FilterMetadata: {}, +} + +type BuildConditionsOptions struct { + MainTableAlias string + PropertiesColumnName string + DefinedColumns map[string]string +} + +type filterConfig struct { + LogicalProperty string + EventName string + IsNumeric bool +} + +var filterTypeConfigs = map[FilterType]filterConfig{ + FilterClick: {LogicalProperty: "label", EventName: "CLICK"}, + FilterInput: {LogicalProperty: "label", EventName: "INPUT"}, + FilterLocation: {LogicalProperty: "url_path", EventName: "LOCATION"}, + FilterCustom: {LogicalProperty: "name", EventName: "CUSTOM"}, + FilterTag: {LogicalProperty: "tag", EventName: "TAG"}, +} + +var nestedFilterTypeConfigs = map[string]filterConfig{ + "fetchUrl": {LogicalProperty: "url_path"}, + "fetchStatusCode": {LogicalProperty: "status", IsNumeric: true}, +} + +func getColumnAccessor(logicalProp string, isNumeric bool, opts BuildConditionsOptions) string { + if actualCol, ok := opts.DefinedColumns[logicalProp]; ok && actualCol != "" { + return fmt.Sprintf("%s.`%s`", opts.MainTableAlias, actualCol) + } + + jsonFunc := "JSONExtractString" + if isNumeric { + jsonFunc = "JSONExtractFloat" // Or JSONExtractInt, etc. 
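+		// numeric properties such as fetchStatusCode are extracted as Float64 so
+		// comparison operators act on numbers rather than on their string form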
+ } + + return fmt.Sprintf("%s(toString(%s.`%s`), '%s')", + jsonFunc, opts.MainTableAlias, opts.PropertiesColumnName, logicalProp) +} + +func buildEventConditions(filters []Filter, options ...BuildConditionsOptions) (conds, names []string) { + opts := BuildConditionsOptions{ + MainTableAlias: "main", + PropertiesColumnName: "$properties", + DefinedColumns: make(map[string]string), + } + + if len(options) > 0 { + if options[0].MainTableAlias != "" { + opts.MainTableAlias = options[0].MainTableAlias + } + if options[0].PropertiesColumnName != "" { + opts.PropertiesColumnName = options[0].PropertiesColumnName + } + if options[0].DefinedColumns != nil { + opts.DefinedColumns = options[0].DefinedColumns + } + } + for _, f := range filters { - //if f.IsEvent { - switch f.Type { - case FilterClick: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'label')", f.Value, "is") - if c != "" { - conds = append(conds, c) - } - names = append(names, "CLICK") - case FilterInput: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'label')", f.Value, f.Operator) - if c != "" { - conds = append(conds, c) - } - names = append(names, "INPUT") - case FilterLocation: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'url_path')", f.Value, f.Operator) - if c != "" { - conds = append(conds, c) - } - names = append(names, "LOCATION") - case FilterCustom: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'name')", f.Value, f.Operator) - if c != "" { - conds = append(conds, c) - } - names = append(names, "CUSTOM") - case FilterFetch: + _, isValidType := validFilterTypes[f.Type] + if !isValidType || !f.IsEvent { + continue + } + + if f.Type == FilterFetch { var fetchConds []string for _, nf := range f.Filters { - switch nf.Type { - case "fetchUrl": - c := buildCond("JSONExtractString(toString(main.`$properties`), 'url_path')", nf.Value, f.Operator) - if c != "" { - fetchConds = append(fetchConds, c) - } - case "fetchStatusCode": - c := buildCond("JSONExtractFloat(toString(main.`$properties`), 'status')", nf.Value, f.Operator) - if c != "" { - fetchConds = append(fetchConds, c) - } + nestedConfig, ok := nestedFilterTypeConfigs[string(nf.Type)] + if !ok { + continue + } + + accessor := getColumnAccessor(nestedConfig.LogicalProperty, nestedConfig.IsNumeric, opts) + c := buildCond(accessor, nf.Value, f.Operator) // Uses parent filter's operator + if c != "" { + fetchConds = append(fetchConds, c) } } if len(fetchConds) > 0 { conds = append(conds, strings.Join(fetchConds, " AND ")) + names = append(names, "REQUEST") } - names = append(names, "REQUEST") - case FilterTag: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'tag')", f.Value, f.Operator) + } else { + config, ok := filterTypeConfigs[f.Type] + if !ok { + continue + } + + accessor := getColumnAccessor(config.LogicalProperty, config.IsNumeric, opts) + c := buildCond(accessor, f.Value, f.Operator) if c != "" { conds = append(conds, c) + names = append(names, config.EventName) + } + } + } + return +} + +func buildEventConditionsX(filters []Filter) (conds, names []string) { + for _, f := range filters { + if f.IsEvent { + switch f.Type { + case FilterClick: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'label')", f.Value, "is") + if c != "" { + conds = append(conds, c) + } + names = append(names, "CLICK") + case FilterInput: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'label')", f.Value, f.Operator) + if c != "" { + conds = append(conds, c) + } + 
names = append(names, "INPUT") + case FilterLocation: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'url_path')", f.Value, f.Operator) + if c != "" { + conds = append(conds, c) + } + names = append(names, "LOCATION") + case FilterCustom: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'name')", f.Value, f.Operator) + if c != "" { + conds = append(conds, c) + } + names = append(names, "CUSTOM") + case FilterFetch: + var fetchConds []string + for _, nf := range f.Filters { + switch nf.Type { + case "fetchUrl": + c := buildCond("JSONExtractString(toString(main.`$properties`), 'url_path')", nf.Value, f.Operator) + if c != "" { + fetchConds = append(fetchConds, c) + } + case "fetchStatusCode": + c := buildCond("JSONExtractFloat(toString(main.`$properties`), 'status')", nf.Value, f.Operator) + if c != "" { + fetchConds = append(fetchConds, c) + } + } + } + if len(fetchConds) > 0 { + conds = append(conds, strings.Join(fetchConds, " AND ")) + } + names = append(names, "REQUEST") + case FilterTag: + c := buildCond("JSONExtractString(toString(main.`$properties`), 'tag')", f.Value, f.Operator) + if c != "" { + conds = append(conds, c) + } + names = append(names, "TAG") } - names = append(names, "TAG") } - //} } return } From 4204b41dbdf126b93103fad5917bb161c5722653 Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Fri, 25 Apr 2025 18:04:10 +0200 Subject: [PATCH 13/30] feat(product_analytics): funnels card --- backend/pkg/analytics/charts/metric_funnel.go | 154 +++++++++++++++++- backend/pkg/analytics/charts/metric_table.go | 9 +- backend/pkg/analytics/charts/query.go | 30 ++-- 3 files changed, 173 insertions(+), 20 deletions(-) diff --git a/backend/pkg/analytics/charts/metric_funnel.go b/backend/pkg/analytics/charts/metric_funnel.go index 9de3a9dee..63fc19a41 100644 --- a/backend/pkg/analytics/charts/metric_funnel.go +++ b/backend/pkg/analytics/charts/metric_funnel.go @@ -1,9 +1,159 @@ package charts -import "openreplay/backend/pkg/analytics/db" +import ( + "fmt" + "openreplay/backend/pkg/analytics/db" + "strings" +) + +type FunnelStepResult struct { + LevelNumber uint64 `json:"step"` + StepName string `json:"type"` + CountAtLevel uint64 `json:"count"` +} + +type FunnelResponse struct { + Steps []FunnelStepResult `json:"stages"` +} type FunnelQueryBuilder struct{} func (f FunnelQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { - return "-- Funnel query placeholder", nil + q, err := f.buildQuery(p) + if err != nil { + return nil, err + } + rows, err := conn.Query(q) + if err != nil { + return nil, err + } + defer rows.Close() + + var steps []FunnelStepResult + for rows.Next() { + var r FunnelStepResult + if err := rows.Scan(&r.LevelNumber, &r.StepName, &r.CountAtLevel); err != nil { + return nil, err + } + steps = append(steps, r) + } + return FunnelResponse{Steps: steps}, nil +} + +func (f FunnelQueryBuilder) buildQuery(p Payload) (string, error) { + if len(p.MetricPayload.Series) == 0 { + return "", fmt.Errorf("series empty") + } + + s := p.MetricPayload.Series[0] + metricFormat := p.MetricPayload.MetricFormat + + // separate global vs step filters based on IsEvent flag + var globalFilters []Filter + var eventFilters []Filter + for _, flt := range s.Filter.Filters { + if flt.IsEvent { + eventFilters = append(eventFilters, flt) + } else { + globalFilters = append(globalFilters, flt) + } + } + + // Global filters + globalConds, globalNames := buildEventConditions(globalFilters, BuildConditionsOptions{ + DefinedColumns: mainColumns, + 
MainTableAlias: "e", + }) + base := []string{ + fmt.Sprintf("e.created_at >= toDateTime(%d/1000)", p.MetricPayload.StartTimestamp), + fmt.Sprintf("e.created_at < toDateTime(%d/1000)", p.MetricPayload.EndTimestamp+86400000), + "s.duration > 0", + fmt.Sprintf("e.project_id = %d", p.ProjectId), + } + base = append(base, globalConds...) + if len(globalNames) > 0 { + base = append(base, "e.`$event_name` IN ("+buildInClause(globalNames)+")") + } + + // Build steps and per-step conditions only for eventFilters + var stepNames []string + var stepExprs []string + for i, filter := range eventFilters { + // Step name from filter type + stepNames = append(stepNames, fmt.Sprintf("'%s'", filter.Type)) + exprs, _ := buildEventConditions([]Filter{filter}, BuildConditionsOptions{DefinedColumns: mainColumns}) + // replace main.$properties references + for j, c := range exprs { + c = strings.ReplaceAll(c, "toString(main.`$properties`)", "properties") + c = strings.ReplaceAll(c, "main.`$properties`", "properties") + // wrap JSON for JSONExtractString + c = strings.ReplaceAll(c, "JSONExtractString(properties", "JSONExtractString(toString(properties)") + exprs[j] = c + } + var expr string + if len(exprs) > 0 { + expr = fmt.Sprintf("(event_name = funnel_steps[%d] AND %s)", i+1, strings.Join(exprs, " AND ")) + } else { + expr = fmt.Sprintf("(event_name = funnel_steps[%d])", i+1) + } + stepExprs = append(stepExprs, expr) + } + stepsArr := "[" + strings.Join(stepNames, ",") + "]" + windowArgs := strings.Join(stepExprs, ",") + + // Compose WHERE clause + where := strings.Join(base, " AND ") + + // Final query + q := fmt.Sprintf(` +WITH + %s AS funnel_steps, + 86400 AS funnel_window_seconds, + events_for_funnel AS ( + SELECT + e.created_at, + e."$event_name" AS event_name, + e."$properties" AS properties, + e.session_id, + e.distinct_id, + s.user_id AS session_user_id, + if('%s' = 'sessionCount', toString(e.session_id), coalesce(nullif(s.user_id,''),e.distinct_id)) AS entity_id + FROM product_analytics.events AS e + JOIN experimental.sessions AS s USING(session_id) + WHERE %s + ), + funnel_levels_reached AS ( + SELECT + entity_id, + windowFunnel(funnel_window_seconds)( + toDateTime(created_at), + %s + ) AS max_level + FROM events_for_funnel + GROUP BY entity_id + ), + counts_by_level AS ( + SELECT + seq.number + 1 AS level_number, + countDistinctIf(entity_id, max_level >= seq.number + 1) AS cnt + FROM funnel_levels_reached + CROSS JOIN numbers(length(funnel_steps)) AS seq + GROUP BY seq.number + ), + step_list AS ( + SELECT + seq.number + 1 AS level_number, + funnel_steps[seq.number + 1] AS step_name + FROM numbers(length(funnel_steps)) AS seq + ) +SELECT + s.level_number, + s.step_name, + ifNull(c.cnt, 0) AS count_at_level +FROM step_list AS s +LEFT JOIN counts_by_level AS c ON s.level_number = c.level_number +ORDER BY s.level_number; +`, stepsArr, metricFormat, where, windowArgs) + + return q, nil } diff --git a/backend/pkg/analytics/charts/metric_table.go b/backend/pkg/analytics/charts/metric_table.go index 0c4eb0f68..b1313d731 100644 --- a/backend/pkg/analytics/charts/metric_table.go +++ b/backend/pkg/analytics/charts/metric_table.go @@ -44,10 +44,11 @@ var propertySelectorMap = map[string]string{ } var mainColumns = map[string]string{ - "user_browser": "$browser", - "user_device": "$device_type", - "user_country": "$country", - "referrer": "$referrer", + "userBrowser": "$browser", + "userDevice": "$device_type", + "userCountry": "$country", + "referrer": "$referrer", + // TODO add more columns if needed } func 
(t TableQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go index c40cc70ea..5b59001df 100644 --- a/backend/pkg/analytics/charts/query.go +++ b/backend/pkg/analytics/charts/query.go @@ -42,6 +42,8 @@ func partitionFilters(filters []Filter) (sessionFilters []Filter, eventFilters [ } var validFilterTypes = map[FilterType]struct{}{ + "LOCATION": {}, + "CLICK": {}, FilterClick: {}, FilterInput: {}, FilterLocation: {}, @@ -78,17 +80,13 @@ type filterConfig struct { IsNumeric bool } -var filterTypeConfigs = map[FilterType]filterConfig{ - FilterClick: {LogicalProperty: "label", EventName: "CLICK"}, - FilterInput: {LogicalProperty: "label", EventName: "INPUT"}, - FilterLocation: {LogicalProperty: "url_path", EventName: "LOCATION"}, - FilterCustom: {LogicalProperty: "name", EventName: "CUSTOM"}, - FilterTag: {LogicalProperty: "tag", EventName: "TAG"}, -} - -var nestedFilterTypeConfigs = map[string]filterConfig{ +var propertyKeyMap = map[string]filterConfig{ + "LOCATION": {LogicalProperty: "url_path"}, + "CLICK": {LogicalProperty: "label"}, + "INPUT": {LogicalProperty: "label"}, "fetchUrl": {LogicalProperty: "url_path"}, "fetchStatusCode": {LogicalProperty: "status", IsNumeric: true}, + // TODO add more mappings as needed } func getColumnAccessor(logicalProp string, isNumeric bool, opts BuildConditionsOptions) string { @@ -126,14 +124,14 @@ func buildEventConditions(filters []Filter, options ...BuildConditionsOptions) ( for _, f := range filters { _, isValidType := validFilterTypes[f.Type] - if !isValidType || !f.IsEvent { + if !isValidType { continue } if f.Type == FilterFetch { var fetchConds []string for _, nf := range f.Filters { - nestedConfig, ok := nestedFilterTypeConfigs[string(nf.Type)] + nestedConfig, ok := propertyKeyMap[string(nf.Type)] if !ok { continue } @@ -149,16 +147,20 @@ func buildEventConditions(filters []Filter, options ...BuildConditionsOptions) ( names = append(names, "REQUEST") } } else { - config, ok := filterTypeConfigs[f.Type] + config, ok := propertyKeyMap[string(f.Type)] if !ok { - continue + config = filterConfig{ + LogicalProperty: string(f.Type), + } } accessor := getColumnAccessor(config.LogicalProperty, config.IsNumeric, opts) c := buildCond(accessor, f.Value, f.Operator) if c != "" { conds = append(conds, c) - names = append(names, config.EventName) + if f.IsEvent { + names = append(names, string(f.Type)) + } } } } From f6485005c6d49987eff797e6c4bffcce907e29e1 Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Mon, 28 Apr 2025 10:34:35 +0200 Subject: [PATCH 14/30] feat(product_analytics): funnels card handle duration --- backend/pkg/analytics/charts/metric_funnel.go | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/backend/pkg/analytics/charts/metric_funnel.go b/backend/pkg/analytics/charts/metric_funnel.go index 63fc19a41..a52c0d22e 100644 --- a/backend/pkg/analytics/charts/metric_funnel.go +++ b/backend/pkg/analytics/charts/metric_funnel.go @@ -59,6 +59,18 @@ func (f FunnelQueryBuilder) buildQuery(p Payload) (string, error) { } } + // extract duration filter + var minDur, maxDur int64 + for i := len(globalFilters) - 1; i >= 0; i-- { + if globalFilters[i].Type == "duration" { + if vals, ok := globalFilters[i].Value.([]interface{}); ok && len(vals) == 2 { + minDur = int64(vals[0].(float64)) + maxDur = int64(vals[1].(float64)) + } + globalFilters = append(globalFilters[:i], globalFilters[i+1:]...) 
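+			// the loop runs in reverse, so this in-place removal cannot skip the next element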
+ } + } + // Global filters globalConds, globalNames := buildEventConditions(globalFilters, BuildConditionsOptions{ DefinedColumns: mainColumns, @@ -70,6 +82,9 @@ func (f FunnelQueryBuilder) buildQuery(p Payload) (string, error) { "s.duration > 0", fmt.Sprintf("e.project_id = %d", p.ProjectId), } + if maxDur > 0 { + base = append(base, fmt.Sprintf("s.duration BETWEEN %d AND %d", minDur, maxDur)) + } base = append(base, globalConds...) if len(globalNames) > 0 { base = append(base, "e.`$event_name` IN ("+buildInClause(globalNames)+")") @@ -79,14 +94,11 @@ func (f FunnelQueryBuilder) buildQuery(p Payload) (string, error) { var stepNames []string var stepExprs []string for i, filter := range eventFilters { - // Step name from filter type stepNames = append(stepNames, fmt.Sprintf("'%s'", filter.Type)) exprs, _ := buildEventConditions([]Filter{filter}, BuildConditionsOptions{DefinedColumns: mainColumns}) - // replace main.$properties references for j, c := range exprs { c = strings.ReplaceAll(c, "toString(main.`$properties`)", "properties") c = strings.ReplaceAll(c, "main.`$properties`", "properties") - // wrap JSON for JSONExtractString c = strings.ReplaceAll(c, "JSONExtractString(properties", "JSONExtractString(toString(properties)") exprs[j] = c } From c6076c5e7e113821e579c515d8374e37ef38cf8b Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Mon, 28 Apr 2025 10:59:20 +0200 Subject: [PATCH 15/30] feat(product_analytics): funnels card handle duration --- backend/pkg/analytics/charts/metric_funnel.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/backend/pkg/analytics/charts/metric_funnel.go b/backend/pkg/analytics/charts/metric_funnel.go index a52c0d22e..9a562e352 100644 --- a/backend/pkg/analytics/charts/metric_funnel.go +++ b/backend/pkg/analytics/charts/metric_funnel.go @@ -3,6 +3,7 @@ package charts import ( "fmt" "openreplay/backend/pkg/analytics/db" + "strconv" "strings" ) @@ -49,8 +50,7 @@ func (f FunnelQueryBuilder) buildQuery(p Payload) (string, error) { metricFormat := p.MetricPayload.MetricFormat // separate global vs step filters based on IsEvent flag - var globalFilters []Filter - var eventFilters []Filter + var globalFilters, eventFilters []Filter for _, flt := range s.Filter.Filters { if flt.IsEvent { eventFilters = append(eventFilters, flt) @@ -63,9 +63,10 @@ func (f FunnelQueryBuilder) buildQuery(p Payload) (string, error) { var minDur, maxDur int64 for i := len(globalFilters) - 1; i >= 0; i-- { if globalFilters[i].Type == "duration" { - if vals, ok := globalFilters[i].Value.([]interface{}); ok && len(vals) == 2 { - minDur = int64(vals[0].(float64)) - maxDur = int64(vals[1].(float64)) + vals := globalFilters[i].Value // []string + if len(vals) == 2 { + minDur, _ = strconv.ParseInt(vals[0], 10, 64) + maxDur, _ = strconv.ParseInt(vals[1], 10, 64) } globalFilters = append(globalFilters[:i], globalFilters[i+1:]...) } @@ -85,6 +86,7 @@ func (f FunnelQueryBuilder) buildQuery(p Payload) (string, error) { if maxDur > 0 { base = append(base, fmt.Sprintf("s.duration BETWEEN %d AND %d", minDur, maxDur)) } + base = append(base, globalConds...) 
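+	// globalNames only carries event-type filters, so the IN (...) guard below can legitimately stay empty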
if len(globalNames) > 0 { base = append(base, "e.`$event_name` IN ("+buildInClause(globalNames)+")") From 5d6d94ed4d5636f3d89fd47c7d99bb37d6973d3e Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Tue, 29 Apr 2025 16:05:07 +0200 Subject: [PATCH 16/30] feat(product_analytics): heatmaps wip --- backend/pkg/analytics/charts/metric_funnel.go | 116 +++++------ .../pkg/analytics/charts/metric_heatmaps.go | 99 ++++++++++ .../charts/metric_heatmaps_session.go | 82 ++++++++ backend/pkg/analytics/charts/model.go | 1 + backend/pkg/analytics/charts/query.go | 182 +++++++++--------- backend/pkg/analytics/db/connector.go | 11 ++ 6 files changed, 337 insertions(+), 154 deletions(-) create mode 100644 backend/pkg/analytics/charts/metric_heatmaps.go create mode 100644 backend/pkg/analytics/charts/metric_heatmaps_session.go diff --git a/backend/pkg/analytics/charts/metric_funnel.go b/backend/pkg/analytics/charts/metric_funnel.go index 9a562e352..f4e857f3f 100644 --- a/backend/pkg/analytics/charts/metric_funnel.go +++ b/backend/pkg/analytics/charts/metric_funnel.go @@ -3,7 +3,6 @@ package charts import ( "fmt" "openreplay/backend/pkg/analytics/db" - "strconv" "strings" ) @@ -49,33 +48,52 @@ func (f FunnelQueryBuilder) buildQuery(p Payload) (string, error) { s := p.MetricPayload.Series[0] metricFormat := p.MetricPayload.MetricFormat - // separate global vs step filters based on IsEvent flag - var globalFilters, eventFilters []Filter + // Separate global vs step filters + var globalFilters, stepFilters []Filter for _, flt := range s.Filter.Filters { if flt.IsEvent { - eventFilters = append(eventFilters, flt) + stepFilters = append(stepFilters, flt) } else { globalFilters = append(globalFilters, flt) } } - // extract duration filter - var minDur, maxDur int64 - for i := len(globalFilters) - 1; i >= 0; i-- { - if globalFilters[i].Type == "duration" { - vals := globalFilters[i].Value // []string - if len(vals) == 2 { - minDur, _ = strconv.ParseInt(vals[0], 10, 64) - maxDur, _ = strconv.ParseInt(vals[1], 10, 64) + // 1. Collect required mainColumns from all filters (including nested) + requiredColumns := make(map[string]struct{}) + var collectColumns func([]Filter) + collectColumns = func(filters []Filter) { + for _, flt := range filters { + if col, ok := mainColumns[string(flt.Type)]; ok { + requiredColumns[col] = struct{}{} } - globalFilters = append(globalFilters[:i], globalFilters[i+1:]...) + collectColumns(flt.Filters) } } + collectColumns(globalFilters) + collectColumns(stepFilters) - // Global filters - globalConds, globalNames := buildEventConditions(globalFilters, BuildConditionsOptions{ - DefinedColumns: mainColumns, - MainTableAlias: "e", + // 2. Build SELECT clause for CTE + selectCols := []string{ + `e.created_at`, + `e."$event_name" AS event_name`, + `e."$properties" AS properties`, + } + for col := range requiredColumns { + logical := reverseLookup(mainColumns, col) + selectCols = append(selectCols, fmt.Sprintf(`e."%s" AS %s`, col, logical)) + } + selectCols = append(selectCols, + `e.session_id`, + `e.distinct_id`, + `s.user_id AS session_user_id`, + fmt.Sprintf("if('%s' = 'sessionCount', toString(e.session_id), coalesce(nullif(s.user_id,''),e.distinct_id)) AS entity_id", metricFormat), + ) + + // 3. 
Global conditions + globalConds, _ := buildEventConditions(globalFilters, BuildConditionsOptions{ + DefinedColumns: cteColumnAliases(), // logical -> logical (CTE alias) + MainTableAlias: "e", + PropertiesColumnName: "$properties", }) base := []string{ fmt.Sprintf("e.created_at >= toDateTime(%d/1000)", p.MetricPayload.StartTimestamp), @@ -83,55 +101,37 @@ func (f FunnelQueryBuilder) buildQuery(p Payload) (string, error) { "s.duration > 0", fmt.Sprintf("e.project_id = %d", p.ProjectId), } - if maxDur > 0 { - base = append(base, fmt.Sprintf("s.duration BETWEEN %d AND %d", minDur, maxDur)) - } - base = append(base, globalConds...) - if len(globalNames) > 0 { - base = append(base, "e.`$event_name` IN ("+buildInClause(globalNames)+")") - } - - // Build steps and per-step conditions only for eventFilters - var stepNames []string - var stepExprs []string - for i, filter := range eventFilters { - stepNames = append(stepNames, fmt.Sprintf("'%s'", filter.Type)) - exprs, _ := buildEventConditions([]Filter{filter}, BuildConditionsOptions{DefinedColumns: mainColumns}) - for j, c := range exprs { - c = strings.ReplaceAll(c, "toString(main.`$properties`)", "properties") - c = strings.ReplaceAll(c, "main.`$properties`", "properties") - c = strings.ReplaceAll(c, "JSONExtractString(properties", "JSONExtractString(toString(properties)") - exprs[j] = c - } - var expr string - if len(exprs) > 0 { - expr = fmt.Sprintf("(event_name = funnel_steps[%d] AND %s)", i+1, strings.Join(exprs, " AND ")) - } else { - expr = fmt.Sprintf("(event_name = funnel_steps[%d])", i+1) - } - stepExprs = append(stepExprs, expr) - } - stepsArr := "[" + strings.Join(stepNames, ",") + "]" - windowArgs := strings.Join(stepExprs, ",") - - // Compose WHERE clause where := strings.Join(base, " AND ") - // Final query + // 4. Step conditions + var stepNames []string + var stepExprs []string + for i, filter := range stepFilters { + stepNames = append(stepNames, fmt.Sprintf("'%s'", filter.Type)) + stepConds, _ := buildEventConditions([]Filter{filter}, BuildConditionsOptions{ + DefinedColumns: cteColumnAliases(), // logical -> logical (CTE alias) + PropertiesColumnName: "properties", + MainTableAlias: "", + }) + + stepCondExprs := []string{fmt.Sprintf("event_name = funnel_steps[%d]", i+1)} + if len(stepConds) > 0 { + stepCondExprs = append(stepCondExprs, stepConds...) 
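+			// these per-step conditions are ANDed and become one positional argument of windowFunnel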
+ } + stepExprs = append(stepExprs, fmt.Sprintf("(%s)", strings.Join(stepCondExprs, " AND "))) + } + + stepsArr := "[" + strings.Join(stepNames, ",") + "]" + windowArgs := strings.Join(stepExprs, ",\n ") + q := fmt.Sprintf(` WITH %s AS funnel_steps, 86400 AS funnel_window_seconds, events_for_funnel AS ( SELECT - e.created_at, - e."$event_name" AS event_name, - e."$properties" AS properties, - e.session_id, - e.distinct_id, - s.user_id AS session_user_id, - if('%s' = 'sessionCount', toString(e.session_id), coalesce(nullif(s.user_id,''),e.distinct_id)) AS entity_id + %s FROM product_analytics.events AS e JOIN experimental.sessions AS s USING(session_id) WHERE %s @@ -167,7 +167,7 @@ SELECT FROM step_list AS s LEFT JOIN counts_by_level AS c ON s.level_number = c.level_number ORDER BY s.level_number; -`, stepsArr, metricFormat, where, windowArgs) +`, stepsArr, strings.Join(selectCols, ",\n "), where, windowArgs) return q, nil } diff --git a/backend/pkg/analytics/charts/metric_heatmaps.go b/backend/pkg/analytics/charts/metric_heatmaps.go new file mode 100644 index 000000000..1acd4cd4c --- /dev/null +++ b/backend/pkg/analytics/charts/metric_heatmaps.go @@ -0,0 +1,99 @@ +package charts + +import ( + "fmt" + "openreplay/backend/pkg/analytics/db" + "strings" +) + +type HeatmapPoint struct { + NormalizedX float64 `json:"normalized_x"` + NormalizedY float64 `json:"normalized_y"` +} + +type HeatmapResponse struct { + Points []HeatmapPoint `json:"points"` +} + +type HeatmapQueryBuilder struct{} + +func (h HeatmapQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { + q, err := h.buildQuery(p) + if err != nil { + return nil, err + } + rows, err := conn.Query(q) + if err != nil { + return nil, err + } + defer rows.Close() + + var pts []HeatmapPoint + for rows.Next() { + var x, y float64 + if err := rows.Scan(&x, &y); err != nil { + return nil, err + } + pts = append(pts, HeatmapPoint{x, y}) + } + + return HeatmapResponse{ + Points: pts, + }, nil +} + +func (h HeatmapQueryBuilder) buildQuery(p Payload) (string, error) { + if len(p.MetricPayload.Series) == 0 { + return "", fmt.Errorf("series empty") + } + s := p.MetricPayload.Series[0] + + var globalFilters, eventFilters []Filter + for _, flt := range s.Filter.Filters { + if flt.IsEvent { + eventFilters = append(eventFilters, flt) + } else { + globalFilters = append(globalFilters, flt) + } + } + + globalConds, globalNames := buildEventConditions(globalFilters, BuildConditionsOptions{ + DefinedColumns: mainColumns, + MainTableAlias: "e", + }) + + eventConds, eventNames := buildEventConditions(eventFilters, BuildConditionsOptions{ + DefinedColumns: mainColumns, + MainTableAlias: "e", + }) + + base := []string{ + fmt.Sprintf("e.created_at >= toDateTime(%d/1000)", p.MetricPayload.StartTimestamp), + fmt.Sprintf("e.created_at < toDateTime(%d/1000)", p.MetricPayload.EndTimestamp+86400000), + fmt.Sprintf("e.project_id = %d", p.ProjectId), + } + base = append(base, globalConds...) + if len(globalNames) > 0 { + base = append(base, "e.`$event_name` IN ("+buildInClause(globalNames)+")") + } + + if len(eventNames) > 0 { + base = append(base, "e.`$event_name` IN ("+buildInClause(eventNames)+")") + } + + base = append(base, eventConds...) 
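+	// Illustrative shape of the final WHERE clause (values are made up):
+	//   e.created_at >= toDateTime(1746000000000/1000)
+	//   AND e.created_at < toDateTime(1746086400000/1000)
+	//   AND e.project_id = 42 AND e.`$event_name` IN ('CLICK') AND <per-filter conditions>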
+ + where := strings.Join(base, " AND ") + + q := fmt.Sprintf(` +SELECT + + + JSONExtractFloat(toString(e."$properties"), 'normalized_x') AS normalized_x, + JSONExtractFloat(toString(e."$properties"), 'normalized_y') AS normalized_y +FROM product_analytics.events AS e +JOIN experimental.sessions AS s USING(session_id) +WHERE %s;`, where) + + return q, nil +} diff --git a/backend/pkg/analytics/charts/metric_heatmaps_session.go b/backend/pkg/analytics/charts/metric_heatmaps_session.go new file mode 100644 index 000000000..049a775e9 --- /dev/null +++ b/backend/pkg/analytics/charts/metric_heatmaps_session.go @@ -0,0 +1,82 @@ +package charts + +import ( + "fmt" + "openreplay/backend/pkg/analytics/db" + "strings" +) + +type HeatmapSessionResponse struct { + //Points []HeatmapPoint `json:"points"` + SessionID uint64 `json:"session_id"` +} + +type HeatmapSessionQueryBuilder struct{} + +func (h HeatmapSessionQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { + shortestQ, err := h.buildQuery(p) + if err != nil { + return nil, err + } + var sid uint64 + row, err := conn.QueryRow(shortestQ) + if err != nil { + return nil, err + } + + if err := row.Scan(&sid); err != nil { + return nil, err + } + + return HeatmapSessionResponse{ + SessionID: sid, + }, nil +} + +func (h HeatmapSessionQueryBuilder) buildQuery(p Payload) (string, error) { + if len(p.MetricPayload.Series) == 0 { + return "", fmt.Errorf("series empty") + } + s := p.MetricPayload.Series[0] + + var globalFilters, eventFilters []Filter + for _, flt := range s.Filter.Filters { + if flt.IsEvent { + eventFilters = append(eventFilters, flt) + } else { + globalFilters = append(globalFilters, flt) + } + } + + globalConds, globalNames := buildEventConditions(globalFilters, BuildConditionsOptions{ + DefinedColumns: mainColumns, + MainTableAlias: "e", + }) + eventConds, _ := buildEventConditions(eventFilters, BuildConditionsOptions{ + DefinedColumns: mainColumns, + MainTableAlias: "e", + }) + + base := []string{ + fmt.Sprintf("e.created_at >= toDateTime(%d/1000)", p.MetricPayload.StartTimestamp), + fmt.Sprintf("e.created_at < toDateTime(%d/1000)", p.MetricPayload.EndTimestamp+86400000), + fmt.Sprintf("e.project_id = %d", p.ProjectId), + "e.\"$event_name\" = 'CLICK'", + } + base = append(base, globalConds...) + if len(globalNames) > 0 { + base = append(base, "e.`$event_name` IN ("+buildInClause(globalNames)+")") + } + base = append(base, eventConds...) 
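+	// the shortest qualifying session is chosen (ORDER BY s.duration ASC LIMIT 1 below)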
+ + where := strings.Join(base, " AND ") + + return fmt.Sprintf(` + SELECT + s.session_id + FROM product_analytics.events AS e + JOIN experimental.sessions AS s USING(session_id) + WHERE %s + ORDER BY s.duration ASC + LIMIT 1;`, where), nil +} diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go index e22abcbe5..15425897b 100644 --- a/backend/pkg/analytics/charts/model.go +++ b/backend/pkg/analytics/charts/model.go @@ -48,6 +48,7 @@ const ( MetricTypeTimeseries MetricType = "timeseries" MetricTypeTable MetricType = "table" MetricTypeFunnel MetricType = "funnel" + MetricTypeHeatmap MetricType = "heatmaps" ) const ( diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go index 5b59001df..1e62a3a8e 100644 --- a/backend/pkg/analytics/charts/query.go +++ b/backend/pkg/analytics/charts/query.go @@ -25,22 +25,13 @@ func NewQueryBuilder(p Payload) (QueryBuilder, error) { return FunnelQueryBuilder{}, nil case MetricTypeTable: return TableQueryBuilder{}, nil + case MetricTypeHeatmap: + return HeatmapQueryBuilder{}, nil default: return nil, fmt.Errorf("unknown metric type: %s", p.MetricType) } } -func partitionFilters(filters []Filter) (sessionFilters []Filter, eventFilters []Filter) { - for _, f := range filters { - if f.IsEvent { - eventFilters = append(eventFilters, f) - } else { - sessionFilters = append(sessionFilters, f) - } - } - return -} - var validFilterTypes = map[FilterType]struct{}{ "LOCATION": {}, "CLICK": {}, @@ -90,17 +81,16 @@ var propertyKeyMap = map[string]filterConfig{ } func getColumnAccessor(logicalProp string, isNumeric bool, opts BuildConditionsOptions) string { + // Use CTE alias if present in DefinedColumns if actualCol, ok := opts.DefinedColumns[logicalProp]; ok && actualCol != "" { - return fmt.Sprintf("%s.`%s`", opts.MainTableAlias, actualCol) + return actualCol } - + // Otherwise, extract from $properties JSON jsonFunc := "JSONExtractString" if isNumeric { - jsonFunc = "JSONExtractFloat" // Or JSONExtractInt, etc. 
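+		// JSONExtractFloat covers numeric properties (status codes, normalized coordinates)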
+ jsonFunc = "JSONExtractFloat" } - - return fmt.Sprintf("%s(toString(%s.`%s`), '%s')", - jsonFunc, opts.MainTableAlias, opts.PropertiesColumnName, logicalProp) + return fmt.Sprintf("%s(toString(%s), '%s')", jsonFunc, opts.PropertiesColumnName, logicalProp) } func buildEventConditions(filters []Filter, options ...BuildConditionsOptions) (conds, names []string) { @@ -109,7 +99,6 @@ func buildEventConditions(filters []Filter, options ...BuildConditionsOptions) ( PropertiesColumnName: "$properties", DefinedColumns: make(map[string]string), } - if len(options) > 0 { if options[0].MainTableAlias != "" { opts.MainTableAlias = options[0].MainTableAlias @@ -121,24 +110,21 @@ func buildEventConditions(filters []Filter, options ...BuildConditionsOptions) ( opts.DefinedColumns = options[0].DefinedColumns } } - for _, f := range filters { - _, isValidType := validFilterTypes[f.Type] - if !isValidType { + _, okType := validFilterTypes[f.Type] + if !okType { continue } - + // process main filter if f.Type == FilterFetch { var fetchConds []string for _, nf := range f.Filters { - nestedConfig, ok := propertyKeyMap[string(nf.Type)] + cfg, ok := propertyKeyMap[string(nf.Type)] if !ok { continue } - - accessor := getColumnAccessor(nestedConfig.LogicalProperty, nestedConfig.IsNumeric, opts) - c := buildCond(accessor, nf.Value, f.Operator) // Uses parent filter's operator - if c != "" { + acc := getColumnAccessor(cfg.LogicalProperty, cfg.IsNumeric, opts) + if c := buildCond(acc, nf.Value, f.Operator); c != "" { fetchConds = append(fetchConds, c) } } @@ -147,80 +133,35 @@ func buildEventConditions(filters []Filter, options ...BuildConditionsOptions) ( names = append(names, "REQUEST") } } else { - config, ok := propertyKeyMap[string(f.Type)] + cfg, ok := propertyKeyMap[string(f.Type)] if !ok { - config = filterConfig{ - LogicalProperty: string(f.Type), + cfg = filterConfig{LogicalProperty: string(f.Type)} + } + acc := getColumnAccessor(cfg.LogicalProperty, cfg.IsNumeric, opts) + + // when the Operator isAny or onAny just add the event name to the list + if f.Operator == "isAny" || f.Operator == "onAny" { + if f.IsEvent { + names = append(names, string(f.Type)) } + continue } - accessor := getColumnAccessor(config.LogicalProperty, config.IsNumeric, opts) - c := buildCond(accessor, f.Value, f.Operator) - if c != "" { + if c := buildCond(acc, f.Value, f.Operator); c != "" { conds = append(conds, c) if f.IsEvent { names = append(names, string(f.Type)) } } } - } - return -} -func buildEventConditionsX(filters []Filter) (conds, names []string) { - for _, f := range filters { - if f.IsEvent { - switch f.Type { - case FilterClick: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'label')", f.Value, "is") - if c != "" { - conds = append(conds, c) - } - names = append(names, "CLICK") - case FilterInput: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'label')", f.Value, f.Operator) - if c != "" { - conds = append(conds, c) - } - names = append(names, "INPUT") - case FilterLocation: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'url_path')", f.Value, f.Operator) - if c != "" { - conds = append(conds, c) - } - names = append(names, "LOCATION") - case FilterCustom: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'name')", f.Value, f.Operator) - if c != "" { - conds = append(conds, c) - } - names = append(names, "CUSTOM") - case FilterFetch: - var fetchConds []string - for _, nf := range f.Filters { - switch nf.Type { - case "fetchUrl": - c := 
buildCond("JSONExtractString(toString(main.`$properties`), 'url_path')", nf.Value, f.Operator) - if c != "" { - fetchConds = append(fetchConds, c) - } - case "fetchStatusCode": - c := buildCond("JSONExtractFloat(toString(main.`$properties`), 'status')", nf.Value, f.Operator) - if c != "" { - fetchConds = append(fetchConds, c) - } - } - } - if len(fetchConds) > 0 { - conds = append(conds, strings.Join(fetchConds, " AND ")) - } - names = append(names, "REQUEST") - case FilterTag: - c := buildCond("JSONExtractString(toString(main.`$properties`), 'tag')", f.Value, f.Operator) - if c != "" { - conds = append(conds, c) - } - names = append(names, "TAG") + // process sub-filters + if len(f.Filters) > 0 && f.Type != FilterFetch { + subOpts := opts // Inherit parent's options + subConds, subNames := buildEventConditions(f.Filters, subOpts) + if len(subConds) > 0 { + conds = append(conds, strings.Join(subConds, " AND ")) + names = append(names, subNames...) } } } @@ -285,6 +226,16 @@ func buildCond(expr string, values []string, operator string) string { for _, v := range values { conds = append(conds, fmt.Sprintf("%s ILIKE '%%%s%%'", expr, v)) } + if len(conds) > 1 { + return "(" + strings.Join(conds, " OR ") + ")" + } + return conds[0] + case "regex": + var conds []string + for _, v := range values { + conds = append(conds, fmt.Sprintf("match(%s, '%s')", expr, v)) + } + if len(conds) > 1 { return "(" + strings.Join(conds, " OR ") + ")" } @@ -316,12 +267,12 @@ func buildCond(expr string, values []string, operator string) string { return "(" + strings.Join(conds, " OR ") + ")" } return conds[0] - case "notEquals": + case "notEquals", "not", "off": if len(values) > 1 { return fmt.Sprintf("%s NOT IN (%s)", expr, buildInClause(values)) } return fmt.Sprintf("%s <> '%s'", expr, values[0]) - case "greaterThan": + case "greaterThan", "gt": var conds []string for _, v := range values { conds = append(conds, fmt.Sprintf("%s > '%s'", expr, v)) @@ -330,7 +281,7 @@ func buildCond(expr string, values []string, operator string) string { return "(" + strings.Join(conds, " OR ") + ")" } return conds[0] - case "greaterThanOrEqual": + case "greaterThanOrEqual", "gte": var conds []string for _, v := range values { conds = append(conds, fmt.Sprintf("%s >= '%s'", expr, v)) @@ -339,7 +290,7 @@ func buildCond(expr string, values []string, operator string) string { return "(" + strings.Join(conds, " OR ") + ")" } return conds[0] - case "lessThan": + case "lessThan", "lt": var conds []string for _, v := range values { conds = append(conds, fmt.Sprintf("%s < '%s'", expr, v)) @@ -348,7 +299,7 @@ func buildCond(expr string, values []string, operator string) string { return "(" + strings.Join(conds, " OR ") + ")" } return conds[0] - case "lessThanOrEqual": + case "lessThanOrEqual", "lte": var conds []string for _, v := range values { conds = append(conds, fmt.Sprintf("%s <= '%s'", expr, v)) @@ -367,7 +318,7 @@ func buildCond(expr string, values []string, operator string) string { return fmt.Sprintf("%s NOT IN (%s)", expr, buildInClause(values)) } return fmt.Sprintf("%s <> '%s'", expr, values[0]) - case "equals", "is": + case "equals", "is", "on": if len(values) > 1 { return fmt.Sprintf("%s IN (%s)", expr, buildInClause(values)) } @@ -471,3 +422,42 @@ func FillMissingDataPoints( } return results } + +func partitionFilters(filters []Filter) (sessionFilters []Filter, eventFilters []Filter) { + for _, f := range filters { + if f.IsEvent { + eventFilters = append(eventFilters, f) + } else { + sessionFilters = append(sessionFilters, 
f) + } + } + return +} + +// Returns a map: logical property -> CTE alias (e.g., "userBrowser" -> "userBrowser") +func cteColumnAliases() map[string]string { + aliases := make(map[string]string) + for logical := range mainColumns { + aliases[logical] = logical + } + return aliases +} + +// Returns a map: logical property -> source column (e.g., "userBrowser" -> "$browser") +func cteSourceColumns() map[string]string { + cols := make(map[string]string) + for logical, col := range mainColumns { + cols[logical] = col + } + return cols +} + +// Helper for reverse lookup (used for dynamic SELECT) +func reverseLookup(m map[string]string, value string) string { + for k, v := range m { + if v == value { + return k + } + } + return "" +} diff --git a/backend/pkg/analytics/db/connector.go b/backend/pkg/analytics/db/connector.go index 62b31068c..45983ee16 100644 --- a/backend/pkg/analytics/db/connector.go +++ b/backend/pkg/analytics/db/connector.go @@ -22,6 +22,7 @@ type TableResponse struct { type Connector interface { Stop() error Query(query string) (driver.Rows, error) + QueryRow(query string) (driver.Row, error) QueryArgs(query string, args map[string]interface{}) (driver.Rows, error) } @@ -64,6 +65,16 @@ func (c *connectorImpl) Query(query string) (driver.Rows, error) { return rows, nil } +func (c *connectorImpl) QueryRow(query string) (driver.Row, error) { + row := c.conn.QueryRow(context.Background(), query) + if err := row.Err(); err != nil { + return nil, err + } + //defer row.Close() + + return row, nil +} + func (c *connectorImpl) QueryArgs(query string, args map[string]interface{}) (driver.Rows, error) { rows, err := c.conn.Query(context.Background(), query, args) if err != nil { From 10ecfde97e5475243668fe7ada8e4c61abee21c7 Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Fri, 2 May 2025 16:47:09 +0200 Subject: [PATCH 17/30] feat(product_analytics): heatmaps and other query improvements --- backend/pkg/analytics/charts/metric_funnel.go | 2 +- .../pkg/analytics/charts/metric_heatmaps.go | 33 +- .../charts/metric_heatmaps_session.go | 42 +- backend/pkg/analytics/charts/model.go | 1 + backend/pkg/analytics/charts/query.go | 433 ++++++++---------- 5 files changed, 238 insertions(+), 273 deletions(-) diff --git a/backend/pkg/analytics/charts/metric_funnel.go b/backend/pkg/analytics/charts/metric_funnel.go index f4e857f3f..ec7de3267 100644 --- a/backend/pkg/analytics/charts/metric_funnel.go +++ b/backend/pkg/analytics/charts/metric_funnel.go @@ -91,7 +91,7 @@ func (f FunnelQueryBuilder) buildQuery(p Payload) (string, error) { // 3. 
Global conditions globalConds, _ := buildEventConditions(globalFilters, BuildConditionsOptions{ - DefinedColumns: cteColumnAliases(), // logical -> logical (CTE alias) + DefinedColumns: mainColumns, MainTableAlias: "e", PropertiesColumnName: "$properties", }) diff --git a/backend/pkg/analytics/charts/metric_heatmaps.go b/backend/pkg/analytics/charts/metric_heatmaps.go index 1acd4cd4c..77bc17655 100644 --- a/backend/pkg/analytics/charts/metric_heatmaps.go +++ b/backend/pkg/analytics/charts/metric_heatmaps.go @@ -7,12 +7,12 @@ import ( ) type HeatmapPoint struct { - NormalizedX float64 `json:"normalized_x"` - NormalizedY float64 `json:"normalized_y"` + NormalizedX float64 `json:"normalizedX"` + NormalizedY float64 `json:"normalizedY"` } type HeatmapResponse struct { - Points []HeatmapPoint `json:"points"` + Points []HeatmapPoint `json:"data"` } type HeatmapQueryBuilder struct{} @@ -57,29 +57,32 @@ func (h HeatmapQueryBuilder) buildQuery(p Payload) (string, error) { } } - globalConds, globalNames := buildEventConditions(globalFilters, BuildConditionsOptions{ + globalConds, _ := buildEventConditions(globalFilters, BuildConditionsOptions{ DefinedColumns: mainColumns, MainTableAlias: "e", }) - eventConds, eventNames := buildEventConditions(eventFilters, BuildConditionsOptions{ + eventConds, _ := buildEventConditions(eventFilters, BuildConditionsOptions{ DefinedColumns: mainColumns, MainTableAlias: "e", }) base := []string{ fmt.Sprintf("e.created_at >= toDateTime(%d/1000)", p.MetricPayload.StartTimestamp), - fmt.Sprintf("e.created_at < toDateTime(%d/1000)", p.MetricPayload.EndTimestamp+86400000), + fmt.Sprintf("e.created_at < toDateTime(%d/1000)", p.MetricPayload.EndTimestamp), fmt.Sprintf("e.project_id = %d", p.ProjectId), + "e.session_id IS NOT NULL", + "e.`$event_name` = 'CLICK'", } base = append(base, globalConds...) - if len(globalNames) > 0 { - base = append(base, "e.`$event_name` IN ("+buildInClause(globalNames)+")") - } - if len(eventNames) > 0 { - base = append(base, "e.`$event_name` IN ("+buildInClause(eventNames)+")") - } + //if len(globalNames) > 0 { + // base = append(base, "e.`$event_name` IN ("+buildInClause(globalNames)+")") + //} + + //if len(eventNames) > 0 { + // base = append(base, "e.`$event_name` IN ("+buildInClause(eventNames)+")") + //} base = append(base, eventConds...) 
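+	// the fixed e.`$event_name` = 'CLICK' predicate above supersedes the commented-out IN (...) guards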
@@ -87,13 +90,11 @@ func (h HeatmapQueryBuilder) buildQuery(p Payload) (string, error) { q := fmt.Sprintf(` SELECT - - JSONExtractFloat(toString(e."$properties"), 'normalized_x') AS normalized_x, JSONExtractFloat(toString(e."$properties"), 'normalized_y') AS normalized_y FROM product_analytics.events AS e -JOIN experimental.sessions AS s USING(session_id) -WHERE %s;`, where) +-- JOIN experimental.sessions AS s USING(session_id) +WHERE %s LIMIT 500;`, where) return q, nil } diff --git a/backend/pkg/analytics/charts/metric_heatmaps_session.go b/backend/pkg/analytics/charts/metric_heatmaps_session.go index 049a775e9..b8ab2c5d5 100644 --- a/backend/pkg/analytics/charts/metric_heatmaps_session.go +++ b/backend/pkg/analytics/charts/metric_heatmaps_session.go @@ -7,8 +7,10 @@ import ( ) type HeatmapSessionResponse struct { - //Points []HeatmapPoint `json:"points"` - SessionID uint64 `json:"session_id"` + SessionID uint64 `json:"session_id"` + StartTs uint64 `json:"start_ts"` + Duration uint32 `json:"duration"` + EventTimestamp uint64 `json:"event_timestamp"` } type HeatmapSessionQueryBuilder struct{} @@ -19,17 +21,25 @@ func (h HeatmapSessionQueryBuilder) Execute(p Payload, conn db.Connector) (inter return nil, err } var sid uint64 + var startTs uint64 + var duration uint32 + var eventTs uint64 row, err := conn.QueryRow(shortestQ) if err != nil { return nil, err } - if err := row.Scan(&sid); err != nil { + if err := row.Scan(&sid, &startTs, &duration, &eventTs); err != nil { return nil, err } + // TODO get mob urls + return HeatmapSessionResponse{ - SessionID: sid, + SessionID: sid, + StartTs: startTs, + Duration: duration, + EventTimestamp: eventTs, }, nil } @@ -48,10 +58,11 @@ func (h HeatmapSessionQueryBuilder) buildQuery(p Payload) (string, error) { } } - globalConds, globalNames := buildEventConditions(globalFilters, BuildConditionsOptions{ + globalConds, _ := buildEventConditions(globalFilters, BuildConditionsOptions{ DefinedColumns: mainColumns, MainTableAlias: "e", }) + eventConds, _ := buildEventConditions(eventFilters, BuildConditionsOptions{ DefinedColumns: mainColumns, MainTableAlias: "e", @@ -61,22 +72,25 @@ func (h HeatmapSessionQueryBuilder) buildQuery(p Payload) (string, error) { fmt.Sprintf("e.created_at >= toDateTime(%d/1000)", p.MetricPayload.StartTimestamp), fmt.Sprintf("e.created_at < toDateTime(%d/1000)", p.MetricPayload.EndTimestamp+86400000), fmt.Sprintf("e.project_id = %d", p.ProjectId), - "e.\"$event_name\" = 'CLICK'", - } - base = append(base, globalConds...) - if len(globalNames) > 0 { - base = append(base, "e.`$event_name` IN ("+buildInClause(globalNames)+")") + "s.duration > 500", + "e.`$event_name` = 'LOCATION'", } base = append(base, eventConds...) + base = append(base, globalConds...) 
diff --git a/backend/pkg/analytics/charts/metric_heatmaps_session.go b/backend/pkg/analytics/charts/metric_heatmaps_session.go
index 049a775e9..b8ab2c5d5 100644
--- a/backend/pkg/analytics/charts/metric_heatmaps_session.go
+++ b/backend/pkg/analytics/charts/metric_heatmaps_session.go
@@ -7,8 +7,10 @@ import (
 )
 
 type HeatmapSessionResponse struct {
-	//Points []HeatmapPoint `json:"points"`
-	SessionID uint64 `json:"session_id"`
+	SessionID      uint64 `json:"session_id"`
+	StartTs        uint64 `json:"start_ts"`
+	Duration       uint32 `json:"duration"`
+	EventTimestamp uint64 `json:"event_timestamp"`
 }
 
 type HeatmapSessionQueryBuilder struct{}
@@ -19,17 +21,25 @@ func (h HeatmapSessionQueryBuilder) Execute(p Payload, conn db.Connector) (inter
 		return nil, err
 	}
 	var sid uint64
+	var startTs uint64
+	var duration uint32
+	var eventTs uint64
 	row, err := conn.QueryRow(shortestQ)
 	if err != nil {
 		return nil, err
 	}
-	if err := row.Scan(&sid); err != nil {
+	if err := row.Scan(&sid, &startTs, &duration, &eventTs); err != nil {
 		return nil, err
 	}
 
+	// TODO get mob urls
+
 	return HeatmapSessionResponse{
-		SessionID: sid,
+		SessionID:      sid,
+		StartTs:        startTs,
+		Duration:       duration,
+		EventTimestamp: eventTs,
 	}, nil
 }
@@ -48,10 +58,11 @@ func (h HeatmapSessionQueryBuilder) buildQuery(p Payload) (string, error) {
 		}
 	}
 
-	globalConds, globalNames := buildEventConditions(globalFilters, BuildConditionsOptions{
+	globalConds, _ := buildEventConditions(globalFilters, BuildConditionsOptions{
 		DefinedColumns: mainColumns,
 		MainTableAlias: "e",
 	})
+
 	eventConds, _ := buildEventConditions(eventFilters, BuildConditionsOptions{
 		DefinedColumns: mainColumns,
 		MainTableAlias: "e",
@@ -61,22 +72,25 @@ func (h HeatmapSessionQueryBuilder) buildQuery(p Payload) (string, error) {
 		fmt.Sprintf("e.created_at >= toDateTime(%d/1000)", p.MetricPayload.StartTimestamp),
 		fmt.Sprintf("e.created_at < toDateTime(%d/1000)", p.MetricPayload.EndTimestamp+86400000),
 		fmt.Sprintf("e.project_id = %d", p.ProjectId),
-		"e.\"$event_name\" = 'CLICK'",
-	}
-	base = append(base, globalConds...)
-	if len(globalNames) > 0 {
-		base = append(base, "e.`$event_name` IN ("+buildInClause(globalNames)+")")
+		"s.duration > 500",
+		"e.`$event_name` = 'LOCATION'",
 	}
 	base = append(base, eventConds...)
+	base = append(base, globalConds...)
 
 	where := strings.Join(base, " AND ")
-	return fmt.Sprintf(`
+	q := fmt.Sprintf(`
 SELECT
-    s.session_id
+    s.session_id,
+    toUnixTimestamp(s.datetime) * 1000 as startTs,
+    s.duration,
+    toUnixTimestamp(e.created_at) * 1000 as eventTs
 FROM product_analytics.events AS e
 JOIN experimental.sessions AS s USING(session_id)
 WHERE %s
-    ORDER BY s.duration ASC
-    LIMIT 1;`, where), nil
+    ORDER BY e.created_at ASC, s.duration ASC
+    LIMIT 1;`, where)
+
+	return q, nil
 }
diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go
index 15425897b..bbb4a69b4 100644
--- a/backend/pkg/analytics/charts/model.go
+++ b/backend/pkg/analytics/charts/model.go
@@ -49,6 +49,7 @@ const (
 	MetricTypeTable   MetricType = "table"
 	MetricTypeFunnel  MetricType = "funnel"
 	MetricTypeHeatmap MetricType = "heatmaps"
+	MetricTypeSession MetricType = "heatmaps_session"
 )
 
 const (
diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go
index 1e62a3a8e..caee9e0c4 100644
--- a/backend/pkg/analytics/charts/query.go
+++ b/backend/pkg/analytics/charts/query.go
@@ -2,6 +2,7 @@ package charts
 
 import (
 	"fmt"
+	"log"
 	"openreplay/backend/pkg/analytics/db"
 	"strings"
 )
@@ -27,50 +28,19 @@ func NewQueryBuilder(p Payload) (QueryBuilder, error) {
 		return TableQueryBuilder{}, nil
 	case MetricTypeHeatmap:
 		return HeatmapQueryBuilder{}, nil
+	case MetricTypeSession:
+		return HeatmapSessionQueryBuilder{}, nil
 	default:
 		return nil, fmt.Errorf("unknown metric type: %s", p.MetricType)
 	}
 }
 
-var validFilterTypes = map[FilterType]struct{}{
-	"LOCATION":            {},
-	"CLICK":               {},
-	FilterClick:           {},
-	FilterInput:           {},
-	FilterLocation:        {},
-	FilterCustom:          {},
-	FilterFetch:           {},
-	FilterTag:             {},
-	FilterUserCountry:     {},
-	FilterUserCity:        {},
-	FilterUserState:       {},
-	FilterUserId:          {},
-	FilterUserAnonymousId: {},
-	FilterUserOs:          {},
-	FilterUserBrowser:     {},
-	FilterUserDevice:      {},
-	FilterPlatform:        {},
-	FilterRevId:           {},
-	FilterReferrer:        {},
-	FilterUtmSource:       {},
-	FilterUtmMedium:       {},
-	FilterUtmCampaign:     {},
-	FilterDuration:        {},
-	FilterMetadata:        {},
-}
-
 type BuildConditionsOptions struct {
 	MainTableAlias       string
 	PropertiesColumnName string
 	DefinedColumns       map[string]string
 }
 
-type filterConfig struct {
-	LogicalProperty string
-	EventName       string
-	IsNumeric       bool
-}
-
 var propertyKeyMap = map[string]filterConfig{
 	"LOCATION": {LogicalProperty: "url_path"},
 	"CLICK":    {LogicalProperty: "label"},
@@ -80,257 +50,236 @@ var propertyKeyMap = map[string]filterConfig{
 	// TODO add more mappings as needed
 }
 
-func getColumnAccessor(logicalProp string, isNumeric bool, opts BuildConditionsOptions) string {
-	// Use CTE alias if present in DefinedColumns
-	if actualCol, ok := opts.DefinedColumns[logicalProp]; ok && actualCol != "" {
-		return actualCol
-	}
-	// Otherwise, extract from $properties JSON
-	jsonFunc := "JSONExtractString"
-	if isNumeric {
-		jsonFunc = "JSONExtractFloat"
-	}
-	return fmt.Sprintf("%s(toString(%s), '%s')", jsonFunc, opts.PropertiesColumnName, logicalProp)
+// filterConfig holds configuration for a filter type
+type filterConfig struct {
+	LogicalProperty string
+	IsNumeric       bool
 }
 
+// getColumnAccessor returns the column name for a logical property
+func getColumnAccessor(logical string, isNumeric bool, opts BuildConditionsOptions) string {
+	// helper: wrap names starting with $ in quotes
+	quote := func(name string) string {
+		if strings.HasPrefix(name, "$") {
+			return fmt.Sprintf("\"%s\"", name)
+		}
+		return name
+	}
+
+	// explicit column mapping
+	if col, ok := opts.DefinedColumns[logical]; ok {
+		col = quote(col)
+		if opts.MainTableAlias != "" {
+			return fmt.Sprintf("%s.%s", opts.MainTableAlias, col)
+		}
+		return col
+	}
+
+	// determine property key
+	propKey := logical
+	if cfg, ok := propertyKeyMap[logical]; ok {
+		propKey = cfg.LogicalProperty
+	}
+
+	// build properties column reference
+	colName := opts.PropertiesColumnName
+	if opts.MainTableAlias != "" {
+		colName = fmt.Sprintf("%s.%s", opts.MainTableAlias, colName)
+	}
+	colName = quote(colName)
+
+	// JSON extraction
+	if isNumeric {
+		return fmt.Sprintf("toFloat64(JSONExtractString(toString(%s), '%s'))", colName, propKey)
+	}
+	return fmt.Sprintf("JSONExtractString(toString(%s), '%s')", colName, propKey)
+}
+
+// buildEventConditions builds SQL conditions and names from filters
 func buildEventConditions(filters []Filter, options ...BuildConditionsOptions) (conds, names []string) {
 	opts := BuildConditionsOptions{
-		MainTableAlias:       "main",
+		MainTableAlias:       "",
 		PropertiesColumnName: "$properties",
 		DefinedColumns:       make(map[string]string),
 	}
 	if len(options) > 0 {
-		if options[0].MainTableAlias != "" {
-			opts.MainTableAlias = options[0].MainTableAlias
+		opt := options[0]
+		if opt.MainTableAlias != "" {
+			opts.MainTableAlias = opt.MainTableAlias
 		}
-		if options[0].PropertiesColumnName != "" {
-			opts.PropertiesColumnName = options[0].PropertiesColumnName
+		if opt.PropertiesColumnName != "" {
+			opts.PropertiesColumnName = opt.PropertiesColumnName
 		}
-		if options[0].DefinedColumns != nil {
-			opts.DefinedColumns = options[0].DefinedColumns
+		if opt.DefinedColumns != nil {
+			opts.DefinedColumns = opt.DefinedColumns
 		}
 	}
 
 	for _, f := range filters {
-		_, okType := validFilterTypes[f.Type]
-		if !okType {
-			continue
-		}
-		// process main filter
-		if f.Type == FilterFetch {
-			var fetchConds []string
-			for _, nf := range f.Filters {
-				cfg, ok := propertyKeyMap[string(nf.Type)]
-				if !ok {
-					continue
-				}
-				acc := getColumnAccessor(cfg.LogicalProperty, cfg.IsNumeric, opts)
-				if c := buildCond(acc, nf.Value, f.Operator); c != "" {
-					fetchConds = append(fetchConds, c)
-				}
-			}
-			if len(fetchConds) > 0 {
-				conds = append(conds, strings.Join(fetchConds, " AND "))
-				names = append(names, "REQUEST")
-			}
-		} else {
-			cfg, ok := propertyKeyMap[string(f.Type)]
-			if !ok {
-				cfg = filterConfig{LogicalProperty: string(f.Type)}
-			}
-			acc := getColumnAccessor(cfg.LogicalProperty, cfg.IsNumeric, opts)
-
-			// when the Operator isAny or onAny just add the event name to the list
-			if f.Operator == "isAny" || f.Operator == "onAny" {
-				if f.IsEvent {
-					names = append(names, string(f.Type))
-				}
-				continue
-			}
-
-			if c := buildCond(acc, f.Value, f.Operator); c != "" {
-				conds = append(conds, c)
-				if f.IsEvent {
-					names = append(names, string(f.Type))
-				}
-			}
-		}
-
-		// process sub-filters
-		if len(f.Filters) > 0 && f.Type != FilterFetch {
-			subOpts := opts // Inherit parent's options
-			subConds, subNames := buildEventConditions(f.Filters, subOpts)
-			if len(subConds) > 0 {
-				conds = append(conds, strings.Join(subConds, " AND "))
-				names = append(names, subNames...)
+		fConds, fNames := addFilter(f, opts)
+		if len(fConds) > 0 {
+			conds = append(conds, fConds...)
+			names = append(names, fNames...)
 		}
 	}
 	return
 }
 
-func buildSessionConditions(filters []Filter) []string {
-	var conds []string
-	for _, f := range filters {
-		if !f.IsEvent {
-			switch f.Type {
-			case FilterUserCountry:
-				conds = append(conds, buildCond("s.user_country", f.Value, f.Operator))
-			case FilterUserCity:
-				conds = append(conds, buildCond("s.user_city", f.Value, f.Operator))
-			case FilterUserState:
-				conds = append(conds, buildCond("s.user_state", f.Value, f.Operator))
-			case FilterUserId:
-				conds = append(conds, buildCond("s.user_id", f.Value, f.Operator))
-			case FilterUserAnonymousId:
-				conds = append(conds, buildCond("s.user_anonymous_id", f.Value, f.Operator))
-			case FilterUserOs:
-				conds = append(conds, buildCond("s.user_os", f.Value, f.Operator))
-			case FilterUserBrowser:
-				conds = append(conds, buildCond("s.user_browser", f.Value, f.Operator))
-			case FilterUserDevice:
-				conds = append(conds, buildCond("s.user_device", f.Value, f.Operator))
-			case FilterPlatform:
-				conds = append(conds, buildCond("s.user_device_type", f.Value, f.Operator))
-			case FilterRevId:
-				conds = append(conds, buildCond("s.rev_id", f.Value, f.Operator))
-			case FilterReferrer:
-				conds = append(conds, buildCond("s.base_referrer", f.Value, f.Operator))
-			case FilterUtmSource:
-				conds = append(conds, buildCond("s.utm_source", f.Value, f.Operator))
-			case FilterUtmMedium:
-				conds = append(conds, buildCond("s.utm_medium", f.Value, f.Operator))
-			case FilterUtmCampaign:
-				conds = append(conds, buildCond("s.utm_campaign", f.Value, f.Operator))
-			case FilterDuration:
-				if len(f.Value) == 2 {
-					conds = append(conds, fmt.Sprintf("s.duration >= '%s'", f.Value[0]))
-					conds = append(conds, fmt.Sprintf("s.duration <= '%s'", f.Value[1]))
-				}
-			case FilterMetadata:
-				if f.Source != "" {
-					conds = append(conds, buildCond(fmt.Sprintf("s.%s", f.Source), f.Value, f.Operator))
-				}
+// addFilter processes a single Filter and returns its SQL conditions and associated event names
+func addFilter(f Filter, opts BuildConditionsOptions) (conds []string, names []string) {
+	var ftype = string(f.Type)
+	// resolve filter configuration, default if missing
+	cfg, ok := propertyKeyMap[ftype]
+	if !ok {
+		cfg = filterConfig{LogicalProperty: ftype, IsNumeric: false}
+		log.Printf("using default config for type: %v", f.Type)
+	}
+	acc := getColumnAccessor(cfg.LogicalProperty, cfg.IsNumeric, opts)
+
+	// operator-based conditions
+	switch f.Operator {
+	case "isAny", "onAny":
+		if f.IsEvent {
+			names = append(names, ftype)
+		}
+	default:
+		if c := buildCond(acc, f.Value, f.Operator, cfg.IsNumeric); c != "" {
+			conds = append(conds, c)
+			if f.IsEvent {
+				names = append(names, ftype)
 			}
 		}
 	}
-	return conds
+
+	// nested sub-filters
+	if len(f.Filters) > 0 {
+		subConds, subNames := buildEventConditions(f.Filters, opts)
+		if len(subConds) > 0 {
+			conds = append(conds, strings.Join(subConds, " AND "))
+			names = append(names, subNames...)
+		}
+	}
+
+	return
 }
 
-func buildCond(expr string, values []string, operator string) string {
+var compOps = map[string]string{
+	"equals": "=", "is": "=", "on": "=",
+	"notEquals": "<>", "not": "<>", "off": "<>",
+	"greaterThan": ">", "gt": ">",
+	"greaterThanOrEqual": ">=", "gte": ">=",
+	"lessThan": "<", "lt": "<",
+	"lessThanOrEqual": "<=", "lte": "<=",
+}
+
+// buildCond constructs a condition string based on operator and values
+func buildCond(expr string, values []string, operator string, isNumeric bool) string {
 	if len(values) == 0 {
 		return ""
 	}
 	switch operator {
 	case "contains":
-		var conds []string
-		for _, v := range values {
-			conds = append(conds, fmt.Sprintf("%s ILIKE '%%%s%%'", expr, v))
+		// wrap values with % on both sides
+		wrapped := make([]string, len(values))
+		for i, v := range values {
+			wrapped[i] = fmt.Sprintf("%%%s%%", v)
 		}
-		if len(conds) > 1 {
-			return "(" + strings.Join(conds, " OR ") + ")"
-		}
-		return conds[0]
-	case "regex":
-		var conds []string
-		for _, v := range values {
-			conds = append(conds, fmt.Sprintf("match(%s, '%s')", expr, v))
-		}
-
-		if len(conds) > 1 {
-			return "(" + strings.Join(conds, " OR ") + ")"
-		}
-		return conds[0]
+		return multiValCond(expr, wrapped, "%s ILIKE %s", false)
 	case "notContains":
-		var conds []string
-		for _, v := range values {
-			conds = append(conds, fmt.Sprintf("NOT (%s ILIKE '%%%s%%')", expr, v))
+		wrapped := make([]string, len(values))
+		for i, v := range values {
+			wrapped[i] = fmt.Sprintf("%%%s%%", v)
 		}
-		if len(conds) > 1 {
-			return "(" + strings.Join(conds, " OR ") + ")"
-		}
-		return conds[0]
+		cond := multiValCond(expr, wrapped, "%s ILIKE %s", false)
+		return "NOT (" + cond + ")"
 	case "startsWith":
-		var conds []string
-		for _, v := range values {
-			conds = append(conds, fmt.Sprintf("%s ILIKE '%s%%'", expr, v))
+		wrapped := make([]string, len(values))
+		for i, v := range values {
+			wrapped[i] = v + "%"
 		}
-		if len(conds) > 1 {
-			return "(" + strings.Join(conds, " OR ") + ")"
-		}
-		return conds[0]
+		return multiValCond(expr, wrapped, "%s ILIKE %s", false)
 	case "endsWith":
-		var conds []string
+		wrapped := make([]string, len(values))
+		for i, v := range values {
+			wrapped[i] = "%" + v
+		}
+		return multiValCond(expr, wrapped, "%s ILIKE %s", false)
+	case "regex":
+		// build match expressions
+		var parts []string
 		for _, v := range values {
-			conds = append(conds, fmt.Sprintf("%s ILIKE '%%%s'", expr, v))
+			parts = append(parts, fmt.Sprintf("match(%s, '%s')", expr, v))
 		}
-		if len(conds) > 1 {
-			return "(" + strings.Join(conds, " OR ") + ")"
+		if len(parts) > 1 {
+			return "(" + strings.Join(parts, " OR ") + ")"
 		}
-		return conds[0]
-	case "notEquals", "not", "off":
-		if len(values) > 1 {
-			return fmt.Sprintf("%s NOT IN (%s)", expr, buildInClause(values))
-		}
-		return fmt.Sprintf("%s <> '%s'", expr, values[0])
-	case "greaterThan", "gt":
-		var conds []string
-		for _, v := range values {
-			conds = append(conds, fmt.Sprintf("%s > '%s'", expr, v))
-		}
-		if len(conds) > 1 {
-			return "(" + strings.Join(conds, " OR ") + ")"
-		}
-		return conds[0]
-	case "greaterThanOrEqual", "gte":
-		var conds []string
-		for _, v := range values {
-			conds = append(conds, fmt.Sprintf("%s >= '%s'", expr, v))
-		}
-		if len(conds) > 1 {
-			return "(" + strings.Join(conds, " OR ") + ")"
-		}
-		return conds[0]
-	case "lessThan", "lt":
-		var conds []string
-		for _, v := range values {
-			conds = append(conds, fmt.Sprintf("%s < '%s'", expr, v))
-		}
-		if len(conds) > 1 {
-			return "(" + strings.Join(conds, " OR ") + ")"
-		}
-		return conds[0]
-	case "lessThanOrEqual", "lte":
-		var conds []string
-		for _, v := range values {
-			conds = append(conds, fmt.Sprintf("%s <= '%s'", expr, v))
-		}
-		if len(conds) > 1 {
-			return "(" + strings.Join(conds, " OR ") + ")"
-		}
-		return conds[0]
-	case "in":
-		if len(values) > 1 {
-			return fmt.Sprintf("%s IN (%s)", expr, buildInClause(values))
-		}
-		return fmt.Sprintf("%s = '%s'", expr, values[0])
-	case "notIn":
-		if len(values) > 1 {
-			return fmt.Sprintf("%s NOT IN (%s)", expr, buildInClause(values))
-		}
-		return fmt.Sprintf("%s <> '%s'", expr, values[0])
-	case "equals", "is", "on":
-		if len(values) > 1 {
-			return fmt.Sprintf("%s IN (%s)", expr, buildInClause(values))
-		}
-		return fmt.Sprintf("%s = '%s'", expr, values[0])
+		return parts[0]
+	case "in", "notIn":
+		neg := operator == "notIn"
+		return inClause(expr, values, neg, isNumeric)
 	default:
-		if len(values) > 1 {
-			return fmt.Sprintf("%s IN (%s)", expr, buildInClause(values))
+		if op, ok := compOps[operator]; ok {
+			tmpl := "%s " + op + " %s"
+			return multiValCond(expr, values, tmpl, isNumeric)
 		}
-		return fmt.Sprintf("%s = '%s'", expr, values[0])
+		// fallback equals
+		tmpl := "%s = %s"
+		return multiValCond(expr, values, tmpl, isNumeric)
 	}
 }
 
+// formatCondition applies a template to a single value, handling quoting
+func formatCondition(expr, tmpl, value string, isNumeric bool) string {
+	val := value
+	if !isNumeric {
+		val = fmt.Sprintf("'%s'", value)
+	}
+	return fmt.Sprintf(tmpl, expr, val)
+}
+
+// multiValCond applies a template to one or multiple values, using formatCondition
+func multiValCond(expr string, values []string, tmpl string, isNumeric bool) string {
+	if len(values) == 1 {
+		return formatCondition(expr, tmpl, values[0], isNumeric)
+	}
+	parts := make([]string, len(values))
+	for i, v := range values {
+		parts[i] = formatCondition(expr, tmpl, v, isNumeric)
+	}
+	return "(" + strings.Join(parts, " OR ") + ")"
+}
+
+// inClause constructs IN/NOT IN clauses with proper quoting
+func inClause(expr string, values []string, negate, isNumeric bool) string {
+	op := "IN"
+	if negate {
+		op = "NOT IN"
+	}
+
+	if len(values) == 1 {
+		return fmt.Sprintf("%s %s (%s)", expr, op, func() string {
+			if isNumeric {
+				return values[0]
+			}
+			return fmt.Sprintf("'%s'", values[0])
+		}())
+	}
+	quoted := make([]string, len(values))
+	for i, v := range values {
+		if isNumeric {
+			quoted[i] = v
+		} else {
+			quoted[i] = fmt.Sprintf("'%s'", v)
+		}
+	}
+	return fmt.Sprintf("%s %s (%s)", expr, op, strings.Join(quoted, ", "))
+}
+
+func buildSessionConditions(filters []Filter) []string {
+	var conds []string
+
+	return conds
+}
+
 func buildInClause(values []string) string {
 	var quoted []string
 	for _, v := range values {
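[Reviewer note, not part of the patch: the rewrite above routes comparison operators through compOps and the quoting through formatCondition/multiValCond. A standalone sketch of the resulting condition strings; the two helpers are copied verbatim here only so the example runs on its own:]

    package main

    import (
        "fmt"
        "strings"
    )

    // Copies of formatCondition/multiValCond as introduced in query.go,
    // reproduced only to illustrate the expected output shape.
    func formatCondition(expr, tmpl, value string, isNumeric bool) string {
        val := value
        if !isNumeric {
            val = fmt.Sprintf("'%s'", value)
        }
        return fmt.Sprintf(tmpl, expr, val)
    }

    func multiValCond(expr string, values []string, tmpl string, isNumeric bool) string {
        if len(values) == 1 {
            return formatCondition(expr, tmpl, values[0], isNumeric)
        }
        parts := make([]string, len(values))
        for i, v := range values {
            parts[i] = formatCondition(expr, tmpl, v, isNumeric)
        }
        return "(" + strings.Join(parts, " OR ") + ")"
    }

    func main() {
        // "is" maps to "=" via compOps; multiple values become an OR group:
        fmt.Println(multiValCond("s.user_country", []string{"FR", "DE"}, "%s = %s", false))
        // -> (s.user_country = 'FR' OR s.user_country = 'DE')

        // numeric values are left unquoted:
        fmt.Println(multiValCond("s.duration", []string{"500"}, "%s >= %s", true))
        // -> s.duration >= 500
    }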
From 1d30b4d4cbce45bb7a770d533a8defba9a514660 Mon Sep 17 00:00:00 2001
From: Shekar Siri
Date: Fri, 9 May 2025 15:40:08 +0200
Subject: [PATCH 18/30] feat(product_analytics): user journey - wip

---
 .../analytics/charts/metric_user_journey.go | 519 ++++++++++++++++++
 backend/pkg/analytics/charts/model.go       |   6 +
 backend/pkg/analytics/charts/query.go       |   2 +
 backend/pkg/server/api/middleware.go        |  10 +-
 frontend/chart-request.http                 | 151 +++++
 5 files changed, 683 insertions(+), 5 deletions(-)
 create mode 100644 backend/pkg/analytics/charts/metric_user_journey.go
 create mode 100644 frontend/chart-request.http

diff --git a/backend/pkg/analytics/charts/metric_user_journey.go b/backend/pkg/analytics/charts/metric_user_journey.go
new file mode 100644
index 000000000..e4acec780
--- /dev/null
+++ b/backend/pkg/analytics/charts/metric_user_journey.go
@@ -0,0 +1,519 @@
+package charts
+
+import (
+	"fmt"
+	"math"
+	"openreplay/backend/pkg/analytics/db"
+	"sort"
+	"strings"
+	"time"
+)
+
+// Node represents a point in the journey diagram.
+type Node struct {
+	Depth     int    `json:"depth"`
+	Name      string `json:"name"`
+	EventType string `json:"eventType"`
+	ID        int    `json:"id"`
+}
+
+// Link represents a transition between nodes.
+type Link struct {
+	EventType     string  `json:"eventType"`
+	SessionsCount int     `json:"sessionsCount"`
+	Value         float64 `json:"value"`
+	Source        int     `json:"source"`
+	Target        int     `json:"target"`
+}
+
+// JourneyData holds all nodes and links for the response.
+type JourneyData struct {
+	Nodes []Node `json:"nodes"`
+	Links []Link `json:"links"`
+}
+
+// JourneyResponse is the API response structure.
+type JourneyResponse struct {
+	Data JourneyData `json:"data"`
+}
+
+// UserJourneyQueryBuilder builds and executes the journey query.
+type UserJourneyQueryBuilder struct{}
+
+func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) {
+	q, err := h.buildQuery(p)
+	if err != nil {
+		return nil, err
+	}
+	rows, err := conn.Query(q)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	type row struct {
+		Stage                uint64
+		CurrentEventName     string
+		CurrentEventProperty string
+		PrevEventName        string
+		PrevEventProperty    string
+		SessionsCount        uint64
+	}
+
+	// Parse all rows into a slice
+	var rawData []row
+	for rows.Next() {
+		var r row
+		if err := rows.Scan(
+			&r.Stage,
+			&r.CurrentEventName,
+			&r.CurrentEventProperty,
+			&r.PrevEventName,
+			&r.PrevEventProperty,
+			&r.SessionsCount,
+		); err != nil {
+			return nil, err
+		}
+
+		if r.SessionsCount == 0 {
+			continue
+		}
+
+		rawData = append(rawData, r)
+	}
+
+	// Group data by stage and determine max stage
+	dataByStage := make(map[uint64][]row)
+	var maxStage uint64 = 0
+	for _, r := range rawData {
+		dataByStage[r.Stage] = append(dataByStage[r.Stage], r)
+		if r.Stage > maxStage {
+			maxStage = r.Stage
+		}
+	}
+
+	// Calculate total sessions per stage
+	stageTotals := make(map[uint64]uint64)
+	for stage, stageRows := range dataByStage {
+		for _, r := range stageRows {
+			stageTotals[stage] += r.SessionsCount
+		}
+	}
+	initialCount := stageTotals[1]
+
+	// Number of top nodes to display per stage
+	topLimit := int(p.Rows)
+	if topLimit <= 0 {
+		topLimit = 5 // Default if not specified
+	}
+
+	// Step 1: Determine the top paths at each stage based on destination
+	type pathKey struct {
+		eventName string
+		eventProp string
+	}
+
+	// Map to store top paths for each stage
+	topPathsByStage := make(map[uint64]map[pathKey]bool)
+	pathCountsByStage := make(map[uint64]map[pathKey]uint64)
+
+	for stage := uint64(1); stage <= maxStage; stage++ {
+		// Initialize maps for this stage
+		topPathsByStage[stage] = make(map[pathKey]bool)
+		pathCountsByStage[stage] = make(map[pathKey]uint64)
+
+		// First, aggregate by path to get total sessions per path
+		for _, r := range dataByStage[stage] {
+			key := pathKey{eventName: r.CurrentEventName, eventProp: r.CurrentEventProperty}
+			pathCountsByStage[stage][key] += r.SessionsCount
+		}
+
+		// Then sort paths by session count
+		type pathCount struct {
+			path  pathKey
+			count uint64
+		}
+
+		var paths []pathCount
+		for path, count := range pathCountsByStage[stage] {
+			paths = append(paths, pathCount{path: path, count: count})
+		}
+
+		// Sort descending by count
+		sort.Slice(paths, func(i, j int) bool {
+			return paths[i].count > paths[j].count
+		})
+
+		// Mark top paths
+		for i, pc := range paths {
+			if i < topLimit {
+				topPathsByStage[stage][pc.path] = true
+			}
+		}
+	}
+
+	// Step 2: Create nodes and track sessions
+	var nodes []Node
+	var links []Link
+	nodeID := 0
+
+	// Maps to track nodes and sessions
+	nodeMap := make(map[string]int)     // Stage|EventName|EventProp → nodeID
+	othersNodes := make(map[uint64]int) // stage → "Others" nodeID
+	dropNodes := make(map[uint64]int)   // stage → "Drop" nodeID
+
+	incomingSessions := make(map[int]uint64) // nodeID → incoming sessions
+	outgoingSessions := make(map[int]uint64) // nodeID → outgoing sessions
+
+	// Create all nodes first
+	for stage := uint64(1); stage <= maxStage; stage++ {
+		// Create regular nodes for top paths
+		for path := range topPathsByStage[stage] {
+			nodeKey := fmt.Sprintf("%d|%s|%s", stage, path.eventName, path.eventProp)
+			nodeMap[nodeKey] = nodeID
+
+			nodes = append(nodes, Node{
+				ID:        nodeID,
+				Depth:     int(stage) - 1,
+				Name:      path.eventProp,
+				EventType: path.eventName,
+			})
+
+			// For stage 1, set incoming sessions
+			if stage == 1 {
+				incomingSessions[nodeID] = pathCountsByStage[stage][path]
+			}
+
+			nodeID++
+		}
+
+		// Calculate if we need an "Others" node (when total paths > topLimit)
+		totalPaths := len(pathCountsByStage[stage])
+		if totalPaths > topLimit {
+			// Calculate sessions that will go to Others
+			othersCount := uint64(0)
+			for path, count := range pathCountsByStage[stage] {
+				if !topPathsByStage[stage][path] {
+					othersCount += count
+				}
+			}
+
+			// Only create Others if it has sessions
+			if othersCount > 0 {
+				othersNodes[stage] = nodeID
+
+				nodes = append(nodes, Node{
+					ID:        nodeID,
+					Depth:     int(stage) - 1,
+					Name:      "other",
+					EventType: "OTHER",
+				})
+
+				// For stage 1, set incoming sessions for Others
+				if stage == 1 {
+					incomingSessions[nodeID] = othersCount
+				}
+
+				nodeID++
+			}
+		}
+	}
+
+	// Step 3: Create links between nodes
+	// Use a map to deduplicate links
+	type linkKey struct {
+		src int
+		tgt int
+	}
+	linkSessions := make(map[linkKey]uint64)
+	linkTypes := make(map[linkKey]string)
+
+	for stage := uint64(2); stage <= maxStage; stage++ {
+		for _, r := range dataByStage[stage] {
+			// Determine source node
+			prevStage := stage - 1
+			prevPathKey := fmt.Sprintf("%d|%s|%s", prevStage, r.PrevEventName, r.PrevEventProperty)
+			srcID, hasSrc := nodeMap[prevPathKey]
+
+			if !hasSrc {
+				// If source isn't a top node, use Others from previous stage
+				if othersID, hasOthers := othersNodes[prevStage]; hasOthers {
+					srcID = othersID
+					hasSrc = true
+				} else {
+					// Skip if we can't find a source
+					continue
+				}
+			}
+
+			// Determine target node
+			curPath := pathKey{eventName: r.CurrentEventName, eventProp: r.CurrentEventProperty}
+			var tgtID int
+			var hasTgt bool
+
+			// Check if this path is in the top paths for this stage
+			if topPathsByStage[stage][curPath] {
+				// It's a top node
+				curPathKey := fmt.Sprintf("%d|%s|%s", stage, r.CurrentEventName, r.CurrentEventProperty)
+				tgtID = nodeMap[curPathKey]
+				hasTgt = true
+			} else {
+				// It's part of Others
+				if othersID, hasOthers := othersNodes[stage]; hasOthers {
+					tgtID = othersID
+					hasTgt = true
+				}
+			}
+
+			if !hasSrc || !hasTgt {
+				continue
+			}
+
+			// Update session tracking
+			incomingSessions[tgtID] += r.SessionsCount
+			outgoingSessions[srcID] += r.SessionsCount
+
+			// Record link (deduplicating)
+			lk := linkKey{src: srcID, tgt: tgtID}
+			linkSessions[lk] += r.SessionsCount
+
+			// Prefer non-OTHER event type
+			if linkTypes[lk] == "" || linkTypes[lk] == "OTHER" {
+				linkTypes[lk] = r.CurrentEventName
+			}
+		}
+	}
+
+	// Create deduplicated links
+	for lk, count := range linkSessions {
+		percent := math.Round(float64(count)*10000/float64(initialCount)) / 100
+		links = append(links, Link{
+			Source:        lk.src,
+			Target:        lk.tgt,
+			SessionsCount: int(count),
+			Value:         percent,
+			EventType:     linkTypes[lk],
+		})
+	}
+
+	// Step 4: Calculate drops and create drop nodes
+	cumulativeDrops := make(map[uint64]uint64)
+
+	for stage := uint64(1); stage < maxStage; stage++ {
+		// Calculate new drops at this stage
+		stageDrops := uint64(0)
+		dropsFromNode := make(map[int]uint64) // nodeID -> drop count
+
+		for _, node := range nodes {
+			nodeStage := uint64(node.Depth) + 1
+			if nodeStage != stage {
+				continue
+			}
+
+			incoming := incomingSessions[node.ID]
+			outgoing := outgoingSessions[node.ID]
+
+			if incoming > outgoing {
+				dropCount := incoming - outgoing
+				dropsFromNode[node.ID] = dropCount
+				stageDrops += dropCount
+			}
+		}
+
+		// Calculate cumulative drops
+		if stage == 1 {
+			cumulativeDrops[stage] = stageDrops
+		} else {
+			cumulativeDrops[stage] = cumulativeDrops[stage-1] + stageDrops
+		}
+
+		// Create drop node if there are drops
+		if cumulativeDrops[stage] > 0 {
+			dropNodes[stage] = nodeID
+
+			nodes = append(nodes, Node{
+				ID:        nodeID,
+				Depth:     int(stage), // Drop nodes appear at the next depth
+				Name:      "drop",
+				EventType: "DROP",
+			})
+
+			// Create links from nodes with drops to the drop node
+			for nid, dropCount := range dropsFromNode {
+				if dropCount == 0 {
+					continue
+				}
+
+				percent := math.Round(float64(dropCount)*10000/float64(initialCount)) / 100
+				links = append(links, Link{
+					Source:        nid,
+					Target:        nodeID,
+					SessionsCount: int(dropCount),
+					Value:         percent,
+					EventType:     "DROP",
+				})
+			}
+
+			// Link previous drop node to carry forward drops
+			if stage > 1 && cumulativeDrops[stage-1] > 0 {
+				if prevDropID, hasPrevDrop := dropNodes[stage-1]; hasPrevDrop {
+					percent := math.Round(float64(cumulativeDrops[stage-1])*10000/float64(initialCount)) / 100
+					links = append(links, Link{
+						Source:        prevDropID,
+						Target:        nodeID,
+						SessionsCount: int(cumulativeDrops[stage-1]),
+						Value:         percent,
+						EventType:     "DROP",
+					})
+				}
+			}
+
+			nodeID++
+		}
+	}
+
+	// Filter and reindex
+	nodeHasConnection := make(map[int]bool)
+	for _, link := range links {
+		nodeHasConnection[link.Source] = true
+		nodeHasConnection[link.Target] = true
+	}
+
+	var filteredNodes []Node
+	for _, node := range nodes {
+		if nodeHasConnection[node.ID] {
+			filteredNodes = append(filteredNodes, node)
+		}
+	}
+
+	// Reassign IDs
+	nodeIDMap := make(map[int]int)
+	var finalNodes []Node
+
+	for newID, node := range filteredNodes {
+		nodeIDMap[node.ID] = newID
+		node.ID = newID
+		finalNodes = append(finalNodes, node)
+	}
+
+	// Update links
+	var finalLinks []Link
+	for _, link := range links {
+		srcID, srcExists := nodeIDMap[link.Source]
+		tgtID, tgtExists := nodeIDMap[link.Target]
+
+		if srcExists && tgtExists {
+			link.Source = srcID
+			link.Target = tgtID
+			finalLinks = append(finalLinks, link)
+		}
+	}
+
+	return JourneyResponse{Data: JourneyData{
+		Nodes: finalNodes,
+		Links: finalLinks,
+	}}, nil
+}
+
+func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) {
+	events := p.MetricValue
+	if len(events) == 0 {
+		events = []string{"LOCATION"}
+	}
+	vals := make([]string, len(events))
+	for i, v := range events {
+		vals[i] = fmt.Sprintf("'%s'", v)
+	}
+	laterCond := fmt.Sprintf("e.\"$event_name\" IN (%s)", strings.Join(vals, ","))
+	startConds, _ := buildEventConditions(p.StartPoint, BuildConditionsOptions{DefinedColumns: mainColumns, MainTableAlias: "e"})
+	excludeConds, _ := buildEventConditions(p.Exclude, BuildConditionsOptions{DefinedColumns: mainColumns, MainTableAlias: "e"})
+
+	firstBase := []string{`e."$event_name" = 'LOCATION'`}
+	if len(startConds) > 0 {
+		firstBase = append(firstBase, startConds...)
+	}
+	firstBase = append(firstBase,
+		fmt.Sprintf("e.project_id = %d", p.ProjectId),
+		"e.session_id IS NOT NULL",
+		fmt.Sprintf("e.created_at BETWEEN toDateTime('%s') AND toDateTime('%s')",
+			time.Unix(p.StartTimestamp/1000, 0).UTC().Format("2006-01-02 15:04:05"),
+			time.Unix(p.EndTimestamp/1000, 0).UTC().Format("2006-01-02 15:04:05"),
+		),
+	)
+
+	journeyBase := []string{laterCond}
+	if len(excludeConds) > 0 {
+		journeyBase = append(journeyBase, "NOT ("+strings.Join(excludeConds, " AND "))
+	}
+	journeyBase = append(journeyBase,
+		fmt.Sprintf("e.project_id = %d", p.ProjectId),
+	)
+
+	endTime := time.Unix(p.EndTimestamp/1000, 0).UTC().Format("2006-01-02 15:04:05")
+
+	q := fmt.Sprintf(`WITH
+    first_hits AS (
+        SELECT session_id, MIN(created_at) AS start_time
+        FROM product_analytics.events AS e
+        WHERE %s
+        GROUP BY session_id
+    ),
+    journey_events AS (
+        SELECT
+            e.session_id,
+            e.distinct_id,
+            e."$event_name" AS event_name,
+            e.created_at,
+            multiIf(
+                e."$event_name" = 'LOCATION', JSONExtractString(toString(e."$properties"), 'url_path'),
+                e."$event_name" = 'CLICK', JSONExtractString(toString(e."$properties"), 'label'),
+                e."$event_name" = 'INPUT', JSONExtractString(toString(e."$properties"), 'label'),
+                NULL
+            ) AS event_property
+        FROM product_analytics.events AS e
+        JOIN first_hits AS f USING(session_id)
+        WHERE
+            e.created_at >= f.start_time
+            AND e.created_at <= toDateTime('%s')
+            AND %s
+    ),
+    event_with_prev AS (
+        SELECT
+            session_id,
+            distinct_id,
+            event_name,
+            event_property,
+            created_at,
+            any(event_name) OVER (PARTITION BY session_id ORDER BY created_at ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS previous_event_name,
+            any(event_property) OVER (PARTITION BY session_id ORDER BY created_at ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS previous_event_property
+        FROM journey_events
+    ),
+    staged AS (
+        SELECT
+            *,
+            sumIf(1, true) OVER (PARTITION BY session_id ORDER BY created_at ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS stage
+        FROM event_with_prev
+    )
+SELECT
+    stage AS stage,
+    event_name AS current_event_name,
+    event_property AS current_event_property,
+    COALESCE(previous_event_name, '') AS previous_event_name,
+    COALESCE(previous_event_property, '') AS previous_event_property,
+    COUNT(DISTINCT session_id) AS sessions_count
+FROM staged
+WHERE stage <= %d
+GROUP BY
+    stage,
+    event_name,
+    event_property,
+    previous_event_name,
+    previous_event_property
+ORDER BY stage, COUNT(DISTINCT session_id) DESC;`,
+		strings.Join(firstBase, " AND "),
+		endTime,
+		strings.Join(journeyBase, " AND "),
+		p.Columns,
+	)
+	return q, nil
+}
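[Reviewer note, not part of the patch: the `any(...) OVER (PARTITION BY session_id ORDER BY created_at ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING)` window in the query above is effectively a per-session lag, and `sumIf(1, true) OVER (...)` is a running row count used as the stage number. A toy Go equivalent of that pairing, with hypothetical event names:]

    package main

    import "fmt"

    func main() {
        // One session's events, already ordered by created_at, exactly as
        // the window PARTITION BY session_id ORDER BY created_at sees them.
        events := []string{"LOCATION", "CLICK", "INPUT", "CLICK"}

        for i, cur := range events {
            prev := "" // plays the role of COALESCE(previous_event_name, '')
            if i > 0 {
                prev = events[i-1]
            }
            // stage mirrors the running row count sumIf(1, true) OVER (...)
            fmt.Printf("stage=%d current=%s previous=%s\n", i+1, cur, prev)
        }
    }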
diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go
index bbb4a69b4..0bde5e62f 100644
--- a/backend/pkg/analytics/charts/model.go
+++ b/backend/pkg/analytics/charts/model.go
@@ -50,6 +50,7 @@ const (
 	MetricTypeFunnel  MetricType = "funnel"
 	MetricTypeHeatmap MetricType = "heatmaps"
 	MetricTypeSession MetricType = "heatmaps_session"
+	MetricUserJourney MetricType = "pathAnalysis"
 )
 
 const (
@@ -64,12 +65,17 @@ type MetricPayload struct {
 	Density      int        `json:"density"`
 	MetricOf     string     `json:"metricOf"`
 	MetricType   MetricType `json:"metricType"`
+	MetricValue  []string   `json:"metricValue"`
 	MetricFormat string     `json:"metricFormat"`
 	ViewType     string     `json:"viewType"`
 	Name         string     `json:"name"`
 	Series       []Series   `json:"series"`
 	Limit        int        `json:"limit"`
 	Page         int        `json:"page"`
+	StartPoint   []Filter   `json:"startPoint"`
+	Exclude      []Filter   `json:"exclude"`
+	Rows         uint64     `json:"rows"`
+	Columns      uint64     `json:"columns"`
 }
 
 type MetricOfTable string
diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go
index caee9e0c4..1f3d44432 100644
--- a/backend/pkg/analytics/charts/query.go
+++ b/backend/pkg/analytics/charts/query.go
@@ -30,6 +30,8 @@ func NewQueryBuilder(p Payload) (QueryBuilder, error) {
 		return HeatmapQueryBuilder{}, nil
 	case MetricTypeSession:
 		return HeatmapSessionQueryBuilder{}, nil
+	case MetricUserJourney:
+		return UserJourneyQueryBuilder{}, nil
 	default:
 		return nil, fmt.Errorf("unknown metric type: %s", p.MetricType)
 	}
diff --git a/backend/pkg/server/api/middleware.go b/backend/pkg/server/api/middleware.go
index 423e7e0d9..518645f14 100644
--- a/backend/pkg/server/api/middleware.go
+++ b/backend/pkg/server/api/middleware.go
@@ -23,11 +23,11 @@ func (e *routerImpl) healthMiddleware(next http.Handler) http.Handler {
 
 func (e *routerImpl) corsMiddleware(next http.Handler) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		if e.cfg.UseAccessControlHeaders {
-			// Prepare headers for preflight requests
-			w.Header().Set("Access-Control-Allow-Origin", "*")
-			w.Header().Set("Access-Control-Allow-Methods", "POST,GET,PATCH,DELETE")
-			w.Header().Set("Access-Control-Allow-Headers", "Content-Type,Authorization,Content-Encoding")
+		if origin := r.Header.Get("Origin"); origin == "http://localhost:3333" {
+			w.Header().Set("Access-Control-Allow-Origin", origin)
+			w.Header().Set("Access-Control-Allow-Methods", "POST, GET, PATCH, DELETE, OPTIONS")
+			w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, Content-Encoding")
+			w.Header().Set("Access-Control-Allow-Credentials", "true")
 		}
 		if r.Method == http.MethodOptions {
 			w.Header().Set("Cache-Control", "max-age=86400")
diff --git a/frontend/chart-request.http b/frontend/chart-request.http
new file mode 100644
index 000000000..45d80a0a2
--- /dev/null
+++ b/frontend/chart-request.http
@@ -0,0 +1,151 @@
+### Login Request
+POST https://foss.openreplay.com/api/login
+Content-Type: application/json
+
+{
+  "email": "shekar@openreplay.com",
+  "password": "Asayer#123"
+}
+
+> {%
+// Extract token from response
+let json = response.body;
+
+// Check if response.body is a string that needs parsing
+if (typeof json === "string") {
+  try {
+    json = JSON.parse(json);
+  } catch (e) {
+    return "Error parsing response: " + e.message;
+  }
+}
+
+// Extract JWT token
+const token = json.jwt;
+
+if (token) {
+  // Try different ways to set the token based on client type
+  if (typeof client !== "undefined" && client.global) {
+    client.global.set("API_TOKEN", token);
+  } else if (typeof pm !== "undefined" && pm.environment) {
+    pm.environment.set("API_TOKEN", token);
+  } else if (typeof process !== "undefined") {
+    process.env.API_TOKEN = token;
+  } else {
+    // Last resort - try to access global variables directly
+    API_TOKEN = token; // Set as global variable
+  }
+
+  return "JWT token saved as API_TOKEN: " + token.substring(0, 10) + "...";
+} else {
+  return "No JWT token found in response";
+}
+%}
+
+### Using the token in subsequent requests
+GET https://foss.openreplay.com/api/account
+Authorization: Bearer {{API_TOKEN}}
+
+### Chart Request - TIMESERIES (lineChart)
+POST http://localhost:8080/v1/analytics/5/cards/try
+Content-Type: application/json
+Authorization: Bearer {{API_TOKEN}}
+
+{
+  "startTimestamp": 1737216192000,
+  "endTimestamp": 1739894592000,
+  "density": 6,
+  "metricId": 1040,
+  "metricOf": "userCount",
+  "metricType": "timeseries",
+  "viewType": "lineChart",
+  "name": "CH - Users Trend",
+  "series": [
+    {
+      "seriesId": 621,
+      "name": "Series First",
+      "index": 1,
+      "filter": {
+        "filters": [
+          {
+            "type": "userCountry",
+            "isEvent": false,
+            "value": ["UN"],
+            "operator": "is",
+            "filters": []
+          }
+        ],
+        "eventsOrder": "then"
+      }
+    },
+    {
+      "seriesId": 621,
+      "name": "Series Second",
+      "index": 1,
+      "filter": {
+        "filters": [
+          {
+            "type": "userCountry",
+            "isEvent": false,
+            "value": ["FR"],
+            "operator": "is",
+            "filters": []
+          }
+        ],
+        "eventsOrder": "then"
+      }
+    }
+  ],
+  "page": 1,
+  "limit": 20,
+  "compareTo": ["PREV_7_DAYS"],
+  "config": {
+    "col": 2,
+    "row": 2,
+    "position": 0
+  }
+}
+
+### Chart Request - TABLE
+POST http://localhost:8080/v1/analytics/65/cards/try
+Content-Type: application/json
+Authorization: Bearer {{API_TOKEN}}
+
+{
+  "startTimestamp": 1737216192000,
+  "endTimestamp": 1744635600000,
+  "density": 6,
+  "metricId": 1040,
+  "metricOf": "referrer",
+  "metricType": "table",
+  "metricFormat": "sessionCount",
+  "viewType": "table",
+  "name": "CH - Users Trend",
+  "series": [
+    {
+      "seriesId": 621,
+      "name": "Series First",
+      "index": 1,
+      "filter": {
+        "filters": [],
+        "eventsOrder": "then"
+      }
+    }
+  ],
+  "page": 1,
+  "limit": 20,
+  "compareTo": ["PREV_7_DAYS"],
+  "config": {
+    "col": 2,
+    "row": 2,
+    "position": 0
+  }
+}
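[Reviewer note, not part of the patch: the corsMiddleware change above hard-codes http://localhost:3333, which only suits local development. A hedged sketch of an allowlist variant; the allowedOrigins set is hypothetical and would normally come from configuration rather than being hard-coded:]

    package api

    import "net/http"

    // allowedOrigins is a hypothetical allowlist; in practice it would be
    // loaded from configuration.
    var allowedOrigins = map[string]bool{
        "http://localhost:3333": true,
    }

    // corsHeaders echoes the origin back only when it is in the allowlist,
    // mirroring the headers set in the patched middleware.
    func corsHeaders(w http.ResponseWriter, r *http.Request) {
        if origin := r.Header.Get("Origin"); allowedOrigins[origin] {
            w.Header().Set("Access-Control-Allow-Origin", origin)
            w.Header().Set("Access-Control-Allow-Methods", "POST, GET, PATCH, DELETE, OPTIONS")
            w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, Content-Encoding")
            w.Header().Set("Access-Control-Allow-Credentials", "true")
        }
    }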
From f789ee1bda10320182bc1ec632e6d94767943a62 Mon Sep 17 00:00:00 2001
From: Shekar Siri
Date: Mon, 12 May 2025 12:07:32 +0200
Subject: [PATCH 19/30] feat(product_analytics): user journey - wip

---
 .../analytics/charts/metric_user_journey.go | 365 +++++++++++++-----
 backend/pkg/analytics/charts/model.go       |  33 +-
 2 files changed, 287 insertions(+), 111 deletions(-)

diff --git a/backend/pkg/analytics/charts/metric_user_journey.go b/backend/pkg/analytics/charts/metric_user_journey.go
index e4acec780..82332ca6b 100644
--- a/backend/pkg/analytics/charts/metric_user_journey.go
+++ b/backend/pkg/analytics/charts/metric_user_journey.go
@@ -11,10 +11,11 @@ import (
 )
 
 // Node represents a point in the journey diagram.
 type Node struct {
-	Depth     int    `json:"depth"`
-	Name      string `json:"name"`
-	EventType string `json:"eventType"`
-	ID        int    `json:"id"`
+	Depth        int    `json:"depth"`
+	Name         string `json:"name"`
+	EventType    string `json:"eventType"`
+	ID           int    `json:"id"`
+	StartingNode bool   `json:"startingNode"`
 }
 
 // Link represents a transition between nodes.
@@ -52,7 +53,7 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac
 	defer rows.Close()
 
 	type row struct {
-		Stage                uint64
+		Stage                int64
 		CurrentEventName     string
 		CurrentEventProperty string
 		PrevEventName        string
@@ -82,24 +83,47 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac
 		rawData = append(rawData, r)
 	}
 
-	// Group data by stage and determine max stage
-	dataByStage := make(map[uint64][]row)
-	var maxStage uint64 = 0
+	// Group data by stage
+	dataByStage := make(map[int64][]row)
+	var minStage int64 = 0
+	var maxStage int64 = 0
+
 	for _, r := range rawData {
 		dataByStage[r.Stage] = append(dataByStage[r.Stage], r)
 		if r.Stage > maxStage {
 			maxStage = r.Stage
 		}
+		if r.Stage < minStage {
+			minStage = r.Stage
+		}
 	}
 
 	// Calculate total sessions per stage
-	stageTotals := make(map[uint64]uint64)
+	stageTotals := make(map[int64]uint64)
 	for stage, stageRows := range dataByStage {
 		for _, r := range stageRows {
 			stageTotals[stage] += r.SessionsCount
 		}
 	}
-	initialCount := stageTotals[1]
+
+	// Determine base count for percentage calculations
+	// We'll use the starting point (usually stage 1) as our base
+	var baseSessionsCount uint64
+	if count, exists := stageTotals[1]; exists {
+		baseSessionsCount = count
+	} else {
+		// If stage 1 doesn't exist, use the first available positive stage
+		for stage := int64(0); stage <= maxStage; stage++ {
+			if count, exists := stageTotals[stage]; exists {
+				baseSessionsCount = count
+				break
+			}
+		}
+	}
+
+	if baseSessionsCount == 0 {
+		baseSessionsCount = 1 // Prevent division by zero
+	}
 
 	// Number of top nodes to display per stage
 	topLimit := int(p.Rows)
@@ -114,10 +138,20 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac
 	}
 
 	// Map to store top paths for each stage
-	topPathsByStage := make(map[uint64]map[pathKey]bool)
-	pathCountsByStage := make(map[uint64]map[pathKey]uint64)
+	topPathsByStage := make(map[int64]map[pathKey]bool)
+	pathCountsByStage := make(map[int64]map[pathKey]uint64)
+
+	for stage := minStage; stage <= maxStage; stage++ {
+		// Skip if this stage has no data
+		if _, exists := dataByStage[stage]; !exists {
+			continue
+		}
+
+		// Sort rows within each stage by session count (descending)
+		sort.Slice(dataByStage[stage], func(i, j int) bool {
+			return dataByStage[stage][i].SessionsCount > dataByStage[stage][j].SessionsCount
+		})
 
-	for stage := uint64(1); stage <= maxStage; stage++ {
 		// Initialize maps for this stage
 		topPathsByStage[stage] = make(map[pathKey]bool)
 		pathCountsByStage[stage] = make(map[pathKey]uint64)
@@ -144,7 +178,7 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac
 			return paths[i].count > paths[j].count
 		})
 
-		// Mark top paths
+		// Mark top paths - take exactly topLimit or all if fewer available
 		for i, pc := range paths {
 			if i < topLimit {
 				topPathsByStage[stage][pc.path] = true
@@ -152,35 +186,78 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac
 		}
 	}
 
-	// Step 2: Create nodes and track sessions
+	// Step 2: Create a normalized sequential depth mapping
+	// First, gather all stages that have data
+	var stagesWithData []int64
+	for stage := range dataByStage {
+		stagesWithData = append(stagesWithData, stage)
+	}
+
+	// Sort stages
+	sort.Slice(stagesWithData, func(i, j int) bool {
+		return stagesWithData[i] < stagesWithData[j]
+	})
+
+	var startingStage int64
+	for _, s := range stagesWithData {
+		if s > 0 {
+			startingStage = s
+			break
+		}
+	}
+
+	// Create a mapping from logical stage to display depth (ensuring no gaps)
+	stageToDepth := make(map[int64]int)
+	for i, stage := range stagesWithData {
+		stageToDepth[stage] = i
+	}
+
+	// Determine depth of central node (stage 1 or equivalent)
+	var centralDepth int
+	if depth, exists := stageToDepth[1]; exists {
+		centralDepth = depth
+	} else {
+		// If stage 1 doesn't exist, use the first positive stage
+		for _, stage := range stagesWithData {
+			if stage > 0 {
+				centralDepth = stageToDepth[stage]
+				break
+			}
+		}
+	}
+
+	// Step 3: Create nodes with normalized depths
 	var nodes []Node
 	var links []Link
 	nodeID := 0
 
 	// Maps to track nodes and sessions
-	nodeMap := make(map[string]int)     // Stage|EventName|EventProp → nodeID
-	othersNodes := make(map[uint64]int) // stage → "Others" nodeID
-	dropNodes := make(map[uint64]int)   // stage → "Drop" nodeID
+	nodeMap := make(map[string]int)    // Stage|EventName|EventProp → nodeID
+	othersNodes := make(map[int64]int) // stage → "Others" nodeID
+	dropNodes := make(map[int64]int)   // stage → "Drop" nodeID
 
 	incomingSessions := make(map[int]uint64) // nodeID → incoming sessions
 	outgoingSessions := make(map[int]uint64) // nodeID → outgoing sessions
 
-	// Create all nodes first
-	for stage := uint64(1); stage <= maxStage; stage++ {
+	// Create all nodes using normalized depths
+	for _, stage := range stagesWithData {
+		displayDepth := stageToDepth[stage]
+
 		// Create regular nodes for top paths
 		for path := range topPathsByStage[stage] {
 			nodeKey := fmt.Sprintf("%d|%s|%s", stage, path.eventName, path.eventProp)
 			nodeMap[nodeKey] = nodeID
 
 			nodes = append(nodes, Node{
-				ID:        nodeID,
-				Depth:     int(stage) - 1,
-				Name:      path.eventProp,
-				EventType: path.eventName,
+				ID:           nodeID,
+				Depth:        displayDepth,
+				Name:         path.eventProp,
+				EventType:    path.eventName,
+				StartingNode: stage == startingStage,
 			})
 
-			// For stage 1, set incoming sessions
-			if stage == 1 {
+			// For the central stage (usually stage 1) or first stage, set incoming sessions
+			if (stage == 1) || (stage == minStage && minStage != 1) {
 				incomingSessions[nodeID] = pathCountsByStage[stage][path]
 			}
 
@@ -203,14 +280,15 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac
 			othersNodes[stage] = nodeID
 
 			nodes = append(nodes, Node{
-				ID:        nodeID,
-				Depth:     int(stage) - 1,
-				Name:      "other",
-				EventType: "OTHER",
+				ID:           nodeID,
+				Depth:        displayDepth,
+				Name:         "other",
+				EventType:    "OTHER",
+				StartingNode: stage == startingStage,
 			})
 
-			// For stage 1, set incoming sessions for Others
-			if stage == 1 {
+			// For the central stage or first stage, set incoming sessions for Others
+			if (stage == 1) || (stage == minStage && minStage != 1) {
 				incomingSessions[nodeID] = othersCount
 			}
 
@@ -219,7 +297,7 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac
 		}
 	}
 
-	// Step 3: Create links between nodes
+	// Step 4: Create links between adjacent nodes only
 	// Use a map to deduplicate links
 	type linkKey struct {
 		src int
@@ -228,10 +306,18 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac
 	linkSessions := make(map[linkKey]uint64)
 	linkTypes := make(map[linkKey]string)
 
-	for stage := uint64(2); stage <= maxStage; stage++ {
-		for _, r := range dataByStage[stage] {
+	// For each stage (except the first), create links from the previous stage
+	for i := 1; i < len(stagesWithData); i++ {
+		currentStage := stagesWithData[i]
+		prevStage := stagesWithData[i-1]
+
+		for _, r := range dataByStage[currentStage] {
+			// Skip if previous stage doesn't match expected
+			if r.Stage != currentStage {
+				continue
+			}
+
 			// Determine source node
-			prevStage := stage - 1
 			prevPathKey := fmt.Sprintf("%d|%s|%s", prevStage, r.PrevEventName, r.PrevEventProperty)
 			srcID, hasSrc := nodeMap[prevPathKey]
 
@@ -252,14 +338,14 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac
 			var hasTgt bool
 
 			// Check if this path is in the top paths for this stage
-			if topPathsByStage[stage][curPath] {
+			if topPathsByStage[currentStage][curPath] {
 				// It's a top node
-				curPathKey := fmt.Sprintf("%d|%s|%s", stage, r.CurrentEventName, r.CurrentEventProperty)
+				curPathKey := fmt.Sprintf("%d|%s|%s", currentStage, r.CurrentEventName, r.CurrentEventProperty)
 				tgtID = nodeMap[curPathKey]
 				hasTgt = true
 			} else {
 				// It's part of Others
-				if othersID, hasOthers := othersNodes[stage]; hasOthers {
+				if othersID, hasOthers := othersNodes[currentStage]; hasOthers {
 					tgtID = othersID
 					hasTgt = true
 				}
@@ -284,9 +370,11 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac
 		}
 	}
 
-	// Create deduplicated links
+	// Create deduplicated links with proper percentages
 	for lk, count := range linkSessions {
-		percent := math.Round(float64(count)*10000/float64(initialCount)) / 100
+		// Calculate percentage based on baseSessionsCount
+		percent := math.Round(float64(count)*10000/float64(baseSessionsCount)) / 100
+
 		links = append(links, Link{
 			Source:        lk.src,
 			Target:        lk.tgt,
@@ -296,17 +384,25 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac
 		})
 	}
 
-	// Step 4: Calculate drops and create drop nodes
-	cumulativeDrops := make(map[uint64]uint64)
+	// Step 5: Calculate drops and create drop nodes (only for stages ≥ 0)
+	// Process forward drops (positive stages only)
+	for i := 0; i < len(stagesWithData)-1; i++ {
+		stage := stagesWithData[i]
+
+		// Skip negative stages for drops
+		if stage < 0 {
+			continue
+		}
 
-	for stage := uint64(1); stage < maxStage; stage++ {
 		// Calculate new drops at this stage
 		stageDrops := uint64(0)
 		dropsFromNode := make(map[int]uint64) // nodeID -> drop count
 
 		for _, node := range nodes {
-			nodeStage := uint64(node.Depth) + 1
-			if nodeStage != stage {
+			nodeDepth := node.Depth
+
+			// Skip if this node isn't in the current stage
+			if nodeDepth != stageToDepth[stage] {
 				continue
 			}
 
@@ -320,65 +416,91 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac
 			}
 		}
 
-		// Calculate cumulative drops
-		if stage == 1 {
-			cumulativeDrops[stage] = stageDrops
-		} else {
-			cumulativeDrops[stage] = cumulativeDrops[stage-1] + stageDrops
+		// Skip if no drops
+		if stageDrops == 0 {
+			continue
 		}
 
-		// Create drop node if there are drops
-		if cumulativeDrops[stage] > 0 {
-			dropNodes[stage] = nodeID
+		// Determine next stage depth for drop node positioning
+		var dropDepth int
+		if i+1 < len(stagesWithData) {
+			dropDepth = stageToDepth[stagesWithData[i+1]]
+		} else {
+			dropDepth = stageToDepth[stage] + 1
+		}
 
-			nodes = append(nodes, Node{
-				ID:        nodeID,
-				Depth:     int(stage), // Drop nodes appear at the next depth
-				Name:      "drop",
-				EventType: "DROP",
-			})
+		// Create drop node
+		dropNodes[stage] = nodeID
 
-			// Create links from nodes with drops to the drop node
-			for nid, dropCount := range dropsFromNode {
-				if dropCount == 0 {
-					continue
-				}
+		nodes = append(nodes, Node{
+			ID:        nodeID,
+			Depth:     dropDepth,
+			Name:      "drop",
+			EventType: "DROP",
+		})
 
-				percent := math.Round(float64(dropCount)*10000/float64(initialCount)) / 100
-				links = append(links, Link{
-					Source:        nid,
-					Target:        nodeID,
-					SessionsCount: int(dropCount),
-					Value:         percent,
-					EventType:     "DROP",
-				})
+		// Create links from nodes with drops to the drop node
+		for nid, dropCount := range dropsFromNode {
+			if dropCount == 0 {
+				continue
 			}
 
-			// Link previous drop node to carry forward drops
-			if stage > 1 && cumulativeDrops[stage-1] > 0 {
-				if prevDropID, hasPrevDrop := dropNodes[stage-1]; hasPrevDrop {
-					percent := math.Round(float64(cumulativeDrops[stage-1])*10000/float64(initialCount)) / 100
+			// Calculate percentage based on baseSessionsCount
+			percent := math.Round(float64(dropCount)*10000/float64(baseSessionsCount)) / 100
+
+			links = append(links, Link{
+				Source:        nid,
+				Target:        nodeID,
+				SessionsCount: int(dropCount),
+				Value:         percent,
+				EventType:     "DROP",
+			})
+		}
+
+		// Link previous drop node to current drop node to show accumulation
+		if i > 0 {
+			for j := i - 1; j >= 0; j-- {
+				prevStage := stagesWithData[j]
+				if prevDropID, hasPrevDrop := dropNodes[prevStage]; hasPrevDrop {
+					// Link previous drop to current drop to show accumulation
+					prevDropCount := uint64(0)
+					for _, link := range links {
+						if link.Target == prevDropID && link.EventType == "DROP" {
+							prevDropCount += uint64(link.SessionsCount)
+						}
+					}
+
+					percent := math.Round(float64(prevDropCount)*10000/float64(baseSessionsCount)) / 100
+
 					links = append(links, Link{
 						Source:        prevDropID,
 						Target:        nodeID,
-						SessionsCount: int(cumulativeDrops[stage-1]),
+						SessionsCount: int(prevDropCount),
 						Value:         percent,
 						EventType:     "DROP",
 					})
+					break
 				}
 			}
-
-			nodeID++
 		}
+
+		nodeID++
 	}
 
-	// Filter and reindex
+	// Filter out nodes with no connections
 	nodeHasConnection := make(map[int]bool)
 	for _, link := range links {
 		nodeHasConnection[link.Source] = true
 		nodeHasConnection[link.Target] = true
 	}
 
+	// Make sure central nodes are included even if they don't have links
+	for _, node := range nodes {
+		if node.Depth == centralDepth {
+			nodeHasConnection[node.ID] = true
+		}
+	}
+
 	var filteredNodes []Node
 	for _, node := range nodes {
 		if nodeHasConnection[node.ID] {
@@ -386,7 +508,7 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac
 		}
 	}
 
-	// Reassign IDs
+	// Reassign IDs to be sequential
 	nodeIDMap := make(map[int]int)
 	var finalNodes []Node
 
@@ -396,7 +518,7 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac
 		finalNodes = append(finalNodes, node)
 	}
 
-	// Update links
+	// Update link references
 	var finalLinks []Link
 	for _, link := range links {
 		srcID, srcExists := nodeIDMap[link.Source]
@@ -428,7 +550,15 @@ func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) {
 	startConds, _ := buildEventConditions(p.StartPoint, BuildConditionsOptions{DefinedColumns: mainColumns, MainTableAlias: "e"})
 	excludeConds, _ := buildEventConditions(p.Exclude, BuildConditionsOptions{DefinedColumns: mainColumns, MainTableAlias: "e"})
 
-	firstBase := []string{`e."$event_name" = 'LOCATION'`}
+	// use first element of StartPoint array for starting event
+	var startEvent string
+	if len(p.StartPoint) > 0 {
+		startEvent = string(p.StartPoint[0].Type)
+	} else {
+		startEvent = events[0]
+	}
+
+	firstBase := []string{fmt.Sprintf("e.\"$event_name\" = '%s'", startEvent)}
 	if len(startConds) > 0 {
 		firstBase = append(firstBase, startConds...)
 	}
@@ -449,8 +579,19 @@ func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) {
 		fmt.Sprintf("e.project_id = %d", p.ProjectId),
 	)
 
+	startTime := time.Unix(p.StartTimestamp/1000, 0).UTC().Format("2006-01-02 15:04:05")
 	endTime := time.Unix(p.EndTimestamp/1000, 0).UTC().Format("2006-01-02 15:04:05")
 
+	previousColumns := p.PreviousColumns
+	if previousColumns <= 0 {
+		previousColumns = 0
+	}
+
+	maxCols := p.Columns
+	if maxCols > 0 {
+		maxCols++
+	}
+
 	q := fmt.Sprintf(`WITH
     first_hits AS (
         SELECT session_id, MIN(created_at) AS start_time
@@ -458,18 +599,18 @@ func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) {
         WHERE %s
         GROUP BY session_id
     ),
-    journey_events AS (
+    journey_events_after AS (
         SELECT
             e.session_id,
             e.distinct_id,
             e."$event_name" AS event_name,
             e.created_at,
-            multiIf(
-                e."$event_name" = 'LOCATION', JSONExtractString(toString(e."$properties"), 'url_path'),
-                e."$event_name" = 'CLICK', JSONExtractString(toString(e."$properties"), 'label'),
-                e."$event_name" = 'INPUT', JSONExtractString(toString(e."$properties"), 'label'),
-                NULL
-            ) AS event_property
+            CASE
+                WHEN e."$event_name" = 'LOCATION' THEN JSONExtractString(toString(e."$properties"), 'url_path')
+                WHEN e."$event_name" = 'CLICK' THEN JSONExtractString(toString(e."$properties"), 'label')
+                WHEN e."$event_name" = 'INPUT' THEN JSONExtractString(toString(e."$properties"), 'label')
+                ELSE NULL
+            END AS event_property
         FROM product_analytics.events AS e
         JOIN first_hits AS f USING(session_id)
         WHERE
@@ -477,6 +618,31 @@ func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) {
             AND e.created_at <= toDateTime('%s')
             AND %s
     ),
+    journey_events_before AS (
+        SELECT
+            e.session_id,
+            e.distinct_id,
+            e."$event_name" AS event_name,
+            e.created_at,
+            CASE
+                WHEN e."$event_name" = 'LOCATION' THEN JSONExtractString(toString(e."$properties"), 'url_path')
+                WHEN e."$event_name" = 'CLICK' THEN JSONExtractString(toString(e."$properties"), 'label')
+                WHEN e."$event_name" = 'INPUT' THEN JSONExtractString(toString(e."$properties"), 'label')
+                ELSE NULL
+            END AS event_property
+        FROM product_analytics.events AS e
+        JOIN first_hits AS f USING(session_id)
+        WHERE
+            e.created_at < f.start_time
+            AND e.created_at >= toDateTime('%s')
+            AND %s
+            AND %d > 0 -- Only fetch previous events if PreviousColumns > 0
+    ),
+    journey_events_combined AS (
+        SELECT *, 1 AS direction FROM journey_events_after
+        UNION ALL
+        SELECT *, -1 AS direction FROM journey_events_before
+    ),
     event_with_prev AS (
         SELECT
             session_id,
@@ -484,14 +650,19 @@ func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) {
             event_name,
             event_property,
             created_at,
+            direction,
             any(event_name) OVER (PARTITION BY session_id ORDER BY created_at ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS previous_event_name,
             any(event_property) OVER (PARTITION BY session_id ORDER BY created_at ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS previous_event_property
-        FROM journey_events
+        FROM journey_events_combined
     ),
     staged AS (
         SELECT
             *,
-            sumIf(1, true) OVER (PARTITION BY session_id ORDER BY created_at ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS stage
+            CASE
+                WHEN direction = 1 THEN toInt64(sumIf(1, true) OVER (PARTITION BY session_id, direction ORDER BY created_at ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW))
+                WHEN direction = -1 THEN -1 * toInt64(sumIf(1, true) OVER (PARTITION BY session_id, direction ORDER BY created_at DESC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW))
+                ELSE 0
+            END AS stage
         FROM event_with_prev
     )
 SELECT
@@ -502,7 +673,7 @@ SELECT
     COALESCE(previous_event_property, '') AS previous_event_property,
     COUNT(DISTINCT session_id) AS sessions_count
 FROM staged
-WHERE stage <= %d
+WHERE stage <= %d AND stage >= -%d
 GROUP BY
     stage,
     event_name,
@@ -513,7 +684,11 @@ ORDER BY stage, COUNT(DISTINCT session_id) DESC;`,
 		strings.Join(firstBase, " AND "),
 		endTime,
 		strings.Join(journeyBase, " AND "),
-		p.Columns,
+		startTime,
+		strings.Join(journeyBase, " AND "),
+		previousColumns,
+		maxCols,
+		previousColumns,
 	)
 	return q, nil
 }
diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go
index 0bde5e62f..959fa2537 100644
--- a/backend/pkg/analytics/charts/model.go
+++ b/backend/pkg/analytics/charts/model.go
@@ -60,22 +60,23 @@ const (
 )
 
 type MetricPayload struct {
-	StartTimestamp int64      `json:"startTimestamp"`
-	EndTimestamp   int64      `json:"endTimestamp"`
-	Density        int        `json:"density"`
-	MetricOf       string     `json:"metricOf"`
-	MetricType     MetricType `json:"metricType"`
-	MetricValue    []string   `json:"metricValue"`
-	MetricFormat   string     `json:"metricFormat"`
-	ViewType       string     `json:"viewType"`
-	Name           string     `json:"name"`
-	Series         []Series   `json:"series"`
-	Limit          int        `json:"limit"`
-	Page           int        `json:"page"`
-	StartPoint     []Filter   `json:"startPoint"`
-	Exclude        []Filter   `json:"exclude"`
-	Rows           uint64     `json:"rows"`
-	Columns        uint64     `json:"columns"`
+	StartTimestamp  int64      `json:"startTimestamp"`
+	EndTimestamp    int64      `json:"endTimestamp"`
+	Density         int        `json:"density"`
+	MetricOf        string     `json:"metricOf"`
+	MetricType      MetricType `json:"metricType"`
+	MetricValue     []string   `json:"metricValue"`
+	MetricFormat    string     `json:"metricFormat"`
+	ViewType        string     `json:"viewType"`
+	Name            string     `json:"name"`
+	Series          []Series   `json:"series"`
+	Limit           int        `json:"limit"`
+	Page            int        `json:"page"`
+	StartPoint      []Filter   `json:"startPoint"`
+	Exclude         []Filter   `json:"exclude"`
+	Rows            uint64     `json:"rows"`
+	Columns         uint64     `json:"columns"`
+	PreviousColumns uint64     `json:"previousColumns"`
 }
 
 type MetricOfTable string
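[Reviewer note, not part of the patch: with patch 19 the staged CTE numbers events after the start point 1..N and events before it -1..-M, counting backwards from the start (the `direction = -1` branch orders the window DESC). A toy rendering of that numbering in Go, with hypothetical events:]

    package main

    import "fmt"

    func main() {
        before := []string{"LOCATION:/home", "CLICK:signup"}   // ordered by created_at
        after := []string{"LOCATION:/checkout", "CLICK:pay"}   // start point first

        // direction = -1: the most recent pre-start event gets stage -1,
        // the one before it -2, and so on.
        for i := len(before) - 1; i >= 0; i-- {
            fmt.Printf("stage=%d %s\n", -(len(before)-i), before[i])
        }
        // direction = +1: a running row count starting at 1.
        for i, e := range after {
            fmt.Printf("stage=%d %s\n", i+1, e)
        }
    }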
-502,7 +673,7 @@ SELECT COALESCE(previous_event_property, '') AS previous_event_property, COUNT(DISTINCT session_id) AS sessions_count FROM staged -WHERE stage <= %d +WHERE stage <= %d AND stage >= -%d GROUP BY stage, event_name, @@ -513,7 +684,11 @@ ORDER BY stage, COUNT(DISTINCT session_id) DESC;`, strings.Join(firstBase, " AND "), endTime, strings.Join(journeyBase, " AND "), - p.Columns, + startTime, + strings.Join(journeyBase, " AND "), + previousColumns, + maxCols, + previousColumns, ) return q, nil } diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go index 0bde5e62f..959fa2537 100644 --- a/backend/pkg/analytics/charts/model.go +++ b/backend/pkg/analytics/charts/model.go @@ -60,22 +60,23 @@ const ( ) type MetricPayload struct { - StartTimestamp int64 `json:"startTimestamp"` - EndTimestamp int64 `json:"endTimestamp"` - Density int `json:"density"` - MetricOf string `json:"metricOf"` - MetricType MetricType `json:"metricType"` - MetricValue []string `json:"metricValue"` - MetricFormat string `json:"metricFormat"` - ViewType string `json:"viewType"` - Name string `json:"name"` - Series []Series `json:"series"` - Limit int `json:"limit"` - Page int `json:"page"` - StartPoint []Filter `json:"startPoint"` - Exclude []Filter `json:"exclude"` - Rows uint64 `json:"rows"` - Columns uint64 `json:"columns"` + StartTimestamp int64 `json:"startTimestamp"` + EndTimestamp int64 `json:"endTimestamp"` + Density int `json:"density"` + MetricOf string `json:"metricOf"` + MetricType MetricType `json:"metricType"` + MetricValue []string `json:"metricValue"` + MetricFormat string `json:"metricFormat"` + ViewType string `json:"viewType"` + Name string `json:"name"` + Series []Series `json:"series"` + Limit int `json:"limit"` + Page int `json:"page"` + StartPoint []Filter `json:"startPoint"` + Exclude []Filter `json:"exclude"` + Rows uint64 `json:"rows"` + Columns uint64 `json:"columns"` + PreviousColumns uint64 `json:"previousColumns"` } type MetricOfTable string From bf62be2a4a64e1cefd4c5b0e793a8bcc8d443a9f Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Mon, 12 May 2025 14:55:53 +0200 Subject: [PATCH 20/30] feat(product_analytics): user journey - wip --- .../analytics/charts/metric_user_journey.go | 45 ++++++++++++++++--- backend/pkg/analytics/charts/model.go | 2 +- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/backend/pkg/analytics/charts/metric_user_journey.go b/backend/pkg/analytics/charts/metric_user_journey.go index 82332ca6b..a9f2ffff8 100644 --- a/backend/pkg/analytics/charts/metric_user_journey.go +++ b/backend/pkg/analytics/charts/metric_user_journey.go @@ -510,7 +510,7 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac // Reassign IDs to be sequential nodeIDMap := make(map[int]int) - var finalNodes []Node + var finalNodes []Node = make([]Node, 0, len(filteredNodes)) for newID, node := range filteredNodes { nodeIDMap[node.ID] = newID @@ -519,7 +519,7 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac } // Update link references - var finalLinks []Link + var finalLinks []Link = make([]Link, 0, len(links)) for _, link := range links { srcID, srcExists := nodeIDMap[link.Source] tgtID, tgtExists := nodeIDMap[link.Target] @@ -538,6 +538,7 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac } func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) { + // prepare event list filter events := p.MetricValue if len(events) == 0 { events = 
[]string{"LOCATION"} @@ -547,10 +548,34 @@ func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) { vals[i] = fmt.Sprintf("'%s'", v) } laterCond := fmt.Sprintf("e.\"$event_name\" IN (%s)", strings.Join(vals, ",")) + + // build start and exclude conditions startConds, _ := buildEventConditions(p.StartPoint, BuildConditionsOptions{DefinedColumns: mainColumns, MainTableAlias: "e"}) excludeConds, _ := buildEventConditions(p.Exclude, BuildConditionsOptions{DefinedColumns: mainColumns, MainTableAlias: "e"}) - // use first element of StartPoint array for starting event + // quote properties column correctly + fixProps := func(conds []string) []string { + for i, c := range conds { + conds[i] = strings.ReplaceAll(c, "e.$properties", "e.\"$properties\"") + } + return conds + } + startConds = fixProps(startConds) + excludeConds = fixProps(excludeConds) + + // extract global filters from first series + s := p.MetricPayload.Series[0] + var globalFilters []Filter + for _, flt := range s.Filter.Filters { + if flt.IsEvent { + continue + } + globalFilters = append(globalFilters, flt) + } + globalConds, _ := buildEventConditions(globalFilters, BuildConditionsOptions{DefinedColumns: mainColumns, MainTableAlias: "e"}) + globalConds = fixProps(globalConds) + + // determine starting event var startEvent string if len(p.StartPoint) > 0 { startEvent = string(p.StartPoint[0].Type) @@ -558,10 +583,14 @@ func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) { startEvent = events[0] } + // assemble first_hits WHERE clause firstBase := []string{fmt.Sprintf("e.\"$event_name\" = '%s'", startEvent)} if len(startConds) > 0 { firstBase = append(firstBase, startConds...) } + if len(globalConds) > 0 { + firstBase = append(firstBase, globalConds...) + } firstBase = append(firstBase, fmt.Sprintf("e.project_id = %d", p.ProjectId), "e.session_id IS NOT NULL", @@ -571,27 +600,33 @@ func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) { ), ) + // assemble journey WHERE clause journeyBase := []string{laterCond} if len(excludeConds) > 0 { - journeyBase = append(journeyBase, "NOT ("+strings.Join(excludeConds, " AND ")) + journeyBase = append(journeyBase, "NOT ("+strings.Join(excludeConds, " AND ")+")") + } + if len(globalConds) > 0 { + journeyBase = append(journeyBase, globalConds...) 
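	// Illustrative sketch (values assumed, not taken from the patch): with
	// excludeConds = []string{"event_name = 'CLICK'", "event_property = 'buy'"},
	// the fixed concatenation a few lines above now yields a closed NOT group:
	//
	//	clause := "NOT (" + strings.Join(excludeConds, " AND ") + ")"
	//	// clause == "NOT (event_name = 'CLICK' AND event_property = 'buy')"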
} journeyBase = append(journeyBase, fmt.Sprintf("e.project_id = %d", p.ProjectId), ) + // format time bounds startTime := time.Unix(p.StartTimestamp/1000, 0).UTC().Format("2006-01-02 15:04:05") endTime := time.Unix(p.EndTimestamp/1000, 0).UTC().Format("2006-01-02 15:04:05") + // set column limits previousColumns := p.PreviousColumns if previousColumns <= 0 { previousColumns = 0 } - maxCols := p.Columns if maxCols > 0 { maxCols++ } + // build final query q := fmt.Sprintf(`WITH first_hits AS ( SELECT session_id, MIN(created_at) AS start_time diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go index 959fa2537..b36d4bc67 100644 --- a/backend/pkg/analytics/charts/model.go +++ b/backend/pkg/analytics/charts/model.go @@ -73,7 +73,7 @@ type MetricPayload struct { Limit int `json:"limit"` Page int `json:"page"` StartPoint []Filter `json:"startPoint"` - Exclude []Filter `json:"exclude"` + Exclude []Filter `json:"excludes"` Rows uint64 `json:"rows"` Columns uint64 `json:"columns"` PreviousColumns uint64 `json:"previousColumns"` From adb88fd9fc1fad8f46a233ad74c0b5a4478fbf03 Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Mon, 12 May 2025 15:06:25 +0200 Subject: [PATCH 21/30] feat(product_analytics): user journey - handle duration filter --- .../analytics/charts/metric_user_journey.go | 45 ++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/backend/pkg/analytics/charts/metric_user_journey.go b/backend/pkg/analytics/charts/metric_user_journey.go index a9f2ffff8..243c3ffb4 100644 --- a/backend/pkg/analytics/charts/metric_user_journey.go +++ b/backend/pkg/analytics/charts/metric_user_journey.go @@ -5,6 +5,7 @@ import ( "math" "openreplay/backend/pkg/analytics/db" "sort" + "strconv" "strings" "time" ) @@ -563,10 +564,30 @@ func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) { startConds = fixProps(startConds) excludeConds = fixProps(excludeConds) - // extract global filters from first series + // extract global filters and duration from first series s := p.MetricPayload.Series[0] + var durationMin, durationMax int64 + var okMin, okMax bool + var err error var globalFilters []Filter for _, flt := range s.Filter.Filters { + if flt.Type == "duration" { + if len(flt.Value) > 0 && flt.Value[0] != "" { + durationMin, err = strconv.ParseInt(flt.Value[0], 10, 64) + if err != nil { + return "", err + } + okMin = true + } + if len(flt.Value) > 1 && flt.Value[1] != "" { + durationMax, err = strconv.ParseInt(flt.Value[1], 10, 64) + if err != nil { + return "", err + } + okMax = true + } + continue + } if flt.IsEvent { continue } @@ -575,6 +596,16 @@ func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) { globalConds, _ := buildEventConditions(globalFilters, BuildConditionsOptions{DefinedColumns: mainColumns, MainTableAlias: "e"}) globalConds = fixProps(globalConds) + // assemble duration condition + var durCond string + if okMin && okMax { + durCond = fmt.Sprintf("ss.duration BETWEEN %d AND %d", durationMin, durationMax) + } else if okMin { + durCond = fmt.Sprintf("ss.duration >= %d", durationMin) + } else if okMax { + durCond = fmt.Sprintf("ss.duration <= %d", durationMax) + } + // determine starting event var startEvent string if len(p.StartPoint) > 0 { @@ -583,7 +614,7 @@ func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) { startEvent = events[0] } - // assemble first_hits WHERE clause + // assemble first_hits WHERE clause with optional duration firstBase := 
[]string{fmt.Sprintf("e.\"$event_name\" = '%s'", startEvent)} if len(startConds) > 0 { firstBase = append(firstBase, startConds...) @@ -599,6 +630,9 @@ func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) { time.Unix(p.EndTimestamp/1000, 0).UTC().Format("2006-01-02 15:04:05"), ), ) + if durCond != "" { + firstBase = append(firstBase, durCond) + } // assemble journey WHERE clause journeyBase := []string{laterCond} @@ -629,10 +663,11 @@ func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) { // build final query q := fmt.Sprintf(`WITH first_hits AS ( - SELECT session_id, MIN(created_at) AS start_time + SELECT e.session_id, MIN(e.created_at) AS start_time FROM product_analytics.events AS e + JOIN experimental.sessions AS ss USING(session_id) WHERE %s - GROUP BY session_id + GROUP BY e.session_id ), journey_events_after AS ( SELECT @@ -671,7 +706,7 @@ func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) { e.created_at < f.start_time AND e.created_at >= toDateTime('%s') AND %s - AND %d > 0 -- Only fetch previous events if PreviousColumns > 0 + AND %d > 0 ), journey_events_combined AS ( SELECT *, 1 AS direction FROM journey_events_after From 65ee3bcbb62c53b5ed98bd211fc51b0f31a6293c Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Tue, 13 May 2025 10:53:38 +0200 Subject: [PATCH 22/30] feat(product_analytics): funnel query and response fixes --- backend/pkg/analytics/charts/charts.go | 3 +- backend/pkg/analytics/charts/metric_funnel.go | 121 +++++++++++++----- .../analytics/charts/metric_user_journey.go | 4 +- backend/pkg/analytics/charts/model.go | 2 +- 4 files changed, 97 insertions(+), 33 deletions(-) diff --git a/backend/pkg/analytics/charts/charts.go b/backend/pkg/analytics/charts/charts.go index 49477b574..e9f24bc31 100644 --- a/backend/pkg/analytics/charts/charts.go +++ b/backend/pkg/analytics/charts/charts.go @@ -46,5 +46,6 @@ func (s *chartsImpl) GetData(projectId int, userID uint64, req *MetricPayload) ( return nil, fmt.Errorf("error executing query: %v", err) } - return resp, nil + //return resp, nil + return map[string]interface{}{"data": resp}, nil } diff --git a/backend/pkg/analytics/charts/metric_funnel.go b/backend/pkg/analytics/charts/metric_funnel.go index ec7de3267..59df29070 100644 --- a/backend/pkg/analytics/charts/metric_funnel.go +++ b/backend/pkg/analytics/charts/metric_funnel.go @@ -7,9 +7,12 @@ import ( ) type FunnelStepResult struct { - LevelNumber uint64 `json:"step"` - StepName string `json:"type"` - CountAtLevel uint64 `json:"count"` + LevelNumber uint64 `json:"step"` + StepName string `json:"type"` + CountAtLevel uint64 `json:"count"` + Operator string `json:"operator"` + Value []string `json:"value"` + DropPct float64 `json:"dropPct"` } type FunnelResponse struct { @@ -29,14 +32,44 @@ func (f FunnelQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, } defer rows.Close() + // extract step filters + s := p.MetricPayload.Series[0] + var stepFilters []Filter + for _, flt := range s.Filter.Filters { + if flt.IsEvent { + stepFilters = append(stepFilters, flt) + } + } + var steps []FunnelStepResult for rows.Next() { var r FunnelStepResult if err := rows.Scan(&r.LevelNumber, &r.StepName, &r.CountAtLevel); err != nil { return nil, err } + idx := int(r.LevelNumber) - 1 + if idx >= 0 && idx < len(stepFilters) { + r.Operator = stepFilters[idx].Operator + r.Value = stepFilters[idx].Value + } steps = append(steps, r) } + + // compute drop percentages + if len(steps) > 0 { + prev := steps[0].CountAtLevel + 
steps[0].DropPct = 0 + for i := 1; i < len(steps); i++ { + curr := steps[i].CountAtLevel + if prev > 0 { + steps[i].DropPct = (float64(prev-curr) / float64(prev)) * 100 + } else { + steps[i].DropPct = 0 + } + prev = curr + } + } + return FunnelResponse{Steps: steps}, nil } @@ -44,21 +77,24 @@ func (f FunnelQueryBuilder) buildQuery(p Payload) (string, error) { if len(p.MetricPayload.Series) == 0 { return "", fmt.Errorf("series empty") } - s := p.MetricPayload.Series[0] metricFormat := p.MetricPayload.MetricFormat - // Separate global vs step filters - var globalFilters, stepFilters []Filter + var ( + globalFilters []Filter + stepFilters []Filter + sessionDurationFilter *Filter + ) for _, flt := range s.Filter.Filters { if flt.IsEvent { stepFilters = append(stepFilters, flt) + } else if flt.Type == "duration" { + sessionDurationFilter = &flt } else { globalFilters = append(globalFilters, flt) } } - // 1. Collect required mainColumns from all filters (including nested) requiredColumns := make(map[string]struct{}) var collectColumns func([]Filter) collectColumns = func(filters []Filter) { @@ -72,7 +108,6 @@ func (f FunnelQueryBuilder) buildQuery(p Payload) (string, error) { collectColumns(globalFilters) collectColumns(stepFilters) - // 2. Build SELECT clause for CTE selectCols := []string{ `e.created_at`, `e."$event_name" AS event_name`, @@ -89,40 +124,52 @@ func (f FunnelQueryBuilder) buildQuery(p Payload) (string, error) { fmt.Sprintf("if('%s' = 'sessionCount', toString(e.session_id), coalesce(nullif(s.user_id,''),e.distinct_id)) AS entity_id", metricFormat), ) - // 3. Global conditions globalConds, _ := buildEventConditions(globalFilters, BuildConditionsOptions{ DefinedColumns: mainColumns, MainTableAlias: "e", PropertiesColumnName: "$properties", }) + base := []string{ fmt.Sprintf("e.created_at >= toDateTime(%d/1000)", p.MetricPayload.StartTimestamp), fmt.Sprintf("e.created_at < toDateTime(%d/1000)", p.MetricPayload.EndTimestamp+86400000), - "s.duration > 0", fmt.Sprintf("e.project_id = %d", p.ProjectId), } base = append(base, globalConds...) + if sessionDurationFilter != nil { + vals := sessionDurationFilter.Value + if len(vals) > 0 && vals[0] != "" { + base = append(base, fmt.Sprintf("s.duration >= %s", vals[0])) + } + if len(vals) > 1 && vals[1] != "" { + base = append(base, fmt.Sprintf("s.duration <= %s", vals[1])) + } + } where := strings.Join(base, " AND ") - // 4. Step conditions - var stepNames []string - var stepExprs []string - for i, filter := range stepFilters { - stepNames = append(stepNames, fmt.Sprintf("'%s'", filter.Type)) - stepConds, _ := buildEventConditions([]Filter{filter}, BuildConditionsOptions{ - DefinedColumns: cteColumnAliases(), // logical -> logical (CTE alias) + var ( + stepNames []string + stepExprs []string + clickCount int + ) + for i, flt := range stepFilters { + stepNames = append(stepNames, fmt.Sprintf("'%s'", flt.Type)) + conds, _ := buildEventConditions([]Filter{flt}, BuildConditionsOptions{ + DefinedColumns: cteColumnAliases(), PropertiesColumnName: "properties", MainTableAlias: "", }) - - stepCondExprs := []string{fmt.Sprintf("event_name = funnel_steps[%d]", i+1)} - if len(stepConds) > 0 { - stepCondExprs = append(stepCondExprs, stepConds...) 
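	// Sketch of the step expressions the replacement code below builds,
	// assuming two steps LOCATION then CLICK (click_idx counts CLICK steps
	// per entity, so repeated CLICK steps bind to successive clicks):
	//
	//	(event_name = funnel_steps[1])
	//	(event_name = funnel_steps[2] AND click_idx = 1)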
+ var exprParts []string + exprParts = append(exprParts, fmt.Sprintf("event_name = funnel_steps[%d]", i+1)) + if flt.Type == "CLICK" { + clickCount++ + exprParts = append(exprParts, fmt.Sprintf("click_idx = %d", clickCount)) } - stepExprs = append(stepExprs, fmt.Sprintf("(%s)", strings.Join(stepCondExprs, " AND "))) + exprParts = append(exprParts, conds...) + stepExprs = append(stepExprs, fmt.Sprintf("(%s)", strings.Join(exprParts, " AND "))) } - stepsArr := "[" + strings.Join(stepNames, ",") + "]" + stepsArr := fmt.Sprintf("[%s]", strings.Join(stepNames, ",")) windowArgs := strings.Join(stepExprs, ",\n ") q := fmt.Sprintf(` @@ -135,16 +182,28 @@ WITH FROM product_analytics.events AS e JOIN experimental.sessions AS s USING(session_id) WHERE %s + ORDER BY e.session_id, e.created_at + ), + numbered_clicks AS ( + SELECT + entity_id, + created_at, + row_number() OVER (PARTITION BY entity_id ORDER BY created_at) AS click_idx + FROM events_for_funnel + WHERE event_name = 'CLICK' ), funnel_levels_reached AS ( SELECT - entity_id, + ef.entity_id, windowFunnel(funnel_window_seconds)( - toDateTime(created_at), + toDateTime(ef.created_at), %s ) AS max_level - FROM events_for_funnel - GROUP BY entity_id + FROM events_for_funnel ef + LEFT JOIN numbered_clicks nc + ON ef.entity_id = nc.entity_id + AND ef.created_at = nc.created_at + GROUP BY ef.entity_id ), counts_by_level AS ( SELECT @@ -166,8 +225,12 @@ SELECT ifNull(c.cnt, 0) AS count_at_level FROM step_list AS s LEFT JOIN counts_by_level AS c ON s.level_number = c.level_number -ORDER BY s.level_number; -`, stepsArr, strings.Join(selectCols, ",\n "), where, windowArgs) +ORDER BY s.level_number;`, + stepsArr, + strings.Join(selectCols, ",\n "), + where, + windowArgs, + ) return q, nil } diff --git a/backend/pkg/analytics/charts/metric_user_journey.go b/backend/pkg/analytics/charts/metric_user_journey.go index 243c3ffb4..413f2289e 100644 --- a/backend/pkg/analytics/charts/metric_user_journey.go +++ b/backend/pkg/analytics/charts/metric_user_journey.go @@ -532,10 +532,10 @@ func (h UserJourneyQueryBuilder) Execute(p Payload, conn db.Connector) (interfac } } - return JourneyResponse{Data: JourneyData{ + return JourneyData{ Nodes: finalNodes, Links: finalLinks, - }}, nil + }, nil } func (h UserJourneyQueryBuilder) buildQuery(p Payload) (string, error) { diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go index b36d4bc67..4345ccf7a 100644 --- a/backend/pkg/analytics/charts/model.go +++ b/backend/pkg/analytics/charts/model.go @@ -48,7 +48,7 @@ const ( MetricTypeTimeseries MetricType = "timeseries" MetricTypeTable MetricType = "table" MetricTypeFunnel MetricType = "funnel" - MetricTypeHeatmap MetricType = "heatmaps" + MetricTypeHeatmap MetricType = "heatMap" MetricTypeSession MetricType = "heatmaps_session" MetricUserJourney MetricType = "pathAnalysis" ) From 34c2ca281f1ead9a06c85a33c474924d3741ca45 Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Thu, 15 May 2025 17:41:38 +0200 Subject: [PATCH 23/30] feat(product_analytics): errors table --- .../analytics/charts/metric_table_errors.go | 188 ++++++++++++++++++ backend/pkg/analytics/charts/query.go | 3 + 2 files changed, 191 insertions(+) create mode 100644 backend/pkg/analytics/charts/metric_table_errors.go diff --git a/backend/pkg/analytics/charts/metric_table_errors.go b/backend/pkg/analytics/charts/metric_table_errors.go new file mode 100644 index 000000000..96d70360c --- /dev/null +++ b/backend/pkg/analytics/charts/metric_table_errors.go @@ -0,0 +1,188 @@ +package 
charts + +import ( + "fmt" + "log" + "strings" + + "openreplay/backend/pkg/analytics/db" +) + +type TableErrorsQueryBuilder struct{} + +type ErrorChartPoint struct { + Timestamp int64 `json:"timestamp"` + Count uint64 `json:"count"` +} + +type ErrorItem struct { + ErrorID string `json:"errorId"` + Name string `json:"name"` + Message string `json:"message"` + Users uint64 `json:"users"` + Total uint64 `json:"total"` + Sessions uint64 `json:"sessions"` + FirstOccurrence int64 `json:"firstOccurrence"` + LastOccurrence int64 `json:"lastOccurrence"` + Chart []ErrorChartPoint `json:"chart"` +} + +type TableErrorsResponse struct { + Total uint64 `json:"total"` + Errors []ErrorItem `json:"errors"` +} + +func (t TableErrorsQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { + query, err := t.buildQuery(p) + if err != nil { + return nil, err + } + rows, err := conn.Query(query) + if err != nil { + log.Printf("Error executing query: %s\nQuery: %s", err, query) + return nil, err + } + defer rows.Close() + + var resp TableErrorsResponse + for rows.Next() { + var e ErrorItem + var ts []int64 + var cs []uint64 + if err := rows.Scan( + &e.ErrorID, &e.Name, &e.Message, + &e.Users, &e.Total, &e.Sessions, + &e.FirstOccurrence, &e.LastOccurrence, + &ts, &cs, + ); err != nil { + return nil, err + } + for i := range ts { + e.Chart = append(e.Chart, ErrorChartPoint{Timestamp: ts[i], Count: cs[i]}) + } + resp.Errors = append(resp.Errors, e) + } + resp.Total = uint64(len(resp.Errors)) + return resp, nil +} + +func (t TableErrorsQueryBuilder) buildQuery(p Payload) (string, error) { + if len(p.Series) == 0 { + return "", fmt.Errorf("payload Series cannot be empty") + } + + density := p.Density + if density < 2 { + density = 7 + } + durMs := p.EndTimestamp - p.StartTimestamp + stepMs := durMs / int64(density-1) + startMs := (p.StartTimestamp / 1000) * 1000 + endMs := (p.EndTimestamp / 1000) * 1000 + + limit := p.Limit + if limit <= 0 { + limit = 10 + } + page := p.Page + if page <= 0 { + page = 1 + } + offset := (page - 1) * limit + + ef, en := buildEventConditions( + p.Series[0].Filter.Filters, + BuildConditionsOptions{DefinedColumns: mainColumns}, + ) + conds := []string{ + "`$event_name` = 'ERROR'", + fmt.Sprintf("project_id = %d", p.ProjectId), + fmt.Sprintf("created_at >= toDateTime(%d/1000)", startMs), + fmt.Sprintf("created_at <= toDateTime(%d/1000)", endMs), + } + if len(ef) > 0 { + conds = append(conds, ef...) 
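	// Worked example for the bucket parameters above (assumed values): a
	// window of durMs = 3,600,000 ms with density 7 gives
	// stepMs = 3,600,000 / 6 = 600,000, so an event 1,250,000 ms after
	// startMs lands in bucket startMs + (1,250,000/600,000)*600,000 =
	// startMs + 1,200,000 -- integer division, matching the toUInt64(...)
	// bucket expression in the SQL below.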
+ } + if len(en) > 0 { + conds = append(conds, "`$event_name` IN ("+buildInClause(en)+")") + } + whereClause := strings.Join(conds, " AND ") + + sql := fmt.Sprintf(`WITH + events AS ( + SELECT + error_id, + JSONExtractString(toString("$properties"), 'name') AS name, + JSONExtractString(toString("$properties"), 'message') AS message, + distinct_id, + session_id, + created_at + FROM product_analytics.events + WHERE %s + ), + sessions_per_interval AS ( + SELECT + error_id, + toUInt64(%d + (toUInt64((toUnixTimestamp64Milli(created_at) - %d) / %d) * %d)) AS bucket_ts, + countDistinct(session_id) AS session_count + FROM events + GROUP BY error_id, bucket_ts + ), + buckets AS ( + SELECT + toUInt64(generate_series) AS bucket_ts + FROM generate_series( + %d, + %d, + %d + ) + ), + error_meta AS ( + SELECT + error_id, + name, + message, + countDistinct(distinct_id) AS users, + count() AS total, + countDistinct(session_id) AS sessions, + min(created_at) AS first_occurrence, + max(created_at) AS last_occurrence + FROM events + GROUP BY error_id, name, message + ), + error_chart AS ( + SELECT + e.error_id AS error_id, + groupArray(b.bucket_ts) AS timestamps, + groupArray(coalesce(s.session_count, 0)) AS counts + FROM (SELECT DISTINCT error_id FROM events) AS e + CROSS JOIN buckets AS b + LEFT JOIN sessions_per_interval AS s + ON s.error_id = e.error_id + AND s.bucket_ts = b.bucket_ts + GROUP BY e.error_id + ) +SELECT + m.error_id, + m.name, + m.message, + m.users, + m.total, + m.sessions, + toUnixTimestamp64Milli(toDateTime64(m.first_occurrence, 3)) AS first_occurrence, + toUnixTimestamp64Milli(toDateTime64(m.last_occurrence, 3)) AS last_occurrence, + ec.timestamps, + ec.counts +FROM error_meta AS m +LEFT JOIN error_chart AS ec + ON m.error_id = ec.error_id +ORDER BY m.last_occurrence DESC +LIMIT %d OFFSET %d;`, + whereClause, + startMs, startMs, stepMs, stepMs, // New formula parameters + startMs, endMs, stepMs, + limit, offset, + ) + + return sql, nil +} diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go index 1f3d44432..e1d0049bb 100644 --- a/backend/pkg/analytics/charts/query.go +++ b/backend/pkg/analytics/charts/query.go @@ -25,6 +25,9 @@ func NewQueryBuilder(p Payload) (QueryBuilder, error) { case MetricTypeFunnel: return FunnelQueryBuilder{}, nil case MetricTypeTable: + if p.MetricOf == "jsException" { + return TableErrorsQueryBuilder{}, nil + } return TableQueryBuilder{}, nil case MetricTypeHeatmap: return HeatmapQueryBuilder{}, nil From 96b5e2e0cce68ee55326b9c5c81bc779b48db11f Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Mon, 19 May 2025 17:11:42 +0200 Subject: [PATCH 24/30] feat(product_analytics): table of cards fixes --- backend/pkg/analytics/charts/metric_table.go | 61 +++++++++++++------- backend/pkg/analytics/charts/model.go | 16 ++--- backend/pkg/analytics/charts/query.go | 51 ++++++++++++++++ 3 files changed, 98 insertions(+), 30 deletions(-) diff --git a/backend/pkg/analytics/charts/metric_table.go b/backend/pkg/analytics/charts/metric_table.go index b1313d731..2f011a79e 100644 --- a/backend/pkg/analytics/charts/metric_table.go +++ b/backend/pkg/analytics/charts/metric_table.go @@ -37,18 +37,20 @@ const ( ) var propertySelectorMap = map[string]string{ - string(MetricOfTableBrowser): "main.$browser AS metric_value", - string(MetricOfTableDevice): "main.$device AS metric_value", - string(MetricOfTableCountry): "main.$country AS metric_value", + string(MetricOfTableLocation): "JSONExtractString(toString(main.$properties), 'url_path') AS 
metric_value", + //string(MetricOfTableUserId): "if(empty(sessions.user_id), 'Anonymous', sessions.user_id) AS metric_value", + string(MetricOfTableUserId): "if(empty(sessions.user_id) OR sessions.user_id IS NULL, 'Anonymous', sessions.user_id) AS metric_value", + string(MetricOfTableBrowser): "main.$browser AS metric_value", + //string(MetricOfTableDevice): "sessions.user_device AS metric_value", + string(MetricOfTableDevice): "if(empty(sessions.user_device) OR sessions.user_device IS NULL, 'Undefined', sessions.user_device) AS metric_value", + string(MetricOfTableCountry): "toString(sessions.user_country) AS metric_value", string(MetricOfTableReferrer): "main.$referrer AS metric_value", + string(MetricOfTableFetch): "JSONExtractString(toString(main.$properties), 'url_path') AS metric_value", } var mainColumns = map[string]string{ "userBrowser": "$browser", - "userDevice": "$device_type", - "userCountry": "$country", "referrer": "$referrer", - // TODO add more columns if needed } func (t TableQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { @@ -77,12 +79,17 @@ func (t TableQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, e } defer rows.Close() - var ( - overallTotalMetricValues uint64 - overallCount uint64 - values []TableValue - firstRow = true - ) + var overallTotalMetricValues uint64 + var overallCount uint64 + values := make([]TableValue, 0) + firstRow := true + + //var ( + // overallTotalMetricValues uint64 + // overallCount uint64 + // values []TableValue + // firstRow = true + //) for rows.Next() { var ( @@ -127,20 +134,32 @@ func (t TableQueryBuilder) buildQuery(r Payload, metricFormat string) (string, e originalMetricOf := r.MetricOf propertyName = originalMetricOf - eventFilters := s.Filter.Filters + durationConds, eventFilters := buildDurationWhere(s.Filter.Filters) eventConds, eventNames := buildEventConditions(eventFilters, BuildConditionsOptions{ DefinedColumns: mainColumns, }) - baseWhereConditions := []string{ fmt.Sprintf("main.created_at >= toDateTime(%d/1000)", r.StartTimestamp), fmt.Sprintf("main.created_at <= toDateTime(%d/1000)", r.EndTimestamp), - "sessions.duration > 0", + fmt.Sprintf("main.project_id = %d", r.ProjectId), + } + baseWhereConditions = append(baseWhereConditions, durationConds...) 
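	// Minimal sketch of what durationConds holds, assuming a duration filter
	// with numeric bounds ["1000", "60000"] (see buildDurationWhere below):
	//
	//	conds, rest := buildDurationWhere(s.Filter.Filters)
	//	// conds == []string{"sessions.duration >= 1000", "sessions.duration <= 60000"}
	//	// rest carries every non-duration filter onward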
+ + if cond := eventNameCondition("", r.MetricOf); cond != "" { + baseWhereConditions = append(baseWhereConditions, cond) } - if r.ProjectId > 0 { - baseWhereConditions = append(baseWhereConditions, fmt.Sprintf("main.project_id = %d", r.ProjectId)) - } + //baseWhereConditions := []string{ + // fmt.Sprintf("main.created_at >= toDateTime(%d/1000)", r.StartTimestamp), + // fmt.Sprintf("main.created_at <= toDateTime(%d/1000)", r.EndTimestamp), + // "sessions.duration > 0", + //} + // + + // + //if r.ProjectId > 0 { + // baseWhereConditions = append(baseWhereConditions, fmt.Sprintf("main.project_id = %d", r.ProjectId)) + //} var aggregationExpression string var aggregationAlias = "aggregation_id" @@ -154,9 +173,7 @@ func (t TableQueryBuilder) buildQuery(r Payload, metricFormat string) (string, e aggregationExpression = "main.session_id" } - var propertySelector string - var ok bool - propertySelector, ok = propertySelectorMap[originalMetricOf] + propertySelector, ok := propertySelectorMap[originalMetricOf] if !ok { propertySelector = fmt.Sprintf("JSONExtractString(toString(main.$properties), '%s') AS metric_value", propertyName) } @@ -196,7 +213,7 @@ func (t TableQueryBuilder) buildQuery(r Payload, metricFormat string) (string, e metric_value AS name, countDistinct(%s) AS value_count FROM filtered_data - WHERE name IS NOT NULL AND name != '' + -- WHERE name IS NOT NULL AND name != '' GROUP BY name ) SELECT diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go index 4345ccf7a..644eeaf44 100644 --- a/backend/pkg/analytics/charts/model.go +++ b/backend/pkg/analytics/charts/model.go @@ -82,12 +82,12 @@ type MetricPayload struct { type MetricOfTable string const ( - MetricOfTableLocation MetricOfTable = "url_path" // TOP Pages - MetricOfTableBrowser MetricOfTable = "user_browser" + MetricOfTableLocation MetricOfTable = "location" // TOP Pages + MetricOfTableBrowser MetricOfTable = "userBrowser" MetricOfTableReferrer MetricOfTable = "referrer" - MetricOfTableUserId MetricOfTable = "user_id" - MetricOfTableCountry MetricOfTable = "user_country" - MetricOfTableDevice MetricOfTable = "user_device" + MetricOfTableUserId MetricOfTable = "userId" + MetricOfTableCountry MetricOfTable = "userCountry" + MetricOfTableDevice MetricOfTable = "userDevice" MetricOfTableFetch MetricOfTable = "fetch" //MetricOfTableIssues MetricOfTable = "issues" @@ -136,9 +136,9 @@ const ( // Event filters const ( - FilterClick FilterType = "click" - FilterInput FilterType = "input" - FilterLocation FilterType = "location" + FilterClick FilterType = "CLICK" + FilterInput FilterType = "INPUT" + FilterLocation FilterType = "LOCATION" FilterTag FilterType = "tag" FilterCustom FilterType = "customEvent" FilterFetch FilterType = "fetch" diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go index e1d0049bb..a770c7629 100644 --- a/backend/pkg/analytics/charts/query.go +++ b/backend/pkg/analytics/charts/query.go @@ -4,6 +4,7 @@ import ( "fmt" "log" "openreplay/backend/pkg/analytics/db" + "strconv" "strings" ) @@ -48,6 +49,7 @@ type BuildConditionsOptions struct { var propertyKeyMap = map[string]filterConfig{ "LOCATION": {LogicalProperty: "url_path"}, + "FETCH": {LogicalProperty: "url_path"}, "CLICK": {LogicalProperty: "label"}, "INPUT": {LogicalProperty: "label"}, "fetchUrl": {LogicalProperty: "url_path"}, @@ -120,6 +122,10 @@ func buildEventConditions(filters []Filter, options ...BuildConditionsOptions) ( } } for _, f := range filters { + if f.Type == FilterDuration 
{
+			continue
+		}
+
 		fConds, fNames := addFilter(f, opts)
 		if len(fConds) > 0 {
 			conds = append(conds, fConds...)
@@ -415,3 +421,48 @@ func reverseLookup(m map[string]string, value string) string {
 	}
 	return ""
 }
+
+func eventNameCondition(table, metricOf string) string {
+	if table == "" {
+		table = "main"
+	}
+	switch metricOf {
+	case string(MetricOfTableFetch):
+		return fmt.Sprintf("%s.`$event_name` = 'REQUEST'", table)
+	case string(MetricOfTableLocation):
+		return fmt.Sprintf("%s.`$event_name` = 'LOCATION'", table)
+	default:
+		return ""
+	}
+}
+
+func buildDurationWhere(filters []Filter) ([]string, []Filter) {
+	var conds []string
+	var rest []Filter
+	for _, f := range filters {
+		if string(f.Type) == "duration" {
+			v := f.Value
+			if len(v) == 1 {
+				if v[0] != "" {
+					if d, err := strconv.ParseInt(v[0], 10, 64); err == nil {
+						conds = append(conds, fmt.Sprintf("sessions.duration >= %d", d))
+					}
+				}
+			} else if len(v) >= 2 {
+				if v[0] != "" {
+					if d, err := strconv.ParseInt(v[0], 10, 64); err == nil {
+						conds = append(conds, fmt.Sprintf("sessions.duration >= %d", d))
+					}
+				}
+				if v[1] != "" {
+					if d, err := strconv.ParseInt(v[1], 10, 64); err == nil {
+						conds = append(conds, fmt.Sprintf("sessions.duration <= %d", d))
+					}
+				}
+			}
+		} else {
+			rest = append(rest, f)
+		}
+	}
+	return conds, rest
+}

From d2b455dfdbc81561685aefac0ee8271a7b9de483 Mon Sep 17 00:00:00 2001
From: Shekar Siri
Date: Tue, 20 May 2025 11:28:50 +0200
Subject: [PATCH 25/30] feat(product_analytics): table of cards testing filters

---
 backend/pkg/analytics/charts/metric_table.go | 26 +++++++++++---------
 backend/pkg/analytics/charts/query.go        | 17 +++++++++++++
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/backend/pkg/analytics/charts/metric_table.go b/backend/pkg/analytics/charts/metric_table.go
index 2f011a79e..464da6b78 100644
--- a/backend/pkg/analytics/charts/metric_table.go
+++ b/backend/pkg/analytics/charts/metric_table.go
@@ -51,6 +51,7 @@ var mainColumns = map[string]string{
 	"userBrowser": "$browser",
 	"referrer":    "$referrer",
+	"ISSUE":       "issue_type",
 }
 
 func (t TableQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) {
@@ -134,9 +135,20 @@ func (t TableQueryBuilder) buildQuery(r Payload, metricFormat string) (string, e
 	originalMetricOf := r.MetricOf
 	propertyName = originalMetricOf
 
-	durationConds, eventFilters := buildDurationWhere(s.Filter.Filters)
+	durationConds, _ := buildDurationWhere(s.Filter.Filters)
+	eventFilters, _ := filterOutTypes(s.Filter.Filters, []FilterType{FilterDuration, FilterUserId})
+	_, sessionFilters := filterOutTypes(s.Filter.Filters, []FilterType{FilterUserId, FilterUserAnonymousId})
+
+	sessionConds, _ := buildEventConditions(sessionFilters, BuildConditionsOptions{
+		DefinedColumns: map[string]string{
+			"userId": "user_id",
+		},
+		MainTableAlias: "sessions",
+	})
+
 	eventConds, eventNames := buildEventConditions(eventFilters, BuildConditionsOptions{
 		DefinedColumns: mainColumns,
+		MainTableAlias: "main",
 	})
 	baseWhereConditions := []string{
 		fmt.Sprintf("main.created_at >= toDateTime(%d/1000)", r.StartTimestamp),
@@ -149,17 +161,7 @@ func (t TableQueryBuilder) buildQuery(r Payload, metricFormat string) (string, e
 		baseWhereConditions = append(baseWhereConditions, cond)
 	}
 
-	//baseWhereConditions := []string{
-	//	fmt.Sprintf("main.created_at >= toDateTime(%d/1000)", r.StartTimestamp),
-	//	fmt.Sprintf("main.created_at <= toDateTime(%d/1000)", r.EndTimestamp),
-	//	"sessions.duration > 0",
-	//}
-	//
-
-	//
-	//if
r.ProjectId > 0 { - // baseWhereConditions = append(baseWhereConditions, fmt.Sprintf("main.project_id = %d", r.ProjectId)) - //} + baseWhereConditions = append(baseWhereConditions, sessionConds...) var aggregationExpression string var aggregationAlias = "aggregation_id" diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go index a770c7629..77e23b9e3 100644 --- a/backend/pkg/analytics/charts/query.go +++ b/backend/pkg/analytics/charts/query.go @@ -50,10 +50,12 @@ type BuildConditionsOptions struct { var propertyKeyMap = map[string]filterConfig{ "LOCATION": {LogicalProperty: "url_path"}, "FETCH": {LogicalProperty: "url_path"}, + "REQUEST": {LogicalProperty: "url_path"}, "CLICK": {LogicalProperty: "label"}, "INPUT": {LogicalProperty: "label"}, "fetchUrl": {LogicalProperty: "url_path"}, "fetchStatusCode": {LogicalProperty: "status", IsNumeric: true}, + //"ISSUE": {LogicalProperty: "issue_type"}, // TODO add more mappings as needed } @@ -466,3 +468,18 @@ func buildDurationWhere(filters []Filter) ([]string, []Filter) { } return conds, rest } + +func filterOutTypes(filters []Filter, typesToRemove []FilterType) (kept []Filter, removed []Filter) { + removeMap := make(map[FilterType]struct{}, len(typesToRemove)) + for _, t := range typesToRemove { + removeMap[t] = struct{}{} + } + for _, f := range filters { + if _, ok := removeMap[f.Type]; ok { + removed = append(removed, f) + } else { + kept = append(kept, f) + } + } + return +} From db0084f7a9ff6eec5843b54347d4f812437efa01 Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Thu, 22 May 2025 10:42:47 +0200 Subject: [PATCH 26/30] feat(product_analytics): table of cards testing and improvements --- backend/pkg/analytics/charts/metric_table.go | 203 ++++++++++--------- backend/pkg/analytics/charts/model.go | 4 +- backend/pkg/analytics/charts/query.go | 4 +- 3 files changed, 106 insertions(+), 105 deletions(-) diff --git a/backend/pkg/analytics/charts/metric_table.go b/backend/pkg/analytics/charts/metric_table.go index 464da6b78..2927e062f 100644 --- a/backend/pkg/analytics/charts/metric_table.go +++ b/backend/pkg/analytics/charts/metric_table.go @@ -49,9 +49,10 @@ var propertySelectorMap = map[string]string{ } var mainColumns = map[string]string{ - "userBrowser": "$browser", - "referrer": "$referrer", - "ISSUE": "issue_type", + "userBrowser": "main.$browser", + "userDevice": "sessions.user_device", + "referrer": "main.$referrer", + "ISSUE": "main.issue_type", } func (t TableQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { @@ -85,13 +86,6 @@ func (t TableQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, e values := make([]TableValue, 0) firstRow := true - //var ( - // overallTotalMetricValues uint64 - // overallCount uint64 - // values []TableValue - // firstRow = true - //) - for rows.Next() { var ( name string @@ -128,112 +122,119 @@ func (t TableQueryBuilder) buildQuery(r Payload, metricFormat string) (string, e } s := r.Series[0] - var propertyName string - if r.MetricOf == "" { - return "", fmt.Errorf("MetricOf is empty") + // sessions_data WHERE conditions + durConds, _ := buildDurationWhere(s.Filter.Filters) + sessFilters, _ := filterOutTypes(s.Filter.Filters, []FilterType{FilterDuration, FilterUserAnonymousId}) + sessConds, evtNames := buildEventConditions(sessFilters, BuildConditionsOptions{DefinedColumns: mainColumns, MainTableAlias: "main"}) + sessionDataConds := append(durConds, sessConds...) 
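	// Sketch of the filterOutTypes split used above (assumed input mix):
	//
	//	kept, removed := filterOutTypes(
	//		[]Filter{{Type: FilterDuration}, {Type: FilterClick}},
	//		[]FilterType{FilterDuration},
	//	)
	//	// kept == [{Type: FilterClick}], removed == [{Type: FilterDuration}]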
+ // date range for sessions_data + sessionDataConds = append(sessionDataConds, + fmt.Sprintf("main.created_at BETWEEN toDateTime(%d/1000) AND toDateTime(%d/1000)", r.StartTimestamp, r.EndTimestamp), + ) + // clean empty + var sdClean []string + for _, c := range sessionDataConds { + if strings.TrimSpace(c) != "" { + sdClean = append(sdClean, c) + } } - originalMetricOf := r.MetricOf - propertyName = originalMetricOf - - durationConds, _ := buildDurationWhere(s.Filter.Filters) - eventFilters, _ := filterOutTypes(s.Filter.Filters, []FilterType{FilterDuration, FilterUserId}) - _, sessionFilters := filterOutTypes(s.Filter.Filters, []FilterType{FilterUserId, FilterUserAnonymousId}) - - sessionConds, _ := buildEventConditions(sessionFilters, BuildConditionsOptions{ - DefinedColumns: map[string]string{ - "userId": "user_id", - }, - MainTableAlias: "sessions", - }) - - eventConds, eventNames := buildEventConditions(eventFilters, BuildConditionsOptions{ - DefinedColumns: mainColumns, - MainTableAlias: "main", - }) - baseWhereConditions := []string{ - fmt.Sprintf("main.created_at >= toDateTime(%d/1000)", r.StartTimestamp), - fmt.Sprintf("main.created_at <= toDateTime(%d/1000)", r.EndTimestamp), - fmt.Sprintf("main.project_id = %d", r.ProjectId), + sessionDataWhere := "" + if len(sdClean) > 0 { + sessionDataWhere = "WHERE " + strings.Join(sdClean, " AND ") } - baseWhereConditions = append(baseWhereConditions, durationConds...) - - if cond := eventNameCondition("", r.MetricOf); cond != "" { - baseWhereConditions = append(baseWhereConditions, cond) + if len(evtNames) > 0 { + sessionDataWhere += fmt.Sprintf(" AND main.$event_name IN ('%s')", strings.Join(evtNames, "','")) } - baseWhereConditions = append(baseWhereConditions, sessionConds...) - - var aggregationExpression string - var aggregationAlias = "aggregation_id" - var specificWhereConditions []string - - if metricFormat == MetricFormatUserCount { - aggregationExpression = fmt.Sprintf("if(empty(sessions.user_id), toString(sessions.user_uuid), sessions.user_id)") - userExclusionCondition := fmt.Sprintf("NOT (empty(sessions.user_id) AND (sessions.user_uuid IS NULL OR sessions.user_uuid = '%s'))", nilUUIDString) - specificWhereConditions = append(specificWhereConditions, userExclusionCondition) - } else { - aggregationExpression = "main.session_id" - } - - propertySelector, ok := propertySelectorMap[originalMetricOf] + // filtered_data WHERE conditions + propSel, ok := propertySelectorMap[r.MetricOf] if !ok { - propertySelector = fmt.Sprintf("JSONExtractString(toString(main.$properties), '%s') AS metric_value", propertyName) + propSel = fmt.Sprintf("JSONExtractString(toString(main.$properties), '%s') AS metric_value", r.MetricOf) + } + parts := strings.SplitN(propSel, " AS ", 2) + propertyExpr := parts[0] + + tAgg := "main.session_id" + specConds := []string{} + if metricFormat == MetricFormatUserCount { + tAgg = "if(empty(sessions.user_id), toString(sessions.user_uuid), sessions.user_id)" + specConds = append(specConds, + fmt.Sprintf("NOT (empty(sessions.user_id) AND (sessions.user_uuid IS NULL OR sessions.user_uuid = '%s'))", nilUUIDString), + ) } - allWhereConditions := baseWhereConditions - if len(eventConds) > 0 { - allWhereConditions = append(allWhereConditions, eventConds...) 
+ // metric-specific filter + _, mFilt := filterOutTypes(s.Filter.Filters, []FilterType{FilterType(r.MetricOf)}) + metricCond := eventNameCondition("", r.MetricOf) + if len(mFilt) > 0 { + //conds, _ := buildEventConditions(mFilt, BuildConditionsOptions{DefinedColumns: map[string]string{"userId": "user_id"}, MainTableAlias: "main"}) + //metricCond = strings.Join(conds, " AND ") } - if len(eventNames) > 0 { - allWhereConditions = append(allWhereConditions, "main.`$event_name` IN ("+buildInClause(eventNames)+")") + + filteredConds := []string{ + fmt.Sprintf("main.project_id = %d", r.ProjectId), + metricCond, + fmt.Sprintf("main.created_at BETWEEN toDateTime(%d/1000) AND toDateTime(%d/1000)", r.StartTimestamp, r.EndTimestamp), + } + filteredConds = append(filteredConds, specConds...) + // clean empty + var fClean []string + for _, c := range filteredConds { + if strings.TrimSpace(c) != "" { + fClean = append(fClean, c) + } + } + filteredWhere := "" + if len(fClean) > 0 { + filteredWhere = "WHERE " + strings.Join(fClean, " AND ") } - allWhereConditions = append(allWhereConditions, specificWhereConditions...) - whereClause := strings.Join(allWhereConditions, " AND ") limit := r.Limit if limit <= 0 { limit = 10 } - page := r.Page - if page <= 0 { - page = 1 - } - offset := (page - 1) * limit - limitClause := fmt.Sprintf("LIMIT %d OFFSET %d", limit, offset) + offset := (r.Page - 1) * limit query := fmt.Sprintf(` - WITH filtered_data AS ( - SELECT DISTINCT - %s, - %s AS %s - FROM product_analytics.events AS main - INNER JOIN experimental.sessions AS sessions ON main.session_id = sessions.session_id - WHERE %s - ), - grouped_values AS ( - SELECT - metric_value AS name, - countDistinct(%s) AS value_count - FROM filtered_data - -- WHERE name IS NOT NULL AND name != '' - GROUP BY name - ) - SELECT - (SELECT count() FROM grouped_values) AS overall_total_metric_values, - name, - value_count, - (SELECT countDistinct(%s) FROM filtered_data) AS overall_total_count - FROM grouped_values - ORDER BY value_count DESC - %s - `, - propertySelector, - aggregationExpression, - aggregationAlias, - whereClause, - aggregationAlias, - aggregationAlias, - limitClause) - +WITH sessions_data AS ( + SELECT session_id + FROM product_analytics.events AS main + JOIN experimental.sessions AS sessions USING (session_id) + %s + GROUP BY session_id +), +filtered_data AS ( + SELECT %s AS name, %s AS session_id + FROM product_analytics.events AS main + JOIN sessions_data USING (session_id) + JOIN experimental.sessions AS sessions USING (session_id) + %s +), +totals AS ( + SELECT count() AS overall_total_metric_values, + countDistinct(session_id) AS overall_total_count + FROM filtered_data +), +grouped_values AS ( + SELECT name, + countDistinct(session_id) AS value_count + FROM filtered_data + GROUP BY name +) +SELECT t.overall_total_metric_values, + g.name, + g.value_count, + t.overall_total_count +FROM grouped_values AS g +CROSS JOIN totals AS t +ORDER BY g.value_count DESC +LIMIT %d OFFSET %d;`, + sessionDataWhere, + propertyExpr, + tAgg, + filteredWhere, + limit, + offset, + ) return query, nil } diff --git a/backend/pkg/analytics/charts/model.go b/backend/pkg/analytics/charts/model.go index 644eeaf44..2318a5041 100644 --- a/backend/pkg/analytics/charts/model.go +++ b/backend/pkg/analytics/charts/model.go @@ -82,13 +82,13 @@ type MetricPayload struct { type MetricOfTable string const ( - MetricOfTableLocation MetricOfTable = "location" // TOP Pages + MetricOfTableLocation MetricOfTable = "LOCATION" // TOP Pages 
MetricOfTableBrowser MetricOfTable = "userBrowser" MetricOfTableReferrer MetricOfTable = "referrer" MetricOfTableUserId MetricOfTable = "userId" MetricOfTableCountry MetricOfTable = "userCountry" MetricOfTableDevice MetricOfTable = "userDevice" - MetricOfTableFetch MetricOfTable = "fetch" + MetricOfTableFetch MetricOfTable = "FETCH" //MetricOfTableIssues MetricOfTable = "issues" //MetricOfTableSessions MetricOfTable = "sessions" diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go index 77e23b9e3..e28771e95 100644 --- a/backend/pkg/analytics/charts/query.go +++ b/backend/pkg/analytics/charts/query.go @@ -65,7 +65,6 @@ type filterConfig struct { IsNumeric bool } -// getColumnAccessor returns the column name for a logical property func getColumnAccessor(logical string, isNumeric bool, opts BuildConditionsOptions) string { // helper: wrap names starting with $ in quotes quote := func(name string) string { @@ -79,7 +78,8 @@ func getColumnAccessor(logical string, isNumeric bool, opts BuildConditionsOptio if col, ok := opts.DefinedColumns[logical]; ok { col = quote(col) if opts.MainTableAlias != "" { - return fmt.Sprintf("%s.%s", opts.MainTableAlias, col) + //return fmt.Sprintf("%s.%s", opts.MainTableAlias, col) + return fmt.Sprintf("%s", col) } return col } From 3e5f018b5838f150ea40c652b5dc8f17cc3c1ed4 Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Thu, 22 May 2025 16:25:30 +0200 Subject: [PATCH 27/30] feat(product_analytics): table of cards testing and improvements --- frontend/chart-request.http | 151 ------------------------------------ 1 file changed, 151 deletions(-) delete mode 100644 frontend/chart-request.http diff --git a/frontend/chart-request.http b/frontend/chart-request.http deleted file mode 100644 index 45d80a0a2..000000000 --- a/frontend/chart-request.http +++ /dev/null @@ -1,151 +0,0 @@ -### Login Request -POST https://foss.openreplay.com/api/login -Content-Type: application/json - -{ - "email": "shekar@openreplay.com", - "password": "Asayer#123" -} - -> {% -// Extract token from response -let json = response.body; - -// Check if response.body is a string that needs parsing -if (typeof json === "string") { - try { - json = JSON.parse(json); - } catch (e) { - return "Error parsing response: " + e.message; - } -} - -// Extract JWT token -const token = json.jwt; - -if (token) { - // Try different ways to set the token based on client type - if (typeof client !== "undefined" && client.global) { - client.global.set("API_TOKEN", token); - } else if (typeof pm !== "undefined" && pm.environment) { - pm.environment.set("API_TOKEN", token); - } else if (typeof process !== "undefined") { - process.env.API_TOKEN = token; - } else { - // Last resort - try to access global variables directly - API_TOKEN = token; // Set as global variable - } - - return "JWT token saved as API_TOKEN: " + token.substring(0, 10) + "..."; -} else { - return "No JWT token found in response"; -} -%} - -### Using the token in subsequent requests -GET https://foss.openreplay.com/api/account -Authorization: Bearer {{API_TOKEN}} - -### Chart Request - TIMESERIES (lineChart) -POST http://localhost:8080/v1/analytics/5/cards/try -Content-Type: application/json -Authorization: Bearer {{API_TOKEN}} - -{ - "startTimestamp": 1737216192000, - "endTimestamp": 1739894592000, - "density": 6, - "metricId": 1040, - "metricOf": "userCount", - "metricType": "timeseries", - "viewType": "lineChart", - "name": "CH - Users Trend", - "series": [ - { - "seriesId": 621, - "name": "Series First", - 
"index": 1, - "filter": { - "filters": [ - { - "type": "userCountry", - "isEvent": false, - "value": [ - "UN" - ], - "operator": "is", - "filters": [] - } - ], - "eventsOrder": "then" - } - }, - { - "seriesId": 621, - "name": "Series Second", - "index": 1, - "filter": { - "filters": [ - { - "type": "userCountry", - "isEvent": false, - "value": [ - "FR" - ], - "operator": "is", - "filters": [] - } - ], - "eventsOrder": "then" - } - } - ], - "page": 1, - "limit": 20, - "compareTo": [ - "PREV_7_DAYS" - ], - "config": { - "col": 2, - "row": 2, - "position": 0 - } -} - -### Chart Request - TABLE -POST http://localhost:8080/v1/analytics/65/cards/try -Content-Type: application/json -Authorization: Bearer {{API_TOKEN}} - -{ - "startTimestamp": 1737216192000, - "endTimestamp": 1744635600000, - "density": 6, - "metricId": 1040, - "metricOf": "referrer", - "metricType": "table", - "metricFormat": "sessionCount", - "viewType": "table", - "name": "CH - Users Trend", - "series": [ - { - "seriesId": 621, - "name": "Series First", - "index": 1, - "filter": { - "filters": [], - "eventsOrder": "then" - } - } - ], - "page": 1, - "limit": 20, - "compareTo": [ - "PREV_7_DAYS" - ], - "config": { - "col": 2, - "row": 2, - "position": 0 - } -} From a486af574941da8302d32ce841abf6d932f02d92 Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Fri, 23 May 2025 10:53:07 +0200 Subject: [PATCH 28/30] feat(product_analytics): general query fix --- backend/pkg/analytics/charts/metric_table.go | 6 +++--- backend/pkg/analytics/charts/query.go | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/backend/pkg/analytics/charts/metric_table.go b/backend/pkg/analytics/charts/metric_table.go index 2927e062f..a456fa0ca 100644 --- a/backend/pkg/analytics/charts/metric_table.go +++ b/backend/pkg/analytics/charts/metric_table.go @@ -49,10 +49,10 @@ var propertySelectorMap = map[string]string{ } var mainColumns = map[string]string{ - "userBrowser": "main.$browser", + "userBrowser": "$browser", "userDevice": "sessions.user_device", - "referrer": "main.$referrer", - "ISSUE": "main.issue_type", + "referrer": "$referrer", + "ISSUE": "issue_type", } func (t TableQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) { diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go index e28771e95..4d42469d5 100644 --- a/backend/pkg/analytics/charts/query.go +++ b/backend/pkg/analytics/charts/query.go @@ -78,8 +78,10 @@ func getColumnAccessor(logical string, isNumeric bool, opts BuildConditionsOptio if col, ok := opts.DefinedColumns[logical]; ok { col = quote(col) if opts.MainTableAlias != "" { - //return fmt.Sprintf("%s.%s", opts.MainTableAlias, col) - return fmt.Sprintf("%s", col) + if strings.Contains(col, ".") { + return fmt.Sprintf("%s", col) + } + return fmt.Sprintf("%s.%s", opts.MainTableAlias, col) } return col } From 9df909d1127d68052a480e69d24d6aced2f95e47 Mon Sep 17 00:00:00 2001 From: Shekar Siri Date: Fri, 23 May 2025 15:10:30 +0200 Subject: [PATCH 29/30] feat(product_analytics): teimseries improvements --- backend/pkg/analytics/charts/metric_timeseries.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/backend/pkg/analytics/charts/metric_timeseries.go b/backend/pkg/analytics/charts/metric_timeseries.go index 419cb322b..e8aefb136 100644 --- a/backend/pkg/analytics/charts/metric_timeseries.go +++ b/backend/pkg/analytics/charts/metric_timeseries.go @@ -82,7 +82,12 @@ func (t TimeSeriesQueryBuilder) buildQuery(p Payload, s Series) 
(string, error)
 }
 
 func (TimeSeriesQueryBuilder) buildSessionCountQuery(p Payload, s Series) string {
-	eventConds, eventNames := buildEventConditions(s.Filter.Filters)
+	//eventConds, eventNames := buildEventConditions(s.Filter.Filters)
+	eventConds, eventNames := buildEventConditions(s.Filter.Filters, BuildConditionsOptions{
+		DefinedColumns:       mainColumns,
+		MainTableAlias:       "main",
+		PropertiesColumnName: "$properties",
+	})
 	sessionConds := buildSessionConditions(s.Filter.Filters)
 	staticEvt := buildStaticEventWhere(p)
 	sessWhere, sessJoin := buildStaticSessionWhere(p, sessionConds)
@@ -92,7 +97,11 @@ func (TimeSeriesQueryBuilder) buildSessionCountQuery(p Payload, s Series) string
 }
 
 func (TimeSeriesQueryBuilder) buildUserCountQuery(p Payload, s Series) string {
-	eventConds, eventNames := buildEventConditions(s.Filter.Filters)
+	eventConds, eventNames := buildEventConditions(s.Filter.Filters, BuildConditionsOptions{
+		DefinedColumns:       mainColumns,
+		MainTableAlias:       "main",
+		PropertiesColumnName: "$properties",
+	})
 	sessionConds := buildSessionConditions(s.Filter.Filters)
 	staticEvt := buildStaticEventWhere(p)
 	sessWhere, sessJoin := buildStaticSessionWhere(p, sessionConds)
@@ -134,6 +143,7 @@ func buildEventsSubQuery(metric string, eventConds, eventNames []string, staticE
 
 func buildMainQuery(p Payload, subQuery, metric string) string {
 	step := int(getStepSize(p.StartTimestamp, p.EndTimestamp, p.Density, false, 1000))
+	step = step * 1000
 	if metric == "sessionCount" {
 		return fmt.Sprintf(sessionMainQueryTpl, p.StartTimestamp, p.EndTimestamp, step, subQuery, step)
 	}
@@ -205,6 +215,7 @@ var sessionNoFiltersSubQueryTpl = `
 SELECT s.session_id AS session_id,
        s.datetime AS datetime
 FROM experimental.sessions AS s
+
 WHERE %s
 `

From c750de69460e6e1264f68b8fa4bc5bbd96c85e14 Mon Sep 17 00:00:00 2001
From: Shekar Siri
Date: Fri, 23 May 2025 17:00:44 +0200
Subject: [PATCH 30/30] feat(product_analytics): timeseries filter fixes

---
 backend/pkg/analytics/charts/metric_table.go  |   9 +-
 .../pkg/analytics/charts/metric_timeseries.go | 263 ++++++------------
 backend/pkg/analytics/charts/query.go         |  12 +-
 3 files changed, 98 insertions(+), 186 deletions(-)

diff --git a/backend/pkg/analytics/charts/metric_table.go b/backend/pkg/analytics/charts/metric_table.go
index a456fa0ca..d5251b567 100644
--- a/backend/pkg/analytics/charts/metric_table.go
+++ b/backend/pkg/analytics/charts/metric_table.go
@@ -49,10 +49,11 @@ var propertySelectorMap = map[string]string{
 }
 
 var mainColumns = map[string]string{
-	"userBrowser": "$browser",
-	"userDevice":  "sessions.user_device",
-	"referrer":    "$referrer",
-	"ISSUE":       "issue_type",
+	"userBrowser":   "$browser",
+	"userDevice":    "sessions.user_device",
+	"referrer":      "$referrer",
+	"fetchDuration": "$duration_s",
+	"ISSUE":         "issue_type",
 }
 
 func (t TableQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) {
diff --git a/backend/pkg/analytics/charts/metric_timeseries.go b/backend/pkg/analytics/charts/metric_timeseries.go
index e8aefb136..00f3e9325 100644
--- a/backend/pkg/analytics/charts/metric_timeseries.go
+++ b/backend/pkg/analytics/charts/metric_timeseries.go
@@ -11,236 +11,137 @@ import (
 
 type TimeSeriesQueryBuilder struct{}
 
 func (t TimeSeriesQueryBuilder) Execute(p Payload, conn db.Connector) (interface{}, error) {
-	consolidated := map[uint64]map[string]uint64{}
-
-	for _, s := range p.Series {
-		query, err := t.buildQuery(p, s)
+	data := make(map[uint64]map[string]uint64)
+	for _, series := range p.Series {
+		query, err := t.buildQuery(p, series)
 		if err != nil
{ - log.Printf("Error building query for series %s: %v", s.Name, err) - return nil, fmt.Errorf("error building query for series %s: %v", s.Name, err) + log.Printf("buildQuery %s: %v", series.Name, err) + return nil, fmt.Errorf("series %s: %v", series.Name, err) } - rows, err := conn.Query(query) if err != nil { - log.Printf("Error executing query for series %s: %v", s.Name, err) - return nil, fmt.Errorf("error executing query for series %s: %v", s.Name, err) + log.Printf("exec %s: %v", series.Name, err) + return nil, fmt.Errorf("series %s: %v", series.Name, err) } - - var results []DataPoint + var pts []DataPoint for rows.Next() { - var res DataPoint - if err := rows.Scan(&res.Timestamp, &res.Count); err != nil { + var dp DataPoint + if err := rows.Scan(&dp.Timestamp, &dp.Count); err != nil { rows.Close() return nil, err } - results = append(results, res) + pts = append(pts, dp) } rows.Close() - filled := FillMissingDataPoints(p.StartTimestamp, p.EndTimestamp, p.Density, DataPoint{}, results, 1000) + filled := FillMissingDataPoints(p.StartTimestamp, p.EndTimestamp, p.Density, DataPoint{}, pts, 1000) for _, dp := range filled { - if _, ok := consolidated[dp.Timestamp]; !ok { - consolidated[dp.Timestamp] = map[string]uint64{} + if data[dp.Timestamp] == nil { + data[dp.Timestamp] = map[string]uint64{} } - consolidated[dp.Timestamp][s.Name] = dp.Count + data[dp.Timestamp][series.Name] = dp.Count } } var timestamps []uint64 - for ts := range consolidated { + for ts := range data { timestamps = append(timestamps, ts) } sort.Slice(timestamps, func(i, j int) bool { return timestamps[i] < timestamps[j] }) - var finalResults []map[string]interface{} + var result []map[string]interface{} for _, ts := range timestamps { row := map[string]interface{}{"timestamp": ts} - for _, s := range p.Series { - if count, ok := consolidated[ts][s.Name]; ok { - row[s.Name] = count - } else { - row[s.Name] = uint64(0) - } + for _, series := range p.Series { + row[series.Name] = data[ts][series.Name] } - finalResults = append(finalResults, row) + result = append(result, row) } - - return finalResults, nil + return result, nil } func (t TimeSeriesQueryBuilder) buildQuery(p Payload, s Series) (string, error) { - var query string switch p.MetricOf { case "sessionCount": - query = t.buildSessionCountQuery(p, s) + return t.buildTimeSeriesQuery(p, s, "sessionCount", "session_id"), nil case "userCount": - query = t.buildUserCountQuery(p, s) + return t.buildTimeSeriesQuery(p, s, "userCount", "user_id"), nil default: - query = "" + return "", fmt.Errorf("unsupported metric %q", p.MetricOf) } - return query, nil } -func (TimeSeriesQueryBuilder) buildSessionCountQuery(p Payload, s Series) string { - //eventConds, eventNames := buildEventConditions(s.Filter.Filters) - eventConds, eventNames := buildEventConditions(s.Filter.Filters, BuildConditionsOptions{ +func (t TimeSeriesQueryBuilder) buildTimeSeriesQuery(p Payload, s Series, metric, idField string) string { + sub := t.buildSubQuery(p, s, metric) + step := int(getStepSize(p.StartTimestamp, p.EndTimestamp, p.Density, false, 1000)) * 1000 + + return fmt.Sprintf( + "SELECT gs.generate_series AS timestamp, COALESCE(COUNT(DISTINCT ps.%s),0) AS count "+ + "FROM generate_series(%d,%d,%d) AS gs "+ + "LEFT JOIN (%s) AS ps ON TRUE "+ + "WHERE ps.datetime >= toDateTime(timestamp/1000) AND ps.datetime < toDateTime((timestamp+%d)/1000) "+ + "GROUP BY timestamp ORDER BY timestamp;", + idField, p.StartTimestamp, p.EndTimestamp, step, sub, step, + ) +} + +func (t TimeSeriesQueryBuilder) 
buildSubQuery(p Payload, s Series, metric string) string { + evConds, evNames := buildEventConditions(s.Filter.Filters, BuildConditionsOptions{ DefinedColumns: mainColumns, MainTableAlias: "main", PropertiesColumnName: "$properties", }) - sessionConds := buildSessionConditions(s.Filter.Filters) + sessConds := buildSessionConditions(s.Filter.Filters) staticEvt := buildStaticEventWhere(p) - sessWhere, sessJoin := buildStaticSessionWhere(p, sessionConds) - eventsSubQuery := buildEventsSubQuery("sessionCount", eventConds, eventNames, staticEvt, sessWhere, sessJoin) - mainQuery := buildMainQuery(p, eventsSubQuery, "sessionCount") - return mainQuery -} + sessWhere, sessJoin := buildStaticSessionWhere(p, sessConds) -func (TimeSeriesQueryBuilder) buildUserCountQuery(p Payload, s Series) string { - eventConds, eventNames := buildEventConditions(s.Filter.Filters, BuildConditionsOptions{ - DefinedColumns: mainColumns, - MainTableAlias: "main", - PropertiesColumnName: "$properties", - }) - sessionConds := buildSessionConditions(s.Filter.Filters) - staticEvt := buildStaticEventWhere(p) - sessWhere, sessJoin := buildStaticSessionWhere(p, sessionConds) - eventsSubQuery := buildEventsSubQuery("userCount", eventConds, eventNames, staticEvt, sessWhere, sessJoin) - mainQuery := buildMainQuery(p, eventsSubQuery, "userCount") - return mainQuery -} - -func buildEventsSubQuery(metric string, eventConds, eventNames []string, staticEvt, sessWhere, sessJoin string) string { - if len(eventConds) == 0 && len(eventNames) == 0 { + if len(evConds) == 0 && len(evNames) == 0 { if metric == "sessionCount" { - return fmt.Sprintf(sessionNoFiltersSubQueryTpl, sessJoin) + return fmt.Sprintf( + "SELECT s.session_id AS session_id, s.datetime AS datetime "+ + "FROM experimental.sessions AS s WHERE %s", + sessJoin, + ) } - return fmt.Sprintf(noFiltersSubQueryTpl, sessJoin) + return fmt.Sprintf( + "SELECT multiIf(s.user_id!='',s.user_id,s.user_anonymous_id!='',s.user_anonymous_id,toString(s.user_uuid)) AS user_id, s.datetime AS datetime "+ + "FROM experimental.sessions AS s WHERE %s", + sessJoin, + ) } - var evtNameClause string - var unique []string - for _, name := range eventNames { - if !contains(unique, name) { - unique = append(unique, name) + + uniq := make([]string, 0, len(evNames)) + for _, name := range evNames { + if !contains(uniq, name) { + uniq = append(uniq, name) } } - if len(unique) > 0 { - evtNameClause = fmt.Sprintf("AND main.`$event_name` IN (%s)", buildInClause(unique)) + nameClause := "" + if len(uniq) > 0 { + nameClause = fmt.Sprintf("AND main.`$event_name` IN (%s) ", buildInClause(uniq)) } + having := "" - if len(eventConds) > 0 { - having = buildHavingClause(eventConds) + if len(evConds) > 0 { + having = buildHavingClause(evConds) } - evtWhere := staticEvt - if len(eventConds) > 0 { - evtWhere += " AND " + strings.Join(eventConds, " AND ") + + whereEvt := staticEvt + if len(evConds) > 0 { + whereEvt += " AND " + strings.Join(evConds, " AND ") } - if metric == "sessionCount" { - return fmt.Sprintf(sessionSubQueryTpl, evtWhere, sessWhere, evtNameClause, having, sessJoin) - } - return fmt.Sprintf(subQueryTpl, evtWhere, sessWhere, evtNameClause, having, sessJoin) + + proj := map[string]string{ + "sessionCount": "s.session_id AS session_id", + "userCount": "multiIf(s.user_id!='',s.user_id,s.user_anonymous_id!='',s.user_anonymous_id,toString(s.user_uuid)) AS user_id", + }[metric] + ", s.datetime AS datetime" + + return fmt.Sprintf( + "SELECT %s FROM (SELECT main.session_id, MIN(main.created_at) AS first_event_ts, 
diff --git a/backend/pkg/analytics/charts/query.go b/backend/pkg/analytics/charts/query.go
index 4d42469d5..f87abb07d 100644
--- a/backend/pkg/analytics/charts/query.go
+++ b/backend/pkg/analytics/charts/query.go
@@ -55,6 +55,7 @@ var propertyKeyMap = map[string]filterConfig{
     "INPUT":           {LogicalProperty: "label"},
     "fetchUrl":        {LogicalProperty: "url_path"},
     "fetchStatusCode": {LogicalProperty: "status", IsNumeric: true},
+    //"fetchDuration": {LogicalProperty: "duration", IsNumeric: true},
     //"ISSUE": {LogicalProperty: "issue_type"},
     // TODO add more mappings as needed
 }
@@ -68,6 +69,13 @@ type filterConfig struct {
 func getColumnAccessor(logical string, isNumeric bool, opts BuildConditionsOptions) string {
     // helper: wrap names starting with $ in quotes
     quote := func(name string) string {
+        prefix := opts.MainTableAlias + "."
+        if strings.HasPrefix(name, prefix) {
+            suffix := strings.TrimPrefix(name, prefix)
+            if strings.HasPrefix(suffix, "$") {
+                return fmt.Sprintf("%s.\"%s\"", opts.MainTableAlias, suffix)
+            }
+        }
         if strings.HasPrefix(name, "$") {
             return fmt.Sprintf("\"%s\"", name)
         }
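The new branch in quote only fires for alias-qualified names whose column part starts with '$'. Extracted below as a standalone sketch (the alias is a parameter here, and the final pass-through return is an assumption, since the hunk ends before it):

    package main

    import (
        "fmt"
        "strings"
    )

    // Mirrors the patched helper; returning the name unchanged at the end is
    // assumed, since the hunk above cuts off before the fallthrough.
    func quote(name, alias string) string {
        prefix := alias + "."
        if strings.HasPrefix(name, prefix) {
            suffix := strings.TrimPrefix(name, prefix)
            if strings.HasPrefix(suffix, "$") {
                return fmt.Sprintf("%s.\"%s\"", alias, suffix)
            }
        }
        if strings.HasPrefix(name, "$") {
            return fmt.Sprintf("\"%s\"", name)
        }
        return name
    }

    func main() {
        fmt.Println(quote("main.$properties", "main")) // main."$properties"
        fmt.Println(quote("$event_name", "main"))      // "$event_name"
        fmt.Println(quote("main.user_id", "main"))     // main.user_id
    }

Before this change an alias-qualified name like main.$properties matched neither condition and fell through unquoted, which ClickHouse rejects, since bare identifiers cannot start with '$'.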
@@ -101,7 +109,7 @@ func getColumnAccessor(logical string, isNumeric bool, opts BuildConditionsOptio
 
     // JSON extraction
     if isNumeric {
-        return fmt.Sprintf("toFloat64(JSONExtractString(toString(%s), '%s'))", colName, propKey)
+        return fmt.Sprintf("JSONExtractFloat(toString(%s), '%s')", colName, propKey)
     }
     return fmt.Sprintf("JSONExtractString(toString(%s), '%s')", colName, propKey)
 }
@@ -231,6 +239,8 @@ func buildCond(expr string, values []string, operator string, isNumeric bool) st
     case "in", "notIn":
         neg := operator == "notIn"
         return inClause(expr, values, neg, isNumeric)
+    case ">=", ">", "<=", "<":
+        return multiValCond(expr, values, "%s "+operator+" %s", isNumeric)
     default:
         if op, ok := compOps[operator]; ok {
             tmpl := "%s " + op + " %s"
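Two notes on the hunks above. Swapping toFloat64(JSONExtractString(...)) for JSONExtractFloat avoids failures when the JSON property holds an actual number: JSONExtractString returns an empty string for non-string values, which toFloat64 cannot parse, while JSONExtractFloat reads the numeric value directly. And the new comparison branch routes ">=", ">", "<=", "<" through multiValCond, which is not part of this patch; the sketch below substitutes a stand-in that is assumed to render the template once per value and OR the results:

    package main

    import (
        "fmt"
        "strings"
    )

    // Stand-in for the real multiValCond in query.go (not shown in the patch):
    // assumed to apply tmpl to each value and OR the pieces together.
    func multiValCond(expr string, values []string, tmpl string, isNumeric bool) string {
        parts := make([]string, 0, len(values))
        for _, v := range values {
            if !isNumeric {
                v = "'" + v + "'" // quote string literals for SQL
            }
            parts = append(parts, fmt.Sprintf(tmpl, expr, v))
        }
        return "(" + strings.Join(parts, " OR ") + ")"
    }

    func main() {
        // What the new case would produce for ">=" on a numeric accessor:
        expr := `JSONExtractFloat(toString(main."$properties"), 'duration')`
        fmt.Println(multiValCond(expr, []string{"500"}, "%s >= %s", true))
        // (JSONExtractFloat(toString(main."$properties"), 'duration') >= 500)
    }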