openreplay/backend/pkg/analytics/query/table.go
2025-05-23 17:02:45 +02:00

252 lines
7.9 KiB
Go

package query
import (
"fmt"
"strings"
)
// TableQueryBuilder builds the SQL text for table metrics. It is an empty
// struct and carries no state; all inputs arrive through the MetricPayload
// passed to Build.
type TableQueryBuilder struct{}
// Build returns the SQL text for the table metric described by p.
// It is the exported entry point and simply forwards to buildQuery.
func (t TableQueryBuilder) Build(p MetricPayload) string {
	sql := t.buildQuery(p)
	return sql
}
// buildQuery assembles the SQL statement for a table metric that counts
// distinct sessions per url_path.
//
// Only the first series of r is used. Its filters are split into session-level
// and event-level filters, which feed three nested queries:
//
//   - an events sub-query ("f") that selects the session ids whose events fall
//     in the requested project/time window and satisfy the event HAVING clause;
//   - a sessions query ("s") that joins sessions with the distinct
//     (session_id, `$current_url`) pairs of LOCATION events and applies the
//     session-level conditions;
//   - an outer aggregation that groups the joined rows by url_path and returns
//     at most 200 rows ordered by session count.
//
// The returned text still contains the named placeholders %(project_id)s,
// %(start_time)s and %(end_time)s; they are not substituted here
// (presumably the caller binds them — TODO confirm). The timestamps are divided
// by 1000 before toDateTime, so they are assumed to be in milliseconds.
//
// An empty string is returned when r has no series, because there is nothing
// to build a query from.
func (t TableQueryBuilder) buildQuery(r MetricPayload) string {
	// Indexing r.Series[0] on an empty payload would panic; report "no query"
	// instead of crashing the caller.
	if len(r.Series) == 0 {
		return ""
	}
	series := r.Series[0]

	sessionFilters, eventFilters := partitionFilters(series.Filter.Filters)
	sessionWhere := buildSessionWhere(sessionFilters)
	eventWhere, seqHaving := buildEventsWhere(eventFilters, series.Filter.EventsOrder)

	// Sessions that have matching events inside the time window.
	eventsQuery := fmt.Sprintf(
		"SELECT %s,\n"+
			" MIN(%s) AS first_event_ts,\n"+
			" MAX(%s) AS last_event_ts\n"+
			"FROM %s AS main\n"+
			"WHERE main.project_id = %%(project_id)s\n"+
			" AND %s >= toDateTime(%%(start_time)s/1000)\n"+
			" AND %s <= toDateTime(%%(end_time)s/1000)\n"+
			" AND (%s)\n"+
			"GROUP BY %s",
		ColEventSessionID,
		ColEventTime,
		ColEventTime,
		TableEvents,
		ColEventTime,
		ColEventTime,
		strings.Join(eventWhere, " OR "),
		ColEventSessionID,
	)
	// A bare "HAVING" with no expression is a syntax error, so the clause is
	// only emitted when there is an event condition to apply.
	if strings.TrimSpace(seqHaving) != "" {
		eventsQuery += "\nHAVING " + seqHaving
	}

	// Sessions enriched with every URL visited through LOCATION events.
	sessionsQuery := fmt.Sprintf(
		"SELECT *\n"+
			"FROM %s AS s\n"+
			"INNER JOIN (\n"+
			" SELECT DISTINCT ev.session_id, ev.`$current_url` AS url_path\n"+
			" FROM %s AS ev\n"+
			" WHERE ev.created_at >= toDateTime(%%(start_time)s/1000)\n"+
			" AND ev.created_at <= toDateTime(%%(end_time)s/1000)\n"+
			" AND ev.project_id = %%(project_id)s\n"+
			" AND ev.`$event_name` = 'LOCATION'\n"+
			") AS extra_event USING (session_id)\n"+
			"WHERE s.project_id = %%(project_id)s\n"+
			" AND isNotNull(s.duration)\n"+
			" AND s.datetime >= toDateTime(%%(start_time)s/1000)\n"+
			" AND s.datetime <= toDateTime(%%(end_time)s/1000)\n",
		TableSessions,
		TableEvents,
	)
	if len(sessionWhere) > 0 {
		sessionsQuery += " AND " + strings.Join(sessionWhere, " AND ") + "\n"
	}

	// Keep only the sessions present in both sub-queries.
	filtered := fmt.Sprintf(
		"SELECT s.session_id AS session_id, s.url_path\n"+
			"FROM (\n%s\n) AS f\n"+
			"INNER JOIN (\n%s) AS s\n"+
			" ON (s.session_id = f.session_id)\n",
		eventsQuery,
		sessionsQuery,
	)

	// Aggregate per URL; the window functions expose the overall totals on
	// every returned row.
	return fmt.Sprintf(
		"SELECT COUNT(DISTINCT url_path) OVER () AS main_count,\n"+
			" url_path AS name,\n"+
			" COUNT(DISTINCT session_id) AS total,\n"+
			" COALESCE(SUM(COUNT(DISTINCT session_id)) OVER (), 0) AS total_count\n"+
			"FROM (\n%s) AS filtered_sessions\n"+
			"GROUP BY url_path\n"+
			"ORDER BY total DESC\n"+
			"LIMIT 200 OFFSET 0;",
		filtered,
	)
}
// partitionFilters splits filters into two groups, preserving their relative
// order: filters whose IsEvent flag is set go to eventFilters, all others go
// to sessionFilters. A group with no members is returned as a nil slice.
func partitionFilters(filters []Filter) (sessionFilters []Filter, eventFilters []Filter) {
	for i := range filters {
		bucket := &sessionFilters
		if filters[i].IsEvent {
			bucket = &eventFilters
		}
		*bucket = append(*bucket, filters[i])
	}
	return sessionFilters, eventFilters
}
// buildSessionWhere translates session-level filters into SQL conditions, one
// string per condition, in filter order.
//
// Every supported filter type except FilterDuration becomes an equality test
// between its column and the filter's values concatenated by concatValues.
// FilterDuration yields a lower and an upper bound, and only when exactly two
// values are supplied. Filters of any other type are ignored. FilterMetadata
// always targets ColMetadata1.
//
// Each returned condition ends with "\n" purely for readability of the final
// query. The result is nil when no filter produced a condition.
//
// Filter values are embedded in single-quoted SQL literals, so backslashes and
// single quotes are escaped first; a value can therefore not terminate the
// literal and alter the statement.
func buildSessionWhere(filters []Filter) []string {
	// Backslash must be listed first so it is matched as its own token; the
	// replacer works in a single pass and never re-escapes its own output.
	literal := strings.NewReplacer(`\`, `\\`, `'`, `\'`)

	var conds []string
	for _, f := range filters {
		// column is kept as an interface value so the column constants can be
		// formatted with %s regardless of their declared type.
		var column any
		switch f.Type {
		case FilterUserCountry:
			column = ColUserCountry
		case FilterUserCity:
			column = ColUserCity
		case FilterUserState:
			column = ColUserState
		case FilterUserId:
			column = ColUserID
		case FilterUserAnonymousId:
			column = ColUserAnonymousID
		case FilterUserOs:
			column = ColUserOS
		case FilterUserBrowser:
			column = ColUserBrowser
		case FilterUserDevice:
			column = ColUserDevice
		case FilterPlatform:
			column = ColUserDeviceType
		case FilterRevId:
			column = ColRevID
		case FilterReferrer:
			column = ColBaseReferrer
		case FilterUtmSource:
			column = ColUtmSource
		case FilterUtmMedium:
			column = ColUtmMedium
		case FilterUtmCampaign:
			column = ColUtmCampaign
		case FilterMetadata:
			column = ColMetadata1
		case FilterDuration:
			// A duration filter is a [min, max] pair; anything else is skipped.
			if len(f.Value) == 2 {
				conds = append(conds,
					fmt.Sprintf("%s >= '%s'\n", ColDuration, literal.Replace(f.Value[0])),
					fmt.Sprintf("%s <= '%s'\n", ColDuration, literal.Replace(f.Value[1])),
				)
			}
			continue
		default:
			// Unsupported session filter: contributes no condition.
			continue
		}
		conds = append(conds, fmt.Sprintf("%s = toString('%s')\n", column, literal.Replace(concatValues(f.Value))))
	}
	return conds
}
// concatValues glues all elements of v together, in order and without any
// separator. A nil or empty slice yields the empty string.
func concatValues(v []string) string {
	var joined strings.Builder
	for _, part := range v {
		joined.WriteString(part)
	}
	return joined.String()
}
// buildEventsWhere turns event-level filters into the pieces needed by the
// events sub-query.
//
// eventConditions always holds a single parenthesised condition restricting
// rows to the CLICK, INPUT, LOCATION, CUSTOM and REQUEST event names.
//
// having combines one condition per filter according to order:
//
//   - EventOrderThen: a sequenceMatch call whose pattern is (?1)(?2)... so the
//     conditions must occur in filter order, timed by ColEventTime;
//   - EventOrderAnd: the conditions joined with AND;
//   - anything else: each condition parenthesised and joined with OR.
//
// having is empty when filters is empty, for every order; the caller should
// then omit the HAVING clause.
//
// NOTE(review): for the AND/OR orders the conditions are raw row predicates
// placed in HAVING of a GROUP BY query; they may need to be wrapped in an
// aggregate — confirm against the target database.
func buildEventsWhere(filters []Filter, order EventOrder) (eventConditions []string, having string) {
	eventNames := []string{"CLICK", "INPUT", "LOCATION", "CUSTOM", "REQUEST"}
	nameChecks := make([]string, 0, len(eventNames))
	for _, name := range eventNames {
		nameChecks = append(nameChecks, fmt.Sprintf("%s = '%s'", ColEventName, name))
	}
	eventConditions = []string{"(" + strings.Join(nameChecks, " OR ") + ")"}

	// Used for the fallback branch, where the filter type itself is embedded
	// in a single-quoted SQL literal.
	literal := strings.NewReplacer(`\`, `\\`, `'`, `\'`)

	seq := make([]string, 0, len(filters))
	for _, f := range filters {
		switch f.Type {
		case FilterClick:
			seq = append(seq, seqCond("CLICK", "selector", f))
		case FilterInput:
			seq = append(seq, seqCond("INPUT", "label", f))
		case FilterLocation:
			seq = append(seq, seqCond("LOCATION", "url_path", f))
		case FilterCustom:
			seq = append(seq, seqCond("CUSTOM", "name", f))
		case FilterFetch:
			seq = append(seq, seqFetchCond("REQUEST", f))
		case FilterFetchStatusCode:
			seq = append(seq, seqCond("REQUEST", "status", f))
		default:
			// Unknown types are matched by event name only.
			seq = append(seq, fmt.Sprintf("(%s = '%s')", ColEventName, literal.Replace(strings.ToUpper(string(f.Type)))))
		}
	}

	// Without conditions there is nothing to match. In particular the
	// sequenceMatch form would be malformed (empty pattern, dangling comma).
	if len(seq) == 0 {
		return eventConditions, ""
	}

	switch order {
	case EventOrderThen:
		// One (?N) placeholder per condition, in order.
		var pattern strings.Builder
		for i := range seq {
			fmt.Fprintf(&pattern, "(?%d)", i+1)
		}
		having = fmt.Sprintf("sequenceMatch('%s')(\n%s,\n%s)",
			pattern.String(),
			fmt.Sprintf("toUnixTimestamp(%s)", ColEventTime),
			strings.Join(seq, ",\n"))
	case EventOrderAnd:
		having = strings.Join(seq, " AND ")
	default:
		// Any other order is treated as OR.
		alternatives := make([]string, 0, len(seq))
		for _, cond := range seq {
			alternatives = append(alternatives, "("+cond+")")
		}
		having = strings.Join(alternatives, " OR ")
	}
	return eventConditions, having
}
// seqCond builds one parenthesised SQL condition that matches events named
// eventName (upper-cased) whose JSON property key, read from
// ColEventProperties, compares to the filter's values.
//
// The comparison operator comes from parseOperator(f.Operator) and the
// right-hand side is the filter's values concatenated by concatValues. No
// wildcards are added, so a LIKE comparison only matches patterns already
// present in the value.
//
// The value is embedded in a single-quoted SQL literal; backslashes and single
// quotes are escaped so it cannot terminate the literal.
func seqCond(eventName, key string, f Filter) string {
	literal := strings.NewReplacer(`\`, `\\`, `'`, `\'`)
	value := literal.Replace(concatValues(f.Value))
	return fmt.Sprintf("(%s = '%s' AND JSONExtractString(toString(%s), '%s') %s '%s')",
		ColEventName, strings.ToUpper(eventName), ColEventProperties, key, parseOperator(f.Operator), value)
}
// seqFetchCond builds one parenthesised SQL condition for a fetch/request
// filter. It always requires the event name to equal eventName (upper-cased)
// and adds, joined with AND, one equality test per non-empty sub-filter in
// f.Filters:
//
//   - FilterFetch compares ColEventURLPath;
//   - FilterFetchStatusCode compares ColEventStatus.
//
// Sub-filters of other types, or without values, are ignored. Only the
// sub-filters are read; f.Value and f.Operator are not used.
//
// Values are embedded in single-quoted SQL literals; backslashes and single
// quotes are escaped so a value cannot terminate the literal.
func seqFetchCond(eventName string, f Filter) string {
	literal := strings.NewReplacer(`\`, `\\`, `'`, `\'`)

	parts := []string{fmt.Sprintf("(%s = '%s')", ColEventName, strings.ToUpper(eventName))}

	var extras []string
	for _, sub := range f.Filters {
		if len(sub.Value) == 0 {
			continue
		}
		switch sub.Type {
		case FilterFetch:
			extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventURLPath, literal.Replace(concatValues(sub.Value))))
		case FilterFetchStatusCode:
			extras = append(extras, fmt.Sprintf("(%s = '%s')", ColEventStatus, literal.Replace(concatValues(sub.Value))))
		}
	}
	if len(extras) > 0 {
		parts = append(parts, strings.Join(extras, " AND "))
	}
	return "(" + strings.Join(parts, " AND ") + ")"
}
// parseOperator maps a filter operator name (case-insensitive) to the SQL
// comparison operator used in generated conditions: "LIKE" for the contains
// and starts-with operators, "=" for everything else, including unknown
// operators.
func parseOperator(op string) string {
	// TODO implement this properly
	sqlOp := "="
	switch strings.ToLower(op) {
	case OperatorStringContains, OperatorStringStartsWith:
		sqlOp = "LIKE"
	case OperatorStringIs, OperatorStringOn, "=", OperatorStringOnAny:
		// plain equality
	case OperatorStringEndsWith:
		// might interpret differently in real impl; equality for now
	}
	return sqlOp
}