From 691c84d5f4ac9d714e45ae583ca5d1064de495e9 Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 8 Jun 2023 09:07:08 +0200 Subject: [PATCH] [HTTP] added city and state support for geo extractor (#1312) * feat(backend/http): added city and state support for geo extractor * feat(backend): use new GeoLite db in Dockerfile * feat(backend/http): added unit test --- backend/Dockerfile | 2 +- backend/internal/http/geoip/geoip.go | 50 ++++++++--- backend/internal/http/geoip/geoip_test.go | 88 ++++++++++++++++++++ backend/internal/http/geoip/http.go | 13 --- backend/internal/http/router/handlers-ios.go | 5 +- backend/internal/http/router/handlers-web.go | 11 ++- backend/internal/http/router/router.go | 8 ++ backend/internal/http/services/services.go | 4 +- backend/pkg/db/cache/messages-web.go | 28 ++++--- backend/pkg/db/postgres/messages-common.go | 6 +- backend/pkg/db/postgres/unstarted-session.go | 8 +- backend/pkg/db/types/session.go | 2 + 12 files changed, 170 insertions(+), 55 deletions(-) create mode 100644 backend/internal/http/geoip/geoip_test.go delete mode 100644 backend/internal/http/geoip/http.go diff --git a/backend/Dockerfile b/backend/Dockerfile index 5e1b6a6aa..9c0e75887 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -96,7 +96,7 @@ ENV TZ=UTC \ RUN if [ "$SERVICE_NAME" = "http" ]; then \ wget https://raw.githubusercontent.com/ua-parser/uap-core/master/regexes.yaml -O "$UAPARSER_FILE" &&\ - wget https://static.openreplay.com/geoip/GeoLite2-Country.mmdb -O "$MAXMINDDB_FILE"; fi + wget https://static.openreplay.com/geoip/GeoLite2-City.mmdb -O "$MAXMINDDB_FILE"; fi COPY --from=build /root/service /home/openreplay/service diff --git a/backend/internal/http/geoip/geoip.go b/backend/internal/http/geoip/geoip.go index 131996d7c..96086b643 100644 --- a/backend/internal/http/geoip/geoip.go +++ b/backend/internal/http/geoip/geoip.go @@ -1,41 +1,65 @@ package geoip import ( + "github.com/oschwald/maxminddb-golang" "log" "net" - - maxminddb "github.com/oschwald/maxminddb-golang" ) type geoIPRecord struct { Country struct { ISOCode string `maxminddb:"iso_code"` } `maxminddb:"country"` + States []struct { + Names map[string]string `maxminddb:"names"` + } `maxminddb:"subdivisions"` + City struct { + Names map[string]string `maxminddb:"names"` + } `maxminddb:"city"` } -type GeoIP struct { +type GeoRecord struct { + Country string + State string + City string +} + +type GeoParser interface { + Parse(ip net.IP) *GeoRecord +} + +type geoParser struct { r *maxminddb.Reader } -func NewGeoIP(file string) *GeoIP { +func New(file string) GeoParser { r, err := maxminddb.Open(file) if err != nil { log.Fatalln(err) } - return &GeoIP{r} + return &geoParser{r} } -func (geoIP *GeoIP) ExtractISOCode(ip net.IP) string { +func (geoIP *geoParser) Parse(ip net.IP) *GeoRecord { + res := &GeoRecord{ + Country: "UN", + State: "", + City: "", + } if ip == nil { - return "UN" + return res } - var code string var record geoIPRecord - if geoIP.r.Lookup(ip, &record) == nil { - code = record.Country.ISOCode + if err := geoIP.r.Lookup(ip, &record); err != nil { + log.Println(err) + return res } - if code == "" { - code = "UN" + if record.Country.ISOCode != "" { + res.Country = record.Country.ISOCode } - return code + if len(record.States) > 0 { + res.State = record.States[0].Names["en"] + } + res.City = record.City.Names["en"] + return res } diff --git a/backend/internal/http/geoip/geoip_test.go b/backend/internal/http/geoip/geoip_test.go new file mode 100644 index 000000000..ea2d54a09 --- /dev/null +++ b/backend/internal/http/geoip/geoip_test.go @@ -0,0 +1,88 @@ +package geoip + +import ( + "io" + "log" + "net" + "net/http" + "os" + "testing" +) + +func LoadGeoLiteDB() { + fileURL := "https://static.openreplay.com/geoip/GeoLite2-City.mmdb" + + // Create the file + file, err := os.Create("geo.mmdb") + if err != nil { + log.Fatal(err) + } + defer file.Close() + + // Download the file + response, err := http.Get(fileURL) + if err != nil { + log.Fatal(err) + } + defer response.Body.Close() + + // Check if the request was successful + if response.StatusCode != http.StatusOK { + log.Fatalf("Failed to download file: %s", response.Status) + } + + // Copy the downloaded file to the local file + _, err = io.Copy(file, response.Body) + if err != nil { + log.Fatal(err) + } +} + +func DeleteGeoLiteDB() { + if err := os.Remove("geo.mmdb"); err != nil { + log.Fatal(err) + } +} + +func TestGeoIP(t *testing.T) { + LoadGeoLiteDB() + defer DeleteGeoLiteDB() + + geoIP := New("geo.mmdb") + + ip := net.ParseIP("92.151.113.120") + correctResult := &GeoRecord{ + Country: "FR", + State: "Île-de-France", + City: "Courbevoie", + } + result := geoIP.Parse(ip) + + if result.Country != correctResult.Country { + t.Errorf("Country is incorrect: %s != %s", result.Country, correctResult.Country) + } + if result.State != correctResult.State { + t.Errorf("State is incorrect: %s != %s", result.State, correctResult.State) + } + if result.City != correctResult.City { + t.Errorf("City is incorrect: %s != %s", result.City, correctResult.City) + } + + emptyIP := net.ParseIP("") + correctResult = &GeoRecord{ + Country: "UN", + State: "", + City: "", + } + result = geoIP.Parse(emptyIP) + + if result.Country != correctResult.Country { + t.Errorf("Country is incorrect: %s != %s", result.Country, correctResult.Country) + } + if result.State != correctResult.State { + t.Errorf("State is incorrect: %s != %s", result.State, correctResult.State) + } + if result.City != correctResult.City { + t.Errorf("City is incorrect: %s != %s", result.City, correctResult.City) + } +} diff --git a/backend/internal/http/geoip/http.go b/backend/internal/http/geoip/http.go deleted file mode 100644 index 49f919b92..000000000 --- a/backend/internal/http/geoip/http.go +++ /dev/null @@ -1,13 +0,0 @@ -package geoip - -import ( - "net" - "net/http" - - "github.com/tomasen/realip" -) - -func (geoIP *GeoIP) ExtractISOCodeFromHTTPRequest(r *http.Request) string { - ip := net.ParseIP(realip.FromRequest(r)) - return geoIP.ExtractISOCode(ip) -} diff --git a/backend/internal/http/router/handlers-ios.go b/backend/internal/http/router/handlers-ios.go index 6b05cef58..434d87ca3 100644 --- a/backend/internal/http/router/handlers-ios.go +++ b/backend/internal/http/router/handlers-ios.go @@ -71,8 +71,7 @@ func (e *Router) startSessionHandlerIOS(w http.ResponseWriter, r *http.Request) // TODO: if EXPIRED => send message for two sessions association expTime := startTime.Add(time.Duration(p.MaxSessionDuration) * time.Millisecond) tokenData = &token.TokenData{sessionID, 0, expTime.UnixMilli()} - - country := e.services.GeoIP.ExtractISOCodeFromHTTPRequest(r) + geoInfo := e.ExtractGeoData(r) // The difference with web is mostly here: sessStart := &IOSSessionStart{ @@ -85,7 +84,7 @@ func (e *Router) startSessionHandlerIOS(w http.ResponseWriter, r *http.Request) UserOSVersion: req.UserOSVersion, UserDevice: ios.MapIOSDevice(req.UserDevice), UserDeviceType: ios.GetIOSDeviceType(req.UserDevice), - UserCountry: country, + UserCountry: geoInfo.Country, } e.services.Producer.Produce(e.cfg.TopicRawIOS, tokenData.ID, sessStart.Encode()) } diff --git a/backend/internal/http/router/handlers-web.go b/backend/internal/http/router/handlers-web.go index 898427dd2..6fd469f8e 100644 --- a/backend/internal/http/router/handlers-web.go +++ b/backend/internal/http/router/handlers-web.go @@ -140,6 +140,7 @@ func (e *Router) startSessionHandlerWeb(w http.ResponseWriter, r *http.Request) Delay: startTimeMili - req.Timestamp, ExpTime: expTime.UnixMilli(), } + geoInfo := e.ExtractGeoData(r) sessionStart := &SessionStart{ Timestamp: getSessionTimestamp(req, startTimeMili), @@ -154,14 +155,14 @@ func (e *Router) startSessionHandlerWeb(w http.ResponseWriter, r *http.Request) UserBrowserVersion: ua.BrowserVersion, UserDevice: ua.Device, UserDeviceType: ua.DeviceType, - UserCountry: e.services.GeoIP.ExtractISOCodeFromHTTPRequest(r), + UserCountry: geoInfo.Country, UserDeviceMemorySize: req.DeviceMemory, UserDeviceHeapSize: req.JsHeapSizeLimit, UserID: req.UserID, } // Save sessionStart to db - if err := e.services.Database.InsertWebSessionStart(sessionID, sessionStart); err != nil { + if err := e.services.Database.InsertWebSessionStart(sessionID, sessionStart, geoInfo); err != nil { log.Printf("can't insert session start: %s", err) } @@ -257,7 +258,7 @@ func (e *Router) notStartedHandlerWeb(w http.ResponseWriter, r *http.Request) { ResponseWithError(w, http.StatusForbidden, errors.New("browser not recognized"), startTime, r.URL.Path, bodySize) return } - country := e.services.GeoIP.ExtractISOCodeFromHTTPRequest(r) + geoInfo := e.ExtractGeoData(r) err = e.services.Database.InsertUnstartedSession(postgres.UnstartedSession{ ProjectKey: *req.ProjectKey, TrackerVersion: req.TrackerVersion, @@ -270,7 +271,9 @@ func (e *Router) notStartedHandlerWeb(w http.ResponseWriter, r *http.Request) { UserBrowserVersion: ua.BrowserVersion, UserDevice: ua.Device, UserDeviceType: ua.DeviceType, - UserCountry: country, + UserCountry: geoInfo.Country, + UserState: geoInfo.State, + UserCity: geoInfo.City, }) if err != nil { log.Printf("Unable to insert Unstarted Session: %v\n", err) diff --git a/backend/internal/http/router/router.go b/backend/internal/http/router/router.go index f73a20d93..501d0f001 100644 --- a/backend/internal/http/router/router.go +++ b/backend/internal/http/router/router.go @@ -2,8 +2,11 @@ package router import ( "fmt" + "github.com/tomasen/realip" "log" + "net" "net/http" + "openreplay/backend/internal/http/geoip" "sync" "time" @@ -86,6 +89,11 @@ func (e *Router) clearBeaconSizes() { } } +func (e *Router) ExtractGeoData(r *http.Request) *geoip.GeoRecord { + ip := net.ParseIP(realip.FromRequest(r)) + return e.services.GeoIP.Parse(ip) +} + func (e *Router) init() { e.router = mux.NewRouter() diff --git a/backend/internal/http/services/services.go b/backend/internal/http/services/services.go index fcdde26ff..0dbadd340 100644 --- a/backend/internal/http/services/services.go +++ b/backend/internal/http/services/services.go @@ -16,7 +16,7 @@ type ServicesBuilder struct { Producer types.Producer Flaker *flakeid.Flaker UaParser *uaparser.UAParser - GeoIP *geoip.GeoIP + GeoIP geoip.GeoParser Tokenizer *token.Tokenizer Storage *storage.S3 } @@ -28,7 +28,7 @@ func New(cfg *http.Config, producer types.Producer, pgconn *cache.PGCache) *Serv Storage: storage.NewS3(cfg.AWSRegion, cfg.S3BucketIOSImages, cfg.UseFileTags()), Tokenizer: token.NewTokenizer(cfg.TokenSecret), UaParser: uaparser.NewUAParser(cfg.UAParserFile), - GeoIP: geoip.NewGeoIP(cfg.MaxMinDBFile), + GeoIP: geoip.New(cfg.MaxMinDBFile), Flaker: flakeid.NewFlaker(cfg.WorkerID), } } diff --git a/backend/pkg/db/cache/messages-web.go b/backend/pkg/db/cache/messages-web.go index 58c703318..e33d203d5 100644 --- a/backend/pkg/db/cache/messages-web.go +++ b/backend/pkg/db/cache/messages-web.go @@ -2,24 +2,26 @@ package cache import ( "fmt" + "openreplay/backend/internal/http/geoip" . "openreplay/backend/pkg/db/types" . "openreplay/backend/pkg/messages" ) -func (c *PGCache) InsertWebSessionStart(sessionID uint64, s *SessionStart) error { +func (c *PGCache) InsertWebSessionStart(sessionID uint64, s *SessionStart, geo *geoip.GeoRecord) error { return c.Conn.InsertSessionStart(sessionID, &Session{ - SessionID: sessionID, - Platform: "web", - Timestamp: s.Timestamp, - ProjectID: uint32(s.ProjectID), - TrackerVersion: s.TrackerVersion, - RevID: s.RevID, - UserUUID: s.UserUUID, - UserOS: s.UserOS, - UserOSVersion: s.UserOSVersion, - UserDevice: s.UserDevice, - UserCountry: s.UserCountry, - // web properties (TODO: unite different platform types) + SessionID: sessionID, + Platform: "web", + Timestamp: s.Timestamp, + ProjectID: uint32(s.ProjectID), + TrackerVersion: s.TrackerVersion, + RevID: s.RevID, + UserUUID: s.UserUUID, + UserOS: s.UserOS, + UserOSVersion: s.UserOSVersion, + UserDevice: s.UserDevice, + UserCountry: geo.Country, + UserState: geo.State, + UserCity: geo.City, UserAgent: s.UserAgent, UserBrowser: s.UserBrowser, UserBrowserVersion: s.UserBrowserVersion, diff --git a/backend/pkg/db/postgres/messages-common.go b/backend/pkg/db/postgres/messages-common.go index 1ffdb8ddf..eb7508168 100644 --- a/backend/pkg/db/postgres/messages-common.go +++ b/backend/pkg/db/postgres/messages-common.go @@ -29,7 +29,7 @@ func (conn *Conn) InsertSessionStart(sessionID uint64, s *types.Session) error { tracker_version, issue_score, platform, user_agent, user_browser, user_browser_version, user_device_memory_size, user_device_heap_size, - user_id + user_id, user_state, user_city ) VALUES ( $1, $2, $3, $4, $5, $6, $7, @@ -38,7 +38,7 @@ func (conn *Conn) InsertSessionStart(sessionID uint64, s *types.Session) error { $11, $12, $13, NULLIF($14, ''), NULLIF($15, ''), NULLIF($16, ''), NULLIF($17, 0), NULLIF($18, 0::bigint), - NULLIF(LEFT($19, 8000), '') + NULLIF(LEFT($19, 8000), ''), NULLIF($20, ''), NULLIF($21, '') )`, sessionID, s.ProjectID, s.Timestamp, s.UserUUID, s.UserDevice, s.UserDeviceType, s.UserCountry, @@ -47,7 +47,7 @@ func (conn *Conn) InsertSessionStart(sessionID uint64, s *types.Session) error { s.TrackerVersion, s.Timestamp/1000, s.Platform, s.UserAgent, s.UserBrowser, s.UserBrowserVersion, s.UserDeviceMemorySize, s.UserDeviceHeapSize, - s.UserID, + s.UserID, s.UserState, s.UserCity, ) } diff --git a/backend/pkg/db/postgres/unstarted-session.go b/backend/pkg/db/postgres/unstarted-session.go index cc27e3f5d..e1e21d1e1 100644 --- a/backend/pkg/db/postgres/unstarted-session.go +++ b/backend/pkg/db/postgres/unstarted-session.go @@ -13,6 +13,8 @@ type UnstartedSession struct { UserDevice string UserDeviceType string UserCountry string + UserState string + UserCity string } func (conn *Conn) InsertUnstartedSession(s UnstartedSession) error { @@ -24,7 +26,7 @@ func (conn *Conn) InsertUnstartedSession(s UnstartedSession) error { user_os, user_os_version, user_browser, user_browser_version, user_device, user_device_type, - user_country + user_country, user_state, user_city ) VALUES ( (SELECT project_id FROM projects WHERE project_key = $1), $2, $3, @@ -32,7 +34,7 @@ func (conn *Conn) InsertUnstartedSession(s UnstartedSession) error { $6, $7, $8, $9, $10, $11, - $12 + $12, NULLIF($13, ''), NULLIF($14, '') )`, s.ProjectKey, s.TrackerVersion, s.DoNotTrack, @@ -40,6 +42,6 @@ func (conn *Conn) InsertUnstartedSession(s UnstartedSession) error { s.UserOS, s.UserOSVersion, s.UserBrowser, s.UserBrowserVersion, s.UserDevice, s.UserDeviceType, - s.UserCountry, + s.UserCountry, s.UserState, s.UserCity, ) } diff --git a/backend/pkg/db/types/session.go b/backend/pkg/db/types/session.go index 202eb9966..f4d4509fe 100644 --- a/backend/pkg/db/types/session.go +++ b/backend/pkg/db/types/session.go @@ -11,6 +11,8 @@ type Session struct { UserOSVersion string UserDevice string UserCountry string + UserState string + UserCity string Referrer *string Duration *uint64