[HTTP] added city and state support for geo extractor (#1312)

* feat(backend/http): added city and state support for geo extractor

* feat(backend): use new GeoLite db in Dockerfile

* feat(backend/http): added unit test
This commit is contained in:
Alexander 2023-06-08 09:07:08 +02:00 committed by GitHub
parent d8d4e0beeb
commit 691c84d5f4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 170 additions and 55 deletions

View file

@ -96,7 +96,7 @@ ENV TZ=UTC \
RUN if [ "$SERVICE_NAME" = "http" ]; then \
wget https://raw.githubusercontent.com/ua-parser/uap-core/master/regexes.yaml -O "$UAPARSER_FILE" &&\
wget https://static.openreplay.com/geoip/GeoLite2-Country.mmdb -O "$MAXMINDDB_FILE"; fi
wget https://static.openreplay.com/geoip/GeoLite2-City.mmdb -O "$MAXMINDDB_FILE"; fi
COPY --from=build /root/service /home/openreplay/service

View file

@ -1,41 +1,65 @@
package geoip
import (
"github.com/oschwald/maxminddb-golang"
"log"
"net"
maxminddb "github.com/oschwald/maxminddb-golang"
)
// geoIPRecord is the decoding target passed to the MaxMind reader's Lookup.
// The maxminddb struct tags select which entries of a database record are read.
type geoIPRecord struct {
// Country carries the "iso_code" value of the record's "country" entry.
Country struct {
ISOCode string `maxminddb:"iso_code"`
} `maxminddb:"country"`
// States is decoded from the record's "subdivisions" list. Each entry exposes
// its "names" map (presumably keyed by language code; Parse reads the "en" key).
States []struct {
Names map[string]string `maxminddb:"names"`
} `maxminddb:"subdivisions"`
// City is decoded from the record's "city" entry and exposes its "names" map
// (Parse reads the "en" key).
City struct {
Names map[string]string `maxminddb:"names"`
} `maxminddb:"city"`
}
type GeoIP struct {
// GeoRecord is the geolocation result produced by GeoParser.Parse.
type GeoRecord struct {
// Country is the ISO code read from the database, or "UN" when none was found.
Country string
// State is the "en" name of the first subdivision; empty when unavailable.
State string
// City is the "en" city name; empty when unavailable.
City string
}
// GeoParser resolves an IP address to a GeoRecord.
type GeoParser interface {
// Parse returns the geolocation data for ip.
Parse(ip net.IP) *GeoRecord
}
// geoParser is the GeoParser implementation backed by a MaxMind database reader.
type geoParser struct {
// r is the reader that Parse uses for lookups.
r *maxminddb.Reader
}
func NewGeoIP(file string) *GeoIP {
// New opens the MaxMind database stored at file and returns a GeoParser that
// performs lookups against it. If the database cannot be opened the error is
// reported through log.Fatalln, which terminates the process.
//
// NOTE(review): this span comes from a diff view that interleaves removed and
// added lines; `return &GeoIP{r}` appears to be the removed pre-change return
// and `return &geoParser{r}` the current one — confirm against the repository.
func New(file string) GeoParser {
r, err := maxminddb.Open(file)
if err != nil {
log.Fatalln(err)
}
return &GeoIP{r}
return &geoParser{r}
}
func (geoIP *GeoIP) ExtractISOCode(ip net.IP) string {
// Parse looks ip up in the MaxMind database and returns its country, state
// and city.
//
// The result starts out as Country "UN" with empty State and City. That
// default is returned unchanged when ip is nil or when the lookup returns an
// error (the error is logged). Otherwise Country is overwritten only if the
// record has a non-empty ISO code, State is set to the "en" name of the first
// subdivision when at least one exists, and City is set to the "en" city name
// (empty string when the key is absent).
//
// NOTE(review): this span comes from a diff view that interleaves removed and
// added lines; the string-returning statements (`return "UN"`, the `code`
// variable and `return code`) appear to be leftovers of the previous
// ExtractISOCode implementation — confirm against the repository.
func (geoIP *geoParser) Parse(ip net.IP) *GeoRecord {
res := &GeoRecord{
Country: "UN",
State: "",
City: "",
}
if ip == nil {
return "UN"
return res
}
var code string
var record geoIPRecord
if geoIP.r.Lookup(ip, &record) == nil {
code = record.Country.ISOCode
if err := geoIP.r.Lookup(ip, &record); err != nil {
log.Println(err)
return res
}
if code == "" {
code = "UN"
if record.Country.ISOCode != "" {
res.Country = record.Country.ISOCode
}
return code
if len(record.States) > 0 {
res.State = record.States[0].Names["en"]
}
res.City = record.City.Names["en"]
return res
}

View file

@ -0,0 +1,88 @@
package geoip
import (
"io"
"log"
"net"
"net/http"
"os"
"testing"
)
// LoadGeoLiteDB downloads the GeoLite2 City database used by the tests and
// stores it as "geo.mmdb" in the current working directory.
//
// Any failure terminates the process through log.Fatal/log.Fatalf. Because
// those calls exit without running deferred functions, cleanup is done
// explicitly: the local file is only created once the server has answered
// with 200 OK, and a partially written file is removed before exiting, so a
// failed download never leaves an empty or truncated database behind.
func LoadGeoLiteDB() {
	const (
		fileURL  = "https://static.openreplay.com/geoip/GeoLite2-City.mmdb"
		fileName = "geo.mmdb"
	)

	// Download the file
	response, err := http.Get(fileURL)
	if err != nil {
		log.Fatal(err)
	}

	// Check if the request was successful
	if response.StatusCode != http.StatusOK {
		response.Body.Close()
		log.Fatalf("Failed to download file: %s", response.Status)
	}

	// Create the file only now that there is something to put into it
	file, err := os.Create(fileName)
	if err != nil {
		response.Body.Close()
		log.Fatal(err)
	}

	// Copy the downloaded body to the local file. The Close error is checked
	// as well: a write failure may only surface when the file is closed.
	_, copyErr := io.Copy(file, response.Body)
	closeErr := file.Close()
	response.Body.Close()

	if copyErr != nil || closeErr != nil {
		// Best-effort removal of the incomplete file; the original error is
		// the one worth reporting.
		_ = os.Remove(fileName)
		if copyErr != nil {
			log.Fatal(copyErr)
		}
		log.Fatal(closeErr)
	}
}
// DeleteGeoLiteDB removes the "geo.mmdb" file from the current working
// directory. If the removal fails the error is reported through log.Fatal,
// which terminates the process.
func DeleteGeoLiteDB() {
	err := os.Remove("geo.mmdb")
	if err == nil {
		return
	}
	log.Fatal(err)
}
// TestGeoIP downloads the GeoLite2 City database, opens it through New and
// checks Parse against two inputs: a known French address, and an empty
// string, which net.ParseIP turns into a nil IP so Parse must return its
// default record. The database file is deleted when the test returns.
func TestGeoIP(t *testing.T) {
	LoadGeoLiteDB()
	defer DeleteGeoLiteDB()

	parser := New("geo.mmdb")

	cases := []struct {
		addr string
		want GeoRecord
	}{
		{
			addr: "92.151.113.120",
			want: GeoRecord{Country: "FR", State: "Île-de-France", City: "Courbevoie"},
		},
		{
			// An empty string does not parse, so Parse receives a nil IP.
			addr: "",
			want: GeoRecord{Country: "UN", State: "", City: ""},
		},
	}

	for _, tc := range cases {
		got := parser.Parse(net.ParseIP(tc.addr))
		if got.Country != tc.want.Country {
			t.Errorf("Country is incorrect: %s != %s", got.Country, tc.want.Country)
		}
		if got.State != tc.want.State {
			t.Errorf("State is incorrect: %s != %s", got.State, tc.want.State)
		}
		if got.City != tc.want.City {
			t.Errorf("City is incorrect: %s != %s", got.City, tc.want.City)
		}
	}
}

View file

@ -1,13 +0,0 @@
package geoip
import (
"net"
"net/http"
"github.com/tomasen/realip"
)
func (geoIP *GeoIP) ExtractISOCodeFromHTTPRequest(r *http.Request) string {
ip := net.ParseIP(realip.FromRequest(r))
return geoIP.ExtractISOCode(ip)
}

View file

@ -71,8 +71,7 @@ func (e *Router) startSessionHandlerIOS(w http.ResponseWriter, r *http.Request)
// TODO: if EXPIRED => send message for two sessions association
expTime := startTime.Add(time.Duration(p.MaxSessionDuration) * time.Millisecond)
tokenData = &token.TokenData{sessionID, 0, expTime.UnixMilli()}
country := e.services.GeoIP.ExtractISOCodeFromHTTPRequest(r)
geoInfo := e.ExtractGeoData(r)
// The difference with web is mostly here:
sessStart := &IOSSessionStart{
@ -85,7 +84,7 @@ func (e *Router) startSessionHandlerIOS(w http.ResponseWriter, r *http.Request)
UserOSVersion: req.UserOSVersion,
UserDevice: ios.MapIOSDevice(req.UserDevice),
UserDeviceType: ios.GetIOSDeviceType(req.UserDevice),
UserCountry: country,
UserCountry: geoInfo.Country,
}
e.services.Producer.Produce(e.cfg.TopicRawIOS, tokenData.ID, sessStart.Encode())
}

View file

@ -140,6 +140,7 @@ func (e *Router) startSessionHandlerWeb(w http.ResponseWriter, r *http.Request)
Delay: startTimeMili - req.Timestamp,
ExpTime: expTime.UnixMilli(),
}
geoInfo := e.ExtractGeoData(r)
sessionStart := &SessionStart{
Timestamp: getSessionTimestamp(req, startTimeMili),
@ -154,14 +155,14 @@ func (e *Router) startSessionHandlerWeb(w http.ResponseWriter, r *http.Request)
UserBrowserVersion: ua.BrowserVersion,
UserDevice: ua.Device,
UserDeviceType: ua.DeviceType,
UserCountry: e.services.GeoIP.ExtractISOCodeFromHTTPRequest(r),
UserCountry: geoInfo.Country,
UserDeviceMemorySize: req.DeviceMemory,
UserDeviceHeapSize: req.JsHeapSizeLimit,
UserID: req.UserID,
}
// Save sessionStart to db
if err := e.services.Database.InsertWebSessionStart(sessionID, sessionStart); err != nil {
if err := e.services.Database.InsertWebSessionStart(sessionID, sessionStart, geoInfo); err != nil {
log.Printf("can't insert session start: %s", err)
}
@ -257,7 +258,7 @@ func (e *Router) notStartedHandlerWeb(w http.ResponseWriter, r *http.Request) {
ResponseWithError(w, http.StatusForbidden, errors.New("browser not recognized"), startTime, r.URL.Path, bodySize)
return
}
country := e.services.GeoIP.ExtractISOCodeFromHTTPRequest(r)
geoInfo := e.ExtractGeoData(r)
err = e.services.Database.InsertUnstartedSession(postgres.UnstartedSession{
ProjectKey: *req.ProjectKey,
TrackerVersion: req.TrackerVersion,
@ -270,7 +271,9 @@ func (e *Router) notStartedHandlerWeb(w http.ResponseWriter, r *http.Request) {
UserBrowserVersion: ua.BrowserVersion,
UserDevice: ua.Device,
UserDeviceType: ua.DeviceType,
UserCountry: country,
UserCountry: geoInfo.Country,
UserState: geoInfo.State,
UserCity: geoInfo.City,
})
if err != nil {
log.Printf("Unable to insert Unstarted Session: %v\n", err)

View file

@ -2,8 +2,11 @@ package router
import (
"fmt"
"github.com/tomasen/realip"
"log"
"net"
"net/http"
"openreplay/backend/internal/http/geoip"
"sync"
"time"
@ -86,6 +89,11 @@ func (e *Router) clearBeaconSizes() {
}
}
// ExtractGeoData resolves the geolocation record for an incoming request.
// The address string returned by realip.FromRequest (presumably the client's
// real IP, honoring proxy headers — verify against that package) is parsed
// with net.ParseIP and handed to the configured GeoIP parser. An unparsable
// address yields a nil IP, which is passed through to Parse as-is.
func (e *Router) ExtractGeoData(r *http.Request) *geoip.GeoRecord {
	clientAddr := realip.FromRequest(r)
	parsedIP := net.ParseIP(clientAddr)
	return e.services.GeoIP.Parse(parsedIP)
}
func (e *Router) init() {
e.router = mux.NewRouter()

View file

@ -16,7 +16,7 @@ type ServicesBuilder struct {
Producer types.Producer
Flaker *flakeid.Flaker
UaParser *uaparser.UAParser
GeoIP *geoip.GeoIP
GeoIP geoip.GeoParser
Tokenizer *token.Tokenizer
Storage *storage.S3
}
@ -28,7 +28,7 @@ func New(cfg *http.Config, producer types.Producer, pgconn *cache.PGCache) *Serv
Storage: storage.NewS3(cfg.AWSRegion, cfg.S3BucketIOSImages, cfg.UseFileTags()),
Tokenizer: token.NewTokenizer(cfg.TokenSecret),
UaParser: uaparser.NewUAParser(cfg.UAParserFile),
GeoIP: geoip.NewGeoIP(cfg.MaxMinDBFile),
GeoIP: geoip.New(cfg.MaxMinDBFile),
Flaker: flakeid.NewFlaker(cfg.WorkerID),
}
}

View file

@ -2,24 +2,26 @@ package cache
import (
"fmt"
"openreplay/backend/internal/http/geoip"
. "openreplay/backend/pkg/db/types"
. "openreplay/backend/pkg/messages"
)
func (c *PGCache) InsertWebSessionStart(sessionID uint64, s *SessionStart) error {
func (c *PGCache) InsertWebSessionStart(sessionID uint64, s *SessionStart, geo *geoip.GeoRecord) error {
return c.Conn.InsertSessionStart(sessionID, &Session{
SessionID: sessionID,
Platform: "web",
Timestamp: s.Timestamp,
ProjectID: uint32(s.ProjectID),
TrackerVersion: s.TrackerVersion,
RevID: s.RevID,
UserUUID: s.UserUUID,
UserOS: s.UserOS,
UserOSVersion: s.UserOSVersion,
UserDevice: s.UserDevice,
UserCountry: s.UserCountry,
// web properties (TODO: unite different platform types)
SessionID: sessionID,
Platform: "web",
Timestamp: s.Timestamp,
ProjectID: uint32(s.ProjectID),
TrackerVersion: s.TrackerVersion,
RevID: s.RevID,
UserUUID: s.UserUUID,
UserOS: s.UserOS,
UserOSVersion: s.UserOSVersion,
UserDevice: s.UserDevice,
UserCountry: geo.Country,
UserState: geo.State,
UserCity: geo.City,
UserAgent: s.UserAgent,
UserBrowser: s.UserBrowser,
UserBrowserVersion: s.UserBrowserVersion,

View file

@ -29,7 +29,7 @@ func (conn *Conn) InsertSessionStart(sessionID uint64, s *types.Session) error {
tracker_version, issue_score,
platform,
user_agent, user_browser, user_browser_version, user_device_memory_size, user_device_heap_size,
user_id
user_id, user_state, user_city
) VALUES (
$1, $2, $3,
$4, $5, $6, $7,
@ -38,7 +38,7 @@ func (conn *Conn) InsertSessionStart(sessionID uint64, s *types.Session) error {
$11, $12,
$13,
NULLIF($14, ''), NULLIF($15, ''), NULLIF($16, ''), NULLIF($17, 0), NULLIF($18, 0::bigint),
NULLIF(LEFT($19, 8000), '')
NULLIF(LEFT($19, 8000), ''), NULLIF($20, ''), NULLIF($21, '')
)`,
sessionID, s.ProjectID, s.Timestamp,
s.UserUUID, s.UserDevice, s.UserDeviceType, s.UserCountry,
@ -47,7 +47,7 @@ func (conn *Conn) InsertSessionStart(sessionID uint64, s *types.Session) error {
s.TrackerVersion, s.Timestamp/1000,
s.Platform,
s.UserAgent, s.UserBrowser, s.UserBrowserVersion, s.UserDeviceMemorySize, s.UserDeviceHeapSize,
s.UserID,
s.UserID, s.UserState, s.UserCity,
)
}

View file

@ -13,6 +13,8 @@ type UnstartedSession struct {
UserDevice string
UserDeviceType string
UserCountry string
UserState string
UserCity string
}
func (conn *Conn) InsertUnstartedSession(s UnstartedSession) error {
@ -24,7 +26,7 @@ func (conn *Conn) InsertUnstartedSession(s UnstartedSession) error {
user_os, user_os_version,
user_browser, user_browser_version,
user_device, user_device_type,
user_country
user_country, user_state, user_city
) VALUES (
(SELECT project_id FROM projects WHERE project_key = $1),
$2, $3,
@ -32,7 +34,7 @@ func (conn *Conn) InsertUnstartedSession(s UnstartedSession) error {
$6, $7,
$8, $9,
$10, $11,
$12
$12, NULLIF($13, ''), NULLIF($14, '')
)`,
s.ProjectKey,
s.TrackerVersion, s.DoNotTrack,
@ -40,6 +42,6 @@ func (conn *Conn) InsertUnstartedSession(s UnstartedSession) error {
s.UserOS, s.UserOSVersion,
s.UserBrowser, s.UserBrowserVersion,
s.UserDevice, s.UserDeviceType,
s.UserCountry,
s.UserCountry, s.UserState, s.UserCity,
)
}

View file

@ -11,6 +11,8 @@ type Session struct {
UserOSVersion string
UserDevice string
UserCountry string
UserState string
UserCity string
Referrer *string
Duration *uint64