Change the types of some message fields and force the column type when pandas fails to recognize it

This commit is contained in:
mauricio garcia suarez 2022-07-18 14:38:14 +02:00
parent 02cbeca771
commit de1977b0de
2 changed files with 19 additions and 10 deletions

View file

@ -351,18 +351,18 @@ def create_events_detailed_table(creds_file, table_id):
bigquery.SchemaField("setnodeurlbasedattribute_value", "STRING"),
bigquery.SchemaField("setnodeurlbasedattribute_baseurl", "STRING"),
bigquery.SchemaField("setstyledata_id", "FLOAT64"),
bigquery.SchemaField("setstyledata_data", "STRING"), #CAUSING ISSUES
bigquery.SchemaField("setstyledata_baseurl", "STRING"), #CAUSING ISSUES
bigquery.SchemaField("setstyledata_data", "STRING"),
bigquery.SchemaField("setstyledata_baseurl", "STRING"),
bigquery.SchemaField("issueevent_messageid", "FLOAT64"),
bigquery.SchemaField("issueevent_timestamp", "FLOAT64"),
bigquery.SchemaField("issueevent_type", "FLOAT64"),
bigquery.SchemaField("issueevent_contextstring", "FLOAT64"),
bigquery.SchemaField("issueevent_context", "FLOAT64"),
bigquery.SchemaField("issueevent_payload", "FLOAT64"),
bigquery.SchemaField("technicalinfo_type", "FLOAT64"),
bigquery.SchemaField("technicalinfo_value", "FLOAT64"),
bigquery.SchemaField("customissue_name", "FLOAT64"),
bigquery.SchemaField("customissue_payload", "FLOAT64"),
bigquery.SchemaField("issueevent_type", "STRING"),
bigquery.SchemaField("issueevent_contextstring", "STRING"),
bigquery.SchemaField("issueevent_context", "STRING"),
bigquery.SchemaField("issueevent_payload", "STRING"),
bigquery.SchemaField("technicalinfo_type", "STRING"),
bigquery.SchemaField("technicalinfo_value", "STRING"),
bigquery.SchemaField("customissue_name", "STRING"),
bigquery.SchemaField("customissue_payload", "STRING"),
bigquery.SchemaField("pageclose", "FLOAT64"),
bigquery.SchemaField("received_at", "INT64", mode="REQUIRED"),
bigquery.SchemaField("batch_order_number", "INT64", mode="REQUIRED")

View file

@ -355,8 +355,17 @@ def get_df_from_batch(batch, level):
df['issues'] = df['issues'].fillna('')
df['urls'] = df['urls'].fillna('')
forced_type_cols_str = ['setnodeurlbasedattribute_name', 'setnodeurlbasedattribute_value',
'setnodeurlbasedattribute_baseurl', 'setstyledata_data', 'setstyledata_baseurl',
'customissue_payload', 'customissue_name', 'technicalinfo_value', 'technicalinfo_type',
'issueevent_payload', 'issueevent_context', 'issueevent_contextstring', 'issueevent_type']
forced_type_cols_float = ['setnodeurlbasedattribute_id']
for x in df.columns:
try:
if x in forced_type_cols_str:
df[x] = df[x].apply(str)
if x in forced_type_cols_float:
df[x] = df[x].apply(float)
if df[x].dtype == 'string':
df[x] = df[x].str.slice(0, 255)
df[x] = df[x].str.replace("|", "")