Changed the types of some message fields and forced the column type when pandas fails to recognize it
This commit is contained in:
parent
02cbeca771
commit
de1977b0de
2 changed files with 19 additions and 10 deletions
|
|
@ -351,18 +351,18 @@ def create_events_detailed_table(creds_file, table_id):
|
|||
bigquery.SchemaField("setnodeurlbasedattribute_value", "STRING"),
|
||||
bigquery.SchemaField("setnodeurlbasedattribute_baseurl", "STRING"),
|
||||
bigquery.SchemaField("setstyledata_id", "FLOAT64"),
|
||||
bigquery.SchemaField("setstyledata_data", "STRING"), #CAUSING ISSUES
|
||||
bigquery.SchemaField("setstyledata_baseurl", "STRING"), #CAUSING ISSUES
|
||||
bigquery.SchemaField("setstyledata_data", "STRING"),
|
||||
bigquery.SchemaField("setstyledata_baseurl", "STRING"),
|
||||
bigquery.SchemaField("issueevent_messageid", "FLOAT64"),
|
||||
bigquery.SchemaField("issueevent_timestamp", "FLOAT64"),
|
||||
bigquery.SchemaField("issueevent_type", "FLOAT64"),
|
||||
bigquery.SchemaField("issueevent_contextstring", "FLOAT64"),
|
||||
bigquery.SchemaField("issueevent_context", "FLOAT64"),
|
||||
bigquery.SchemaField("issueevent_payload", "FLOAT64"),
|
||||
bigquery.SchemaField("technicalinfo_type", "FLOAT64"),
|
||||
bigquery.SchemaField("technicalinfo_value", "FLOAT64"),
|
||||
bigquery.SchemaField("customissue_name", "FLOAT64"),
|
||||
bigquery.SchemaField("customissue_payload", "FLOAT64"),
|
||||
bigquery.SchemaField("issueevent_type", "STRING"),
|
||||
bigquery.SchemaField("issueevent_contextstring", "STRING"),
|
||||
bigquery.SchemaField("issueevent_context", "STRING"),
|
||||
bigquery.SchemaField("issueevent_payload", "STRING"),
|
||||
bigquery.SchemaField("technicalinfo_type", "STRING"),
|
||||
bigquery.SchemaField("technicalinfo_value", "STRING"),
|
||||
bigquery.SchemaField("customissue_name", "STRING"),
|
||||
bigquery.SchemaField("customissue_payload", "STRING"),
|
||||
bigquery.SchemaField("pageclose", "FLOAT64"),
|
||||
bigquery.SchemaField("received_at", "INT64", mode="REQUIRED"),
|
||||
bigquery.SchemaField("batch_order_number", "INT64", mode="REQUIRED")
|
||||
|
|
|
|||
|
|
@ -355,8 +355,17 @@ def get_df_from_batch(batch, level):
|
|||
df['issues'] = df['issues'].fillna('')
|
||||
df['urls'] = df['urls'].fillna('')
|
||||
|
||||
forced_type_cols_str = ['setnodeurlbasedattribute_name', 'setnodeurlbasedattribute_value',
|
||||
'setnodeurlbasedattribute_baseurl', 'setstyledata_data', 'setstyledata_baseurl',
|
||||
'customissue_payload', 'customissue_name', 'technicalinfo_value', 'technicalinfo_type',
|
||||
'issueevent_payload', 'issueevent_context', 'issueevent_contextstring', 'issueevent_type']
|
||||
forced_type_cols_float = ['setnodeurlbasedattribute_id']
|
||||
for x in df.columns:
|
||||
try:
|
||||
if x in forced_type_cols_str:
|
||||
df[x] = df[x].apply(str)
|
||||
if x in forced_type_cols_float:
|
||||
df[x] = df[x].apply(float)
|
||||
if df[x].dtype == 'string':
|
||||
df[x] = df[x].str.slice(0, 255)
|
||||
df[x] = df[x].str.replace("|", "")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue