TimberVis Flexiboard: Visualization and Exploration Flexiboard for Timber Buildings IoT data sensors. The pulse of the wood Monitoring of Wooden houses: Time series of sensors data measuring humidity, temperatures, vibrations and weather conditions. https://lnu.se/forskning/forskningsprojekt/projekt-flexiboard-for-visualisering-och-utforskning-av-trabyggnader/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
infravis-trahust/py/import.py

127 lines
3.8 KiB

import csv, sqlite3, os, glob
from itertools import compress
BASEDIR = '../Data/'
for hus_name in next(os.walk(BASEDIR))[1]:
print('Importing House: ' + hus_name)
db_file = hus_name + '.db'
try:
print(f"Deleting and recreating '{db_file}'...")
os.remove(db_file)
except:
#print('Some error occurred; Aborting DB import. Sorry! :(')
pass
con = sqlite3.connect(db_file)
cur = con.cursor()
cards = []
all_txts = sorted(glob.glob(f"{BASEDIR+hus_name}/**/*.TXT", recursive=True))
for txt in all_txts:
card, year, month = os.path.split(txt)[1].split('.')[0].split('-')
if card not in cards:
cards.append(card)
# Create a new table for the card
table_name = f'Hus_{hus_name}_card_{card}'
print(f'New card: {card}; table name: {table_name}')
hus_dir = BASEDIR + hus_name
with open(f'{hus_dir}/{card}-metadata.csv', encoding='utf-8') as card_file:
csv_reader = csv.reader(card_file)
next(csv_reader) # first line is not important
# Family names
family_names = next(csv_reader)
cur_family = family_names[3]
# Fill the gaps
for i in range(4,len(family_names)):
if not family_names[i]:
family_names[i] = cur_family
else:
cur_family = family_names[i]
#print(family_names)
# Sensor names
sensor_names = next(csv_reader)
#print(sensor_names)
# Units
units = next(csv_reader)
for i, u in enumerate(units):
units[i] = units[i].replace('°C', 'celsius')
units[i] = units[i].replace('%', 'percent')
units[i] = units[i].replace('Ω', 'ohm')
units[i] = units[i].replace('#', 'count')
units[i] = units[i].replace('/', 'X')
units[i] = units[i].replace('???', 'none')
#print(units)
# Remove all empty columns
not_empty = [x != 'EMPTY' for x in sensor_names]
family_names = list(compress(family_names, not_empty))
sensor_names = list(compress(sensor_names, not_empty))
units = list(compress(units, not_empty))
# Create the table
col_names = ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second']
col_names.extend(f'{family_names[i]}.{sensor_names[i]}.{units[i]}' for i in range(3, len(family_names)))
#print(col_names)
cur.execute(f"CREATE TABLE {table_name}{*col_names,}")
con.commit()
cur.execute(f'CREATE INDEX idx_{card}_Year ON {table_name} (Year)')
cur.execute(f'CREATE INDEX idx_{card}_Month ON {table_name} (Month)')
cur.execute(f'CREATE INDEX idx_{card}_Day ON {table_name} (Day)')
cur.execute(f'CREATE INDEX idx_{card}_Hour ON {table_name} (Hour)')
con.commit()
#exit()
print(f'Processing {txt}...')
with open(txt, 'r', encoding='latin-1') as fin: # Specify the encoding here
reader = csv.reader(fin)
data = []
for line in reader:
#print(line)
if line[0][:9] == 'Connected':
# Skip two lines; we don't need them anymore
header1 = next(reader)
header2 = next(reader)
else:
# First split the date into three columns (y,m,d)
row = line[0].split('-')
# Then split the time into three columns (h,m,s)
row.extend(line[1].split(':'))
# Then add the data (without the date, time, and empty columns)
row.extend(tuple(compress(line, not_empty))[2:])
data.append(row)
# Sometimes, some of the lines in a file are the wrong size.
# When that happens, we skip the file.
if len(data[-1]) != len(col_names):
break
# Sometimes, some of the lines in a file are the wrong size.
# When that happens, we skip the file.
if len(data[-1]) != len(col_names):
print(" - Invalid file format; SKIPPING!")
print(f" - Expected cols: {len(col_names)}; Found cols: {len(data[-1])}")
continue
placeholder = f"({','.join('?' for _ in range(len(data[0])))})"
query = f"INSERT INTO {table_name} VALUES {placeholder};"
#print(query)
#exit()
cur.executemany(query, data)
con.commit()