import json
import numpy as np
import os
import re
import sys
def batch_data(data, batch_size):
    '''Split a dataset into consecutive mini-batches.

    Args:
        data: dict := {'x': [list], 'y': [list]}
        batch_size: positive number of samples per batch

    Returns:
        (batched_x, batched_y): two lists of slices taken from data['x']
        and data['y'] at the same offsets. Every slice holds batch_size
        items except possibly the last one, which holds the remainder.
        Both lists are empty when data['x'] is empty.

    Raises:
        ValueError: if batch_size is not positive.

    Note:
        The batch offsets are derived from len(data['x']) only; if
        data['y'] is shorter, its trailing batches come out short or empty.
    '''
    if batch_size <= 0:
        # range() already rejects a zero step, but a negative step would
        # silently produce no batches at all and drop the whole dataset.
        raise ValueError(
            'batch_size must be a positive integer, got {!r}'.format(batch_size))

    raw_x = data['x']
    raw_y = data['y']

    # One shared set of start offsets keeps x and y batches aligned.
    starts = range(0, len(raw_x), batch_size)
    batched_x = [raw_x[start:start + batch_size] for start in starts]
    batched_y = [raw_y[start:start + batch_size] for start in starts]
    return batched_x, batched_y
def _load_json_files(data_dir):
    '''Yield the parsed content of each .json file directly inside data_dir.

    Files are visited in sorted file-name order so that results do not
    depend on the (arbitrary) order returned by os.listdir.
    '''
    for file_name in sorted(os.listdir(data_dir)):
        if not file_name.endswith('.json'):
            continue
        file_path = os.path.join(data_dir, file_name)
        # Read as UTF-8 explicitly rather than relying on the locale default.
        with open(file_path, 'r', encoding='utf-8') as inf:
            yield json.load(inf)


def read_data(train_data_dir, test_data_dir):
    '''Parse the data in the given train and test data directories.

    Assumes:
    - the data in the input directories are .json files whose top-level
      object has a 'user_data' key (a dict keyed by client id) and,
      optionally, a 'hierarchies' key (a list of group ids)
    - the set of train set users is the same as the set of test set users
      (this is not checked here)

    Files without a '.json' suffix are ignored. When the same client id
    appears in several files of one directory, the entry from the file
    that sorts last by name wins.

    Args:
        train_data_dir: directory holding the train .json files
        test_data_dir: directory holding the test .json files

    Return:
        clients: list of client ids, i.e. the keys of train_data
        groups: list of group ids; empty list if none found
        train_data: dictionary of train data
        test_data: dictionary of test data

    Raises:
        KeyError: if a .json file has no 'user_data' key.
    '''
    groups = []
    train_data = {}
    test_data = {}

    for cdata in _load_json_files(train_data_dir):
        if 'hierarchies' in cdata:
            groups.extend(cdata['hierarchies'])
        train_data.update(cdata['user_data'])

    for cdata in _load_json_files(test_data_dir):
        test_data.update(cdata['user_data'])

    # The client list is defined by the merged train data; any 'users'
    # entry in the files is not consulted.
    clients = list(train_data)
    return clients, groups, train_data, test_data