souffle  2.0.2-371-g6315b36
ReadStreamJSON.h
Go to the documentation of this file.
1 /*
2  * Souffle - A Datalog Compiler
3  * Copyright (c) 2020, The Souffle Developers. All rights reserved
4  * Licensed under the Universal Permissive License v 1.0 as shown at:
5  * - https://opensource.org/licenses/UPL
6  * - <souffle root>/licenses/SOUFFLE-UPL.txt
7  */
8 
9 /************************************************************************
10  *
11  * @file ReadStreamJSON.h
12  *
13  ***********************************************************************/
14 
15 #pragma once
16 
17 #include "souffle/RamTypes.h"
18 #include "souffle/SymbolTable.h"
19 #include "souffle/io/ReadStream.h"
23 
24 #include <algorithm>
25 #include <cassert>
26 #include <cstddef>
27 #include <cstdint>
28 #include <fstream>
29 #include <iostream>
30 #include <map>
31 #include <memory>
32 #include <queue>
33 #include <sstream>
34 #include <stdexcept>
35 #include <string>
36 #include <tuple>
37 #include <vector>
38 
39 namespace souffle {
40 class RecordTable;
41 
42 class ReadStreamJSON : public ReadStream {
43 public:
44  ReadStreamJSON(std::istream& file, const std::map<std::string, std::string>& rwOperation,
45  SymbolTable& symbolTable, RecordTable& recordTable)
46  : ReadStream(rwOperation, symbolTable, recordTable), file(file), pos(0), isInitialized(false) {
47  std::string err;
48  params = Json::parse(rwOperation.at("params"), err);
49  if (err.length() > 0) {
50  fatal("cannot get internal params: %s", err);
51  }
52  }
53 
54 protected:
55  std::istream& file;
56  size_t pos;
57  Json jsonSource;
58  Json params;
60  bool useObjects;
61  std::map<const std::string, const size_t> paramIndex;
62 
64  // for some reasons we cannot initalized our json objects in constructor
65  // otherwise it will segfault, so we initialize in the first call
66  if (!isInitialized) {
67  isInitialized = true;
68  std::string error = "";
69  std::string source(std::istreambuf_iterator<char>(file), {});
70 
71  jsonSource = Json::parse(source, error);
72  // it should be wrapped by an extra array
73  if (error.length() > 0 || !jsonSource.is_array()) {
74  fatal("cannot deserialize json because %s:\n%s", error, source);
75  }
76 
77  // we only check the first one, since there are extra checks
78  // in readNextTupleObject/readNextTupleList
79  if (jsonSource[0].is_array()) {
80  useObjects = false;
81  } else if (jsonSource[0].is_object()) {
82  useObjects = true;
83  size_t index_pos = 0;
84  for (auto param : params["relation"]["params"].array_items()) {
85  paramIndex.insert(std::make_pair(param.string_value(), index_pos));
86  index_pos++;
87  }
88  } else {
89  fatal("the input is neither list nor object format");
90  }
91  }
92 
93  if (useObjects) {
94  return readNextTupleObject();
95  } else {
96  return readNextTupleList();
97  }
98  }
99 
100  Own<RamDomain[]> readNextTupleList() {
101  if (pos >= jsonSource.array_items().size()) {
102  return nullptr;
103  }
104 
105  Own<RamDomain[]> tuple = std::make_unique<RamDomain[]>(typeAttributes.size());
106  const Json& jsonObj = jsonSource[pos];
107  assert(jsonObj.is_array() && "the input is not json array");
108  pos++;
109  for (size_t i = 0; i < typeAttributes.size(); ++i) {
110  try {
111  auto&& ty = typeAttributes.at(i);
112  switch (ty[0]) {
113  case 's': {
114  tuple[i] = symbolTable.unsafeLookup(jsonObj[i].string_value());
115  break;
116  }
117  case 'r': {
118  tuple[i] = readNextElementList(jsonObj[i], ty);
119  break;
120  }
121  case 'i': {
122  tuple[i] = jsonObj[i].int_value();
123  break;
124  }
125  case 'u': {
126  tuple[i] = jsonObj[i].int_value();
127  break;
128  }
129  case 'f': {
130  tuple[i] = static_cast<RamDomain>(jsonObj[i].number_value());
131  break;
132  }
133  default: fatal("invalid type attribute: `%c`", ty[0]);
134  }
135  } catch (...) {
136  std::stringstream errorMessage;
137  if (jsonObj.is_array() && i < jsonObj.array_items().size()) {
138  errorMessage << "Error converting: " << jsonObj[i].dump();
139  } else {
140  errorMessage << "Invalid index: " << i;
141  }
142  throw std::invalid_argument(errorMessage.str());
143  }
144  }
145 
146  return tuple;
147  }
148 
149  RamDomain readNextElementList(const Json& source, const std::string& recordTypeName) {
150  auto&& recordInfo = types["records"][recordTypeName];
151 
152  if (recordInfo.is_null()) {
153  throw std::invalid_argument("Missing record type information: " + recordTypeName);
154  }
155 
156  // Handle null case
157  if (source.is_null()) {
158  return 0;
159  }
160 
161  assert(source.is_array() && "the input is not json array");
162  auto&& recordTypes = recordInfo["types"];
163  const size_t recordArity = recordInfo["arity"].long_value();
164  std::vector<RamDomain> recordValues(recordArity);
165  for (size_t i = 0; i < recordArity; ++i) {
166  const std::string& recordType = recordTypes[i].string_value();
167  switch (recordType[0]) {
168  case 's': {
169  recordValues[i] = symbolTable.unsafeLookup(source[i].string_value());
170  break;
171  }
172  case 'r': {
173  recordValues[i] = readNextElementList(source[i], recordType);
174  break;
175  }
176  case 'i': {
177  recordValues[i] = source[i].int_value();
178  break;
179  }
180  case 'u': {
181  recordValues[i] = source[i].int_value();
182  break;
183  }
184  case 'f': {
185  recordValues[i] = static_cast<RamDomain>(source[i].number_value());
186  break;
187  }
188  default: fatal("invalid type attribute");
189  }
190  }
191 
192  return recordTable.pack(recordValues.data(), recordValues.size());
193  }
194 
195  Own<RamDomain[]> readNextTupleObject() {
196  if (pos >= jsonSource.array_items().size()) {
197  return nullptr;
198  }
199 
200  Own<RamDomain[]> tuple = std::make_unique<RamDomain[]>(typeAttributes.size());
201  const Json& jsonObj = jsonSource[pos];
202  assert(jsonObj.is_object() && "the input is not json object");
203  pos++;
204  for (auto p : jsonObj.object_items()) {
205  try {
206  // get the corresponding position by parameter name
207  if (paramIndex.find(p.first) == paramIndex.end()) {
208  fatal("invalid parameter: %s", p.first);
209  }
210  size_t i = paramIndex.at(p.first);
211  auto&& ty = typeAttributes.at(i);
212  switch (ty[0]) {
213  case 's': {
214  tuple[i] = symbolTable.unsafeLookup(p.second.string_value());
215  break;
216  }
217  case 'r': {
218  tuple[i] = readNextElementObject(p.second, ty);
219  break;
220  }
221  case 'i': {
222  tuple[i] = p.second.int_value();
223  break;
224  }
225  case 'u': {
226  tuple[i] = p.second.int_value();
227  break;
228  }
229  case 'f': {
230  tuple[i] = static_cast<RamDomain>(p.second.number_value());
231  break;
232  }
233  default: fatal("invalid type attribute: `%c`", ty[0]);
234  }
235  } catch (...) {
236  std::stringstream errorMessage;
237  errorMessage << "Error converting: " << p.second.dump();
238  throw std::invalid_argument(errorMessage.str());
239  }
240  }
241 
242  return tuple;
243  }
244 
245  RamDomain readNextElementObject(const Json& source, const std::string& recordTypeName) {
246  auto&& recordInfo = types["records"][recordTypeName];
247  const std::string recordName = recordTypeName.substr(2);
248  std::map<const std::string, const size_t> recordIndex;
249 
250  size_t index_pos = 0;
251  for (auto param : params["records"][recordName]["params"].array_items()) {
252  recordIndex.insert(std::make_pair(param.string_value(), index_pos));
253  index_pos++;
254  }
255 
256  if (recordInfo.is_null()) {
257  throw std::invalid_argument("Missing record type information: " + recordTypeName);
258  }
259 
260  // Handle null case
261  if (source.is_null()) {
262  return 0;
263  }
264 
265  assert(source.is_object() && "the input is not json object");
266  auto&& recordTypes = recordInfo["types"];
267  const size_t recordArity = recordInfo["arity"].long_value();
268  std::vector<RamDomain> recordValues(recordArity);
269  recordValues.reserve(recordIndex.size());
270  for (auto readParam : source.object_items()) {
271  // get the corresponding position by parameter name
272  if (recordIndex.find(readParam.first) == recordIndex.end()) {
273  fatal("invalid parameter: %s", readParam.first);
274  }
275  size_t i = recordIndex.at(readParam.first);
276  auto&& type = recordTypes[i].string_value();
277  switch (type[0]) {
278  case 's': {
279  recordValues[i] = symbolTable.unsafeLookup(readParam.second.string_value());
280  break;
281  }
282  case 'r': {
283  recordValues[i] = readNextElementObject(readParam.second, type);
284  break;
285  }
286  case 'i': {
287  recordValues[i] = readParam.second.int_value();
288  break;
289  }
290  case 'u': {
291  recordValues[i] = readParam.second.int_value();
292  break;
293  }
294  case 'f': {
295  recordValues[i] = static_cast<RamDomain>(readParam.second.number_value());
296  break;
297  }
298  default: fatal("invalid type attribute: `%c`", type[0]);
299  }
300  }
301 
302  return recordTable.pack(recordValues.data(), recordValues.size());
303  }
304 };
305 
306 class ReadFileJSON : public ReadStreamJSON {
307 public:
308  ReadFileJSON(const std::map<std::string, std::string>& rwOperation, SymbolTable& symbolTable,
309  RecordTable& recordTable)
311  baseName(souffle::baseName(getFileName(rwOperation))),
312  fileHandle(getFileName(rwOperation), std::ios::in | std::ios::binary) {
313  if (!fileHandle.is_open()) {
314  throw std::invalid_argument("Cannot open json file " + baseName + "\n");
315  }
316  }
317 
318  ~ReadFileJSON() override = default;
319 
320 protected:
321  /**
322  * Return given filename or construct from relation name.
323  * Default name is [configured path]/[relation name].json
324  *
325  * @param rwOperation map of IO configuration options
326  * @return input filename
327  */
328  static std::string getFileName(const std::map<std::string, std::string>& rwOperation) {
329  auto name = getOr(rwOperation, "filename", rwOperation.at("name") + ".json");
330  if (name.front() != '/') {
331  name = getOr(rwOperation, "fact-dir", ".") + "/" + name;
332  }
333  return name;
334  }
335 
336  std::string baseName;
337  std::ifstream fileHandle;
338 };
339 
341 public:
342  Own<ReadStream> getReader(const std::map<std::string, std::string>& rwOperation, SymbolTable& symbolTable,
343  RecordTable& recordTable) override {
344  return mk<ReadStreamJSON>(std::cin, rwOperation, symbolTable, recordTable);
345  }
346 
347  const std::string& getName() const override {
348  static const std::string name = "json";
349  return name;
350  }
351  ~ReadCinJSONFactory() override = default;
352 };
353 
354 class ReadFileJSONFactory : public ReadStreamFactory {
355 public:
356  Own<ReadStream> getReader(const std::map<std::string, std::string>& rwOperation, SymbolTable& symbolTable,
357  RecordTable& recordTable) override {
358  return mk<ReadFileJSON>(rwOperation, symbolTable, recordTable);
359  }
360 
361  const std::string& getName() const override {
362  static const std::string name = "jsonfile";
363  return name;
364  }
365 
366  ~ReadFileJSONFactory() override = default;
367 };
368 } // namespace souffle
souffle::SerialisationStream< false >::recordTable
RO< RecordTable > & recordTable
Definition: SerialisationStream.h:72
souffle::ReadFileJSON::~ReadFileJSON
~ReadFileJSON() override=default
err
std::string & err
Definition: json11.h:664
souffle::ReadStreamJSON::readNextElementList
RamDomain readNextElementList(const Json &source, const std::string &recordTypeName)
Definition: ReadStreamJSON.h:153
souffle::ReadStreamJSON::file
std::istream & file
Definition: ReadStreamJSON.h:59
json11::Json::dump
void dump(std::string &out) const
Definition: json11.h:370
json11::Json::parse
static Json parse(const std::string &in, std::string &err, JsonParse strategy=JsonParse::STANDARD)
Definition: json11.h:1071
souffle::ReadStreamJSON::readNextTupleList
Own< RamDomain[]> readNextTupleList()
Definition: ReadStreamJSON.h:104
souffle::RamDomain
int32_t RamDomain
Definition: RamTypes.h:56
SymbolTable.h
souffle::ReadStreamJSON::pos
size_t pos
Definition: ReadStreamJSON.h:60
souffle::ReadCinJSONFactory::getReader
Own< ReadStream > getReader(const std::map< std::string, std::string > &rwOperation, SymbolTable &symbolTable, RecordTable &recordTable) override
Definition: ReadStreamJSON.h:346
souffle::RecordTable
Definition: RecordTable.h:114
souffle::ReadFileJSONFactory::getName
const std::string & getName() const override
Definition: ReadStreamJSON.h:365
souffle::Own
std::unique_ptr< A > Own
Definition: ContainerUtil.h:42
souffle::ReadFileJSONFactory::~ReadFileJSONFactory
~ReadFileJSONFactory() override=default
souffle::ReadStreamJSON::readNextTuple
Own< RamDomain[]> readNextTuple() override
Definition: ReadStreamJSON.h:67
souffle::ReadStreamJSON
Definition: ReadStreamJSON.h:46
souffle::ReadCinJSONFactory
Definition: ReadStreamJSON.h:344
souffle::ReadStreamJSON::jsonSource
Json jsonSource
Definition: ReadStreamJSON.h:61
souffle::ReadFileJSONFactory
Definition: ReadStreamJSON.h:358
json11::Json::object_items
const object & object_items() const
Definition: json11.h:557
souffle::SerialisationStream< false >::symbolTable
RO< SymbolTable > & symbolTable
Definition: SerialisationStream.h:71
souffle::ReadStreamFactory
Definition: ReadStream.h:311
souffle::SerialisationStream< false >::typeAttributes
std::vector< std::string > typeAttributes
Definition: SerialisationStream.h:74
souffle::getOr
C::mapped_type const & getOr(const C &container, typename C::key_type key, const typename C::mapped_type &defaultValue)
Get value for a given key; if not found, return default value.
Definition: ContainerUtil.h:111
souffle::SerialisationStream< false >::types
Json types
Definition: SerialisationStream.h:73
json11::Json::array_items
const array & array_items() const
Definition: json11.h:554
ReadStream.h
i
size_t i
Definition: json11.h:663
souffle::ReadFileJSON::getFileName
static std::string getFileName(const std::map< std::string, std::string > &rwOperation)
Return given filename or construct from relation name.
Definition: ReadStreamJSON.h:332
ContainerUtil.h
souffle::ReadFileJSON::ReadFileJSON
ReadFileJSON(const std::map< std::string, std::string > &rwOperation, SymbolTable &symbolTable, RecordTable &recordTable)
Definition: ReadStreamJSON.h:312
souffle::ReadStreamJSON::useObjects
bool useObjects
Definition: ReadStreamJSON.h:64
StringUtil.h
souffle::SymbolTable
Definition: SymbolTable.h:48
json11::Json::is_object
bool is_object() const
Definition: json11.h:150
souffle::ReadFileJSON::baseName
std::string baseName
Definition: ReadStreamJSON.h:340
json11::Json
Definition: json11.h:87
json11::Json::is_array
bool is_array() const
Definition: json11.h:147
souffle::ReadCinJSONFactory::~ReadCinJSONFactory
~ReadCinJSONFactory() override=default
souffle::ReadStreamJSON::paramIndex
std::map< const std::string, const size_t > paramIndex
Definition: ReadStreamJSON.h:65
souffle::ReadStreamJSON::ReadStreamJSON
ReadStreamJSON(std::istream &file, const std::map< std::string, std::string > &rwOperation, SymbolTable &symbolTable, RecordTable &recordTable)
Definition: ReadStreamJSON.h:48
std
Definition: Brie.h:3053
souffle::ReadStreamJSON::params
Json params
Definition: ReadStreamJSON.h:62
RamTypes.h
souffle::ReadStreamJSON::isInitialized
bool isInitialized
Definition: ReadStreamJSON.h:63
souffle::fatal
void fatal(const char *format, const Args &... args)
Definition: MiscUtil.h:198
souffle::ReadFileJSON::fileHandle
std::ifstream fileHandle
Definition: ReadStreamJSON.h:341
FileUtil.h
souffle
Definition: AggregateOp.h:25
souffle::ReadStreamJSON::readNextElementObject
RamDomain readNextElementObject(const Json &source, const std::string &recordTypeName)
Definition: ReadStreamJSON.h:249
json11::Json::number_value
double number_value() const
Definition: json11.h:539
souffle::ReadFileJSONFactory::getReader
Own< ReadStream > getReader(const std::map< std::string, std::string > &rwOperation, SymbolTable &symbolTable, RecordTable &recordTable) override
Definition: ReadStreamJSON.h:360
souffle::ReadCinJSONFactory::getName
const std::string & getName() const override
Definition: ReadStreamJSON.h:351
souffle::tuple
Defines a tuple for the OO interface such that relations with varying columns can be accessed.
Definition: SouffleInterface.h:443
souffle::ReadStreamJSON::readNextTupleObject
Own< RamDomain[]> readNextTupleObject()
Definition: ReadStreamJSON.h:199
json11::Json::int_value
int int_value() const
Definition: json11.h:542
std::type
ElementType type
Definition: span.h:640
p
a horizontalBars(j=m=void 0===a.axisX.type?new c.AutoScaleAxis(c.Axis.units.x, b.normalized.series, o, c.extend({}, a.axisX,{highLow:d, referenceValue:0})):a.axisX.type.call(c, c.Axis.units.x, b.normalized.series, o, c.extend({}, a.axisX,{highLow:d, referenceValue:0})), l=n=void 0===a.axisY.type?new c.StepAxis(c.Axis.units.y, b.normalized.series, o,{ticks:k}):a.axisY.type.call(c, c.Axis.units.y, b.normalized.series, o, a.axisY)) var p
Definition: htmlJsChartistMin.h:15
souffle::ReadStream
Definition: ReadStream.h:40