souffle  2.0.2-371-g6315b36
ReadStream.h
Go to the documentation of this file.
1 /*
2  * Souffle - A Datalog Compiler
3  * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved
4  * Licensed under the Universal Permissive License v 1.0 as shown at:
5  * - https://opensource.org/licenses/UPL
6  * - <souffle root>/licenses/SOUFFLE-UPL.txt
7  */
8 
9 /************************************************************************
10  *
11  * @file ReadStream.h
12  *
13  ***********************************************************************/
14 
15 #pragma once
16 
17 #include "souffle/RamTypes.h"
18 #include "souffle/RecordTable.h"
19 #include "souffle/SymbolTable.h"
24 #include "souffle/utility/json11.h"
25 #include <cctype>
26 #include <cstddef>
27 #include <map>
28 #include <memory>
29 #include <ostream>
30 #include <stdexcept>
31 #include <string>
32 #include <vector>
33 
34 namespace souffle {
35 
36 class ReadStream : public SerialisationStream<false> {
37 protected:
38  ReadStream(
39  const std::map<std::string, std::string>& rwOperation, SymbolTable& symTab, RecordTable& recTab)
40  : SerialisationStream(symTab, recTab, rwOperation) {}
41 
42 public:
43  template <typename T>
44  void readAll(T& relation) {
45  auto lease = symbolTable.acquireLock();
46  (void)lease;
47  while (const auto next = readNextTuple()) {
48  const RamDomain* ramDomain = next.get();
49  relation.insert(ramDomain);
50  }
51  }
52 
53 protected:
54  /**
55  * Read a record from a string.
56  *
57  * @param source - string containing a record
58  * @param recordTypeName - record type.
59  * @parem pos - start parsing from this position.
60  * @param consumed - if not nullptr: number of characters read.
61  *
62  */
63  RamDomain readRecord(const std::string& source, const std::string& recordTypeName, size_t pos = 0,
64  size_t* charactersRead = nullptr) {
65  const size_t initial_position = pos;
66 
67  // Check if record type information are present
68  auto&& recordInfo = types["records"][recordTypeName];
69  if (recordInfo.is_null()) {
70  throw std::invalid_argument("Missing record type information: " + recordTypeName);
71  }
72 
73  // Handle nil case
74  consumeWhiteSpace(source, pos);
75  if (source.substr(pos, 3) == "nil") {
76  if (charactersRead != nullptr) {
77  *charactersRead = 3;
78  }
79  return 0;
80  }
81 
82  auto&& recordTypes = recordInfo["types"];
83  const size_t recordArity = recordInfo["arity"].long_value();
84 
85  std::vector<RamDomain> recordValues(recordArity);
86 
87  consumeChar(source, '[', pos);
88 
89  for (size_t i = 0; i < recordArity; ++i) {
90  const std::string& recordType = recordTypes[i].string_value();
91  size_t consumed = 0;
92 
93  if (i > 0) {
94  consumeChar(source, ',', pos);
95  }
96  consumeWhiteSpace(source, pos);
97  switch (recordType[0]) {
98  case 's': {
99  recordValues[i] = symbolTable.unsafeLookup(readUntil(source, ",]", pos, &consumed));
100  break;
101  }
102  case 'i': {
103  recordValues[i] = RamSignedFromString(source.substr(pos), &consumed);
104  break;
105  }
106  case 'u': {
107  recordValues[i] = ramBitCast(RamUnsignedFromString(source.substr(pos), &consumed));
108  break;
109  }
110  case 'f': {
111  recordValues[i] = ramBitCast(RamFloatFromString(source.substr(pos), &consumed));
112  break;
113  }
114  case 'r': {
115  recordValues[i] = readRecord(source, recordType, pos, &consumed);
116  break;
117  }
118  case '+': {
119  recordValues[i] = readADT(source, recordType, pos, &consumed);
120  break;
121  }
122  default: fatal("Invalid type attribute");
123  }
124  pos += consumed;
125  }
126  consumeChar(source, ']', pos);
127 
128  if (charactersRead != nullptr) {
129  *charactersRead = pos - initial_position;
130  }
131 
132  return recordTable.pack(recordValues.data(), recordValues.size());
133  }
134 
135  RamDomain readADT(const std::string& source, const std::string& adtName, size_t pos = 0,
136  size_t* charactersRead = nullptr) {
137  const size_t initial_position = pos;
138 
139  // Branch will are encoded as one of the:
140  // [branchIdx, [branchValues...]]
141  // [branchIdx, branchValue]
142  // branchIdx
143  RamDomain branchIdx = -1;
144 
145  auto&& adtInfo = types["ADTs"][adtName];
146  const auto& branches = adtInfo["branches"];
147 
148  if (adtInfo.is_null() || !branches.is_array()) {
149  throw std::invalid_argument("Missing ADT information: " + adtName);
150  }
151 
152  // Consume initial character
153  consumeChar(source, '$', pos);
154  std::string constructor = readAlphanumeric(source, pos);
155 
156  json11::Json branchInfo = [&]() -> json11::Json {
157  for (auto branch : branches.array_items()) {
158  ++branchIdx;
159  if (branch["name"].string_value() == constructor) {
160  return branch;
161  }
162  }
163 
164  throw std::invalid_argument("Missing branch information: " + constructor);
165  }();
166 
167  assert(branchInfo["types"].is_array());
168  auto branchTypes = branchInfo["types"].array_items();
169 
170  // Handle a branch without arguments.
171  if (branchTypes.empty()) {
172  if (charactersRead != nullptr) {
173  *charactersRead = pos - initial_position;
174  }
175 
176  if (adtInfo["enum"].bool_value()) {
177  return branchIdx;
178  }
179 
180  RamDomain emptyArgs = recordTable.pack(toVector<RamDomain>().data(), 0);
181  return recordTable.pack(toVector<RamDomain>(branchIdx, emptyArgs).data(), 2);
182  }
183 
184  consumeChar(source, '(', pos);
185 
186  std::vector<RamDomain> branchArgs(branchTypes.size());
187 
188  for (size_t i = 0; i < branchTypes.size(); ++i) {
189  auto argType = branchTypes[i].string_value();
190  assert(!argType.empty());
191 
192  size_t consumed = 0;
193 
194  if (i > 0) {
195  consumeChar(source, ',', pos);
196  }
197  consumeWhiteSpace(source, pos);
198 
199  switch (argType[0]) {
200  case 's': {
201  branchArgs[i] = symbolTable.unsafeLookup(readUntil(source, ",)", pos, &consumed));
202  break;
203  }
204  case 'i': {
205  branchArgs[i] = RamSignedFromString(source.substr(pos), &consumed);
206  break;
207  }
208  case 'u': {
209  branchArgs[i] = ramBitCast(RamUnsignedFromString(source.substr(pos), &consumed));
210  break;
211  }
212  case 'f': {
213  branchArgs[i] = ramBitCast(RamFloatFromString(source.substr(pos), &consumed));
214  break;
215  }
216  case 'r': {
217  branchArgs[i] = readRecord(source, argType, pos, &consumed);
218  break;
219  }
220  case '+': {
221  branchArgs[i] = readADT(source, argType, pos, &consumed);
222  break;
223  }
224  default: fatal("Invalid type attribute");
225  }
226  pos += consumed;
227  }
228 
229  consumeChar(source, ')', pos);
230 
231  if (charactersRead != nullptr) {
232  *charactersRead = pos - initial_position;
233  }
234 
235  // Store branch either as [branch_id, [arguments]] or [branch_id, argument].
236  RamDomain branchValue = [&]() -> RamDomain {
237  if (branchArgs.size() != 1) {
238  return recordTable.pack(branchArgs.data(), branchArgs.size());
239  } else {
240  return branchArgs[0];
241  }
242  }();
243 
244  return recordTable.pack(toVector<RamDomain>(branchIdx, branchValue).data(), 2);
245  }
246 
247  /**
248  * Read the next alphanumeric sequence (corresponding to IDENT).
249  * Consume preceding whitespace.
250  * TODO (darth_tytus): use std::string_view?
251  */
252  std::string readAlphanumeric(const std::string& source, size_t& pos) {
253  consumeWhiteSpace(source, pos);
254  if (pos >= source.length()) {
255  throw std::invalid_argument("Unexpected end of input");
256  }
257 
258  const size_t bgn = pos;
259  while (pos < source.length() && std::isalnum(static_cast<unsigned char>(source[pos]))) {
260  ++pos;
261  }
262 
263  return source.substr(bgn, pos - bgn);
264  }
265 
266  std::string readUntil(const std::string& source, const std::string stopChars, const size_t pos,
267  size_t* charactersRead) {
268  size_t endOfSymbol = source.find_first_of(stopChars, pos);
269 
270  if (endOfSymbol == std::string::npos) {
271  throw std::invalid_argument("Unexpected end of input");
272  }
273 
274  *charactersRead = endOfSymbol - pos;
275 
276  return source.substr(pos, *charactersRead);
277  }
278 
279  /**
280  * Read past given character, consuming any preceding whitespace.
281  */
282  void consumeChar(const std::string& str, char c, size_t& pos) {
283  consumeWhiteSpace(str, pos);
284  if (pos >= str.length()) {
285  throw std::invalid_argument("Unexpected end of input");
286  }
287  if (str[pos] != c) {
288  std::stringstream error;
289  error << "Expected: \'" << c << "\', got: " << str[pos];
290  throw std::invalid_argument(error.str());
291  }
292  ++pos;
293  }
294 
295  /**
296  * Advance position in the string until first non-whitespace character.
297  */
298  void consumeWhiteSpace(const std::string& str, size_t& pos) {
299  while (pos < str.length() && std::isspace(static_cast<unsigned char>(str[pos]))) {
300  ++pos;
301  }
302  }
303 
304  virtual Own<RamDomain[]> readNextTuple() = 0;
305 };
306 
307 class ReadStreamFactory {
308 public:
309  virtual Own<ReadStream> getReader(
310  const std::map<std::string, std::string>&, SymbolTable&, RecordTable&) = 0;
311  virtual const std::string& getName() const = 0;
312  virtual ~ReadStreamFactory() = default;
313 };
314 
315 } /* namespace souffle */
souffle::RamSignedFromString
RamSigned RamSignedFromString(const std::string &str, std::size_t *position=nullptr, const int base=10)
Converts a string to a RamSigned.
Definition: StringUtil.h:51
souffle::SerialisationStream< false >::recordTable
RO< RecordTable > & recordTable
Definition: SerialisationStream.h:72
souffle::RamDomain
int32_t RamDomain
Definition: RamTypes.h:56
souffle::SerialisationStream
Definition: SerialisationStream.h:43
SymbolTable.h
souffle::RecordTable
Definition: RecordTable.h:114
SerialisationStream.h
souffle::Own
std::unique_ptr< A > Own
Definition: ContainerUtil.h:42
relation
Relation & relation
Definition: Reader.h:130
MiscUtil.h
json11.h
souffle::SerialisationStream< false >::symbolTable
RO< SymbolTable > & symbolTable
Definition: SerialisationStream.h:71
souffle::ReadStreamFactory
Definition: ReadStream.h:311
str
const std::string & str
Definition: json11.h:662
souffle::ReadStream::readNextTuple
virtual Own< RamDomain[]> readNextTuple()=0
souffle::ReadStreamFactory::~ReadStreamFactory
virtual ~ReadStreamFactory()=default
souffle::SerialisationStream< false >::types
Json types
Definition: SerialisationStream.h:73
json11::Json::array_items
const array & array_items() const
Definition: json11.h:554
i
size_t i
Definition: json11.h:663
ContainerUtil.h
souffle::ReadStream::consumeWhiteSpace
void consumeWhiteSpace(const std::string &str, size_t &pos)
Advance position in the string until first non-whitespace character.
Definition: ReadStream.h:306
souffle::ReadStream::readUntil
std::string readUntil(const std::string &source, const std::string stopChars, const size_t pos, size_t *charactersRead)
Definition: ReadStream.h:274
souffle::ReadStreamFactory::getReader
virtual Own< ReadStream > getReader(const std::map< std::string, std::string > &, SymbolTable &, RecordTable &)=0
StringUtil.h
souffle::SymbolTable
Definition: SymbolTable.h:48
json11::Json
Definition: json11.h:87
RecordTable.h
souffle::ReadStream::readAlphanumeric
std::string readAlphanumeric(const std::string &source, size_t &pos)
Read the next alphanumeric sequence (corresponding to IDENT).
Definition: ReadStream.h:260
RamTypes.h
souffle::ReadStream::readAll
void readAll(T &relation)
Definition: ReadStream.h:52
souffle::fatal
void fatal(const char *format, const Args &... args)
Definition: MiscUtil.h:198
souffle
Definition: AggregateOp.h:25
souffle::RamUnsignedFromString
RamUnsigned RamUnsignedFromString(const std::string &str, std::size_t *position=nullptr, const int base=10)
Converts a string to a RamUnsigned.
Definition: StringUtil.h:110
TCB_SPAN_NAMESPACE_NAME::detail::data
constexpr auto data(C &c) -> decltype(c.data())
Definition: span.h:210
souffle::ramBitCast
To ramBitCast(From source)
In C++20 there will be a new way to cast between types by reinterpreting bits (std::bit_cast),...
Definition: RamTypes.h:87
souffle::RamFloatFromString
RamFloat RamFloatFromString(const std::string &str, std::size_t *position=nullptr)
Converts a string to a RamFloat.
Definition: StringUtil.h:93
souffle::ReadStream::readADT
RamDomain readADT(const std::string &source, const std::string &adtName, size_t pos=0, size_t *charactersRead=nullptr)
Definition: ReadStream.h:143
souffle::ReadStream::readRecord
RamDomain readRecord(const std::string &source, const std::string &recordTypeName, size_t pos=0, size_t *charactersRead=nullptr)
Read a record from a string.
Definition: ReadStream.h:71
souffle::ReadStream::ReadStream
ReadStream(const std::map< std::string, std::string > &rwOperation, SymbolTable &symTab, RecordTable &recTab)
Definition: ReadStream.h:46
souffle::ReadStreamFactory::getName
virtual const std::string & getName() const =0
souffle::ReadStream::consumeChar
void consumeChar(const std::string &str, char c, size_t &pos)
Read past given character, consuming any preceding whitespace.
Definition: ReadStream.h:290