Skip to content

Commit b1f9dae

Browse files
authored
Implement a slow but working SchemaFrame::is_reachable (#2209)
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent e020830 commit b1f9dae

13 files changed

+4700
-14
lines changed

benchmark/jsonschema.cc

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,24 @@ static void Schema_Frame_Many_Resources_References(benchmark::State &state) {
179179
}
180180
}
181181

182+
// Benchmark: frame the KrakenD 2019-09 schema once (outside of the timed
// loop), then measure asking `is_reachable` for every framed location
static void Schema_Frame_KrakenD_Reachable(benchmark::State &state) {
  const auto schema_path{std::filesystem::path{CURRENT_DIRECTORY} / "schemas" /
                         "2019_09_krakend.json"};
  const auto schema{sourcemeta::core::read_json(schema_path)};

  // The analysis is set-up work, so it stays out of the measured section
  sourcemeta::core::SchemaFrame frame{
      sourcemeta::core::SchemaFrame::Mode::References};
  frame.analyse(schema, sourcemeta::core::schema_walker,
                sourcemeta::core::schema_resolver);

  for (auto _ : state) {
    for (const auto &[key, location] : frame.locations()) {
      auto result{frame.is_reachable(location)};
      // Keep the compiler from discarding the otherwise unused result
      benchmark::DoNotOptimize(result);
    }
  }
}
199+
182200
// Register the schema frame benchmarks so that the benchmark runner picks
// them up
BENCHMARK(Schema_Frame_WoT_References);
183201
BENCHMARK(Schema_Frame_OMC_References);
184202
BENCHMARK(Schema_Frame_OMC_Locations);
@@ -191,3 +209,4 @@ BENCHMARK(Schema_Tracker_ISO_Language_To_JSON);
191209
// Remaining benchmark registrations. The last entry registers the
// `SchemaFrame::is_reachable` benchmark over the KrakenD schema
BENCHMARK(Schema_Format_ISO_Language_To_JSON);
192210
BENCHMARK(Schema_Bundle_Meta_2020_12);
193211
BENCHMARK(Schema_Frame_Many_Resources_References);
212+
BENCHMARK(Schema_Frame_KrakenD_Reachable);

src/core/jsonschema/frame.cc

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1359,4 +1359,171 @@ auto SchemaFrame::reset() -> void {
13591359
this->references_.clear();
13601360
}
13611361

1362+
static auto is_definitions_container(
1363+
const WeakPointer &pointer_location,
1364+
const std::map<WeakPointer, const SchemaFrame::Location *> &pointer_index)
1365+
-> bool {
1366+
for (const auto &[candidate_pointer, candidate_location] : pointer_index) {
1367+
// Check if candidate is a direct child of this pointer
1368+
if (candidate_pointer.size() == pointer_location.size() + 1 &&
1369+
candidate_pointer.starts_with(pointer_location) &&
1370+
candidate_location->orphan &&
1371+
(candidate_location->type == SchemaFrame::LocationType::Subschema ||
1372+
candidate_location->type == SchemaFrame::LocationType::Resource)) {
1373+
return true;
1374+
}
1375+
}
1376+
return false;
1377+
}
1378+
1379+
static auto compute_orphan_depth(
1380+
const WeakPointer &pointer,
1381+
const std::map<WeakPointer, const SchemaFrame::Location *> &pointer_index)
1382+
-> std::size_t {
1383+
std::size_t depth{0};
1384+
bool previous_was_non_orphan{true};
1385+
const SchemaFrame::Location *previous_orphan_subschema{nullptr};
1386+
1387+
for (std::size_t index = 0; index < pointer.size(); ++index) {
1388+
const auto current{pointer.slice(0, index + 1)};
1389+
const auto iter{pointer_index.find(current)};
1390+
if (iter == pointer_index.end()) {
1391+
continue;
1392+
}
1393+
1394+
const auto &location{*iter->second};
1395+
if (location.orphan) {
1396+
if (location.type == SchemaFrame::LocationType::Pointer) {
1397+
if (previous_orphan_subschema != nullptr &&
1398+
is_definitions_container(current, pointer_index)) {
1399+
if (location.parent.has_value()) {
1400+
const auto parent_iter{pointer_index.find(location.parent.value())};
1401+
if (parent_iter != pointer_index.end() &&
1402+
parent_iter->second->parent.has_value()) {
1403+
const auto grandparent_iter{
1404+
pointer_index.find(parent_iter->second->parent.value())};
1405+
if (grandparent_iter != pointer_index.end() &&
1406+
grandparent_iter->second->orphan) {
1407+
depth++;
1408+
}
1409+
}
1410+
}
1411+
}
1412+
previous_was_non_orphan = false;
1413+
} else if (location.type == SchemaFrame::LocationType::Subschema ||
1414+
location.type == SchemaFrame::LocationType::Resource) {
1415+
if (previous_was_non_orphan) {
1416+
depth++;
1417+
}
1418+
previous_orphan_subschema = &location;
1419+
previous_was_non_orphan = false;
1420+
}
1421+
} else {
1422+
previous_orphan_subschema = nullptr;
1423+
previous_was_non_orphan = true;
1424+
}
1425+
}
1426+
1427+
return depth;
1428+
}
1429+
1430+
static auto is_applicator_descendant(
1431+
const WeakPointer &parent, const WeakPointer &child,
1432+
const std::map<WeakPointer, const SchemaFrame::Location *> &pointer_index)
1433+
-> bool {
1434+
if (child == parent) {
1435+
return true;
1436+
}
1437+
1438+
if (!child.starts_with(parent)) {
1439+
return false;
1440+
}
1441+
1442+
const auto parent_depth{compute_orphan_depth(parent, pointer_index)};
1443+
const auto child_depth{compute_orphan_depth(child, pointer_index)};
1444+
1445+
return child_depth <= parent_depth;
1446+
}
1447+
1448+
// TODO: Optimise this. Right now it works, but its very inefficient
1449+
auto SchemaFrame::is_reachable(const Location &location) const -> bool {
1450+
if (!location.orphan) {
1451+
return true;
1452+
}
1453+
1454+
std::map<WeakPointer, const Location *> pointer_index;
1455+
for (const auto &entry : this->locations_) {
1456+
pointer_index.emplace(entry.second.pointer, &entry.second);
1457+
}
1458+
1459+
std::set<WeakPointer> visited;
1460+
std::function<bool(const WeakPointer &)> is_reachable_via_references =
1461+
[&](const WeakPointer &target_pointer) -> bool {
1462+
if (visited.contains(target_pointer)) {
1463+
return false;
1464+
}
1465+
1466+
visited.insert(target_pointer);
1467+
1468+
for (const auto &reference : this->references_) {
1469+
const auto destination_location{this->locations_.find(
1470+
{SchemaReferenceType::Static, reference.second.destination})};
1471+
if (destination_location == this->locations_.cend()) {
1472+
const auto dynamic_destination{this->locations_.find(
1473+
{SchemaReferenceType::Dynamic, reference.second.destination})};
1474+
if (dynamic_destination == this->locations_.cend()) {
1475+
continue;
1476+
}
1477+
1478+
if (is_applicator_descendant(dynamic_destination->second.pointer,
1479+
target_pointer, pointer_index)) {
1480+
const auto &source_pointer{reference.first.second};
1481+
if (source_pointer.empty()) {
1482+
continue;
1483+
}
1484+
1485+
const auto parent_pointer{source_pointer.initial()};
1486+
1487+
const auto parent_location{this->traverse(parent_pointer)};
1488+
if (parent_location.has_value()) {
1489+
if (!parent_location->get().orphan) {
1490+
return true;
1491+
}
1492+
1493+
if (is_reachable_via_references(parent_pointer)) {
1494+
return true;
1495+
}
1496+
}
1497+
}
1498+
1499+
continue;
1500+
}
1501+
1502+
if (is_applicator_descendant(destination_location->second.pointer,
1503+
target_pointer, pointer_index)) {
1504+
const auto &source_pointer{reference.first.second};
1505+
if (source_pointer.empty()) {
1506+
continue;
1507+
}
1508+
const auto parent_pointer{source_pointer.initial()};
1509+
1510+
const auto parent_location{this->traverse(parent_pointer)};
1511+
if (parent_location.has_value()) {
1512+
if (!parent_location->get().orphan) {
1513+
return true;
1514+
}
1515+
1516+
if (is_reachable_via_references(parent_pointer)) {
1517+
return true;
1518+
}
1519+
}
1520+
}
1521+
}
1522+
1523+
return false;
1524+
};
1525+
1526+
return is_reachable_via_references(location.pointer);
1527+
}
1528+
13621529
} // namespace sourcemeta::core

src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,9 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame {
229229
/// Reset the frame, clearing all analysed data
230230
auto reset() -> void;
231231

232+
/// Determines if a location could be evaluated during validation. Locations
/// that are not flagged as orphans are always reported as reachable. An
/// orphan location is reported as reachable only if a reference in the frame
/// leads to it (or to a location that covers it) from a location whose
/// parent is not an orphan or is in turn reachable through references. The
/// current implementation is known to be slow.
233+
[[nodiscard]] auto is_reachable(const Location &location) const -> bool;
234+
232235
private:
233236
Mode mode_;
234237
// Exporting symbols that depends on the standard C++ library is considered

0 commit comments

Comments
 (0)