Skip to content

Commit fd5f974

Browse files
authored
Analyse structure fichiers NeTEx existants pour le GT7 (#4259)
* Trailing whitespace in README
* Modest README for the scripts directory
* Refactoring of the NeTEx script
* Analyzer for the structure of NeTEx files — produces a CSV
* Hierarchy level of each file
* Please the linter
1 parent 1a5b798 commit fd5f974

File tree

4 files changed

+137
-47
lines changed

4 files changed

+137
-47
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ mix cmd --app transport mix test --color
166166
mix cmd --app unlock mix test --color
167167
168168
# or, for a single file, or single test
169-
mix cmd --app transport mix test --color test/transport_web/integrations/backoffice_test.exs
169+
mix cmd --app transport mix test --color test/transport_web/integrations/backoffice_test.exs
170170
mix cmd --app transport mix test --color test/transport_web/integrations/backoffice_test.exs:8
171171
```
172172

scripts/README.md

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Scripts
2+
3+
An unstructured collection of scripts to explore or patch data. Can be used to
4+
fix production data, to do some cold analysis, or to test some piece of code.
5+
6+
## Usage
7+
8+
Assuming you've sourced the required environment variables (mostly the database connector), you can launch a given script with `mix run`:
9+
10+
```
11+
mix run scripts/my-script.exs
12+
```

scripts/netex_analyzer.exs

+42-46
Original file line numberDiff line numberDiff line change
@@ -20,57 +20,53 @@ df =
2020
end)
2121
|> Enum.filter(&(&1.unverified_format == "NeTEx"))
2222

23-
netex =
24-
df
25-
|> Task.async_stream(
26-
fn r ->
27-
url = r.url
28-
file = Path.join("cache-dir", "resource-#{r.id}.dat")
29-
status_file = file <> ".status.json"
23+
download_resource = fn r ->
24+
url = r.url
25+
file = Path.join("cache-dir", "resource-#{r.id}.dat")
26+
status_file = file <> ".status.json"
3027

31-
unless File.exists?(status_file) do
32-
IO.puts("Saving #{url}")
33-
url = if String.contains?(url, "|"), do: URI.encode(url), else: url
28+
unless File.exists?(status_file) do
29+
IO.puts("Saving #{url}")
30+
url = if String.contains?(url, "|"), do: URI.encode(url), else: url
3431

35-
%{status: status} =
36-
Transport.HTTPClient.get!(url,
37-
decode_body: false,
38-
compressed: false,
39-
into: File.stream!(file)
40-
)
32+
%{status: status} =
33+
Transport.HTTPClient.get!(url,
34+
decode_body: false,
35+
compressed: false,
36+
into: File.stream!(file)
37+
)
4138

42-
File.write!(status_file, %{status: status} |> Jason.encode!())
43-
end
39+
File.write!(status_file, %{status: status} |> Jason.encode!())
40+
end
4441

45-
%{"status" => status} = File.read!(status_file) |> Jason.decode!()
42+
%{"status" => status} = File.read!(status_file) |> Jason.decode!()
4643

47-
r
48-
|> Map.put(:http_status, status)
49-
|> Map.put(:local_path, file)
50-
end,
51-
max_concurrency: 10,
52-
timeout: 120_000
53-
)
54-
|> Stream.map(fn {:ok, result} -> result end)
55-
|> Stream.reject(&is_nil(&1))
56-
|> Task.async_stream(
57-
fn r ->
58-
IO.puts("Processing file #{r.id}")
44+
r
45+
|> Map.put(:http_status, status)
46+
|> Map.put(:local_path, file)
47+
end
48+
49+
count_relevant_stop_places_per_resource = fn r ->
50+
IO.puts("Processing file #{r.id}")
5951

60-
try do
61-
count =
62-
Transport.NeTEx.read_all_stop_places(r.local_path)
63-
|> Enum.flat_map(fn {_file, stops} -> stops end)
64-
# some stop places have no latitude in NeTEx
65-
|> Enum.reject(fn p -> is_nil(p[:latitude]) end)
66-
|> Enum.count()
52+
try do
53+
count =
54+
Transport.NeTEx.read_all_stop_places(r.local_path)
55+
|> Enum.flat_map(fn {_file, stops} -> stops end)
56+
# some stop places have no latitude in NeTEx
57+
|> Enum.reject(fn p -> is_nil(p[:latitude]) end)
58+
|> Enum.count()
6759

68-
IO.puts("#{count} StopPlaces detected")
69-
rescue
70-
e -> IO.puts("Som'thing bad happened")
71-
end
72-
end,
73-
max_concurrency: 5,
74-
timeout: 60_000 * 5
75-
)
60+
IO.puts("#{count} StopPlaces detected")
61+
rescue
62+
_ -> IO.puts("Som'thing bad happened")
63+
end
64+
end
65+
66+
netex =
67+
df
68+
|> Task.async_stream(download_resource, max_concurrency: 10, timeout: 120_000)
69+
|> Stream.map(fn {:ok, result} -> result end)
70+
|> Stream.reject(&is_nil(&1))
71+
|> Task.async_stream(count_relevant_stop_places_per_resource, max_concurrency: 5, timeout: 60_000 * 5)
7672
|> Stream.run()

scripts/netex_layout_analyzer.exs

+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
resources =
2+
DB.Resource
3+
|> DB.Repo.all()
4+
5+
# count
6+
resources
7+
|> Enum.count()
8+
|> IO.inspect()
9+
10+
df =
11+
resources
12+
|> Enum.map(fn r ->
13+
%{
14+
id: r.id,
15+
url: r.url,
16+
title: r.title,
17+
unverified_format: r.format,
18+
description: r.description
19+
}
20+
end)
21+
|> Enum.filter(&(&1.unverified_format == "NeTEx"))
22+
23+
download_resource = fn r ->
24+
url = r.url
25+
file = Path.join("cache-dir", "resource-#{r.id}.dat")
26+
status_file = file <> ".status.json"
27+
28+
unless File.exists?(status_file) do
29+
IO.puts("Saving #{url}")
30+
url = if String.contains?(url, "|"), do: URI.encode(url), else: url
31+
32+
%{status: status} =
33+
Transport.HTTPClient.get!(url,
34+
decode_body: false,
35+
compressed: false,
36+
into: File.stream!(file)
37+
)
38+
39+
File.write!(status_file, %{status: status} |> Jason.encode!())
40+
end
41+
42+
%{"status" => status} = File.read!(status_file) |> Jason.decode!()
43+
44+
r
45+
|> Map.put(:http_status, status)
46+
|> Map.put(:local_path, file)
47+
end
48+
49+
hierarchy_level = fn file -> file |> String.split("/") |> Enum.count() end
50+
51+
dump_netex_files = fn r ->
52+
IO.puts("Processing file #{r.id}")
53+
54+
url = "https://transport.data.gouv.fr/resources/#{r.id}"
55+
56+
result =
57+
try do
58+
Transport.NeTEx.read_all_stop_places(r.local_path)
59+
|> Enum.map(fn {file, _stops} -> file end)
60+
|> Enum.reject(fn file -> String.ends_with?(file, "/") end)
61+
|> Enum.map(fn file -> [url, r.title, r.url, file, hierarchy_level.(file)] end)
62+
rescue
63+
_ ->
64+
IO.puts("Som'thing bad happened")
65+
[]
66+
end
67+
68+
NimbleCSV.RFC4180.dump_to_iodata(result)
69+
end
70+
71+
output_file = "netex_layout_analysis.csv"
72+
73+
File.write(output_file, NimbleCSV.RFC4180.dump_to_iodata([~w(resource title url file hierarchy)]))
74+
75+
df
76+
|> Task.async_stream(download_resource, max_concurrency: 10, timeout: 120_000)
77+
|> Stream.map(fn {:ok, result} -> result end)
78+
|> Stream.reject(&is_nil(&1))
79+
|> Task.async_stream(dump_netex_files, max_concurrency: 5, timeout: 60_000 * 5)
80+
|> Stream.map(fn {:ok, result} -> result end)
81+
|> Stream.into(File.stream!(output_file, [:append, :utf8]))
82+
|> Stream.run()

0 commit comments

Comments
 (0)