import argparse
import os
import pickle
import shutil
import sys
import tempfile
import time
import traceback
from multiprocessing import Process, cpu_count
from pathlib import Path
from time import sleep
from typing import Any, Optional, Union
import psutil
import zmq
from pyproj import CRS
from py3dtiles.constants import EXIT_CODES
from py3dtiles.exceptions import (
Py3dtilesException,
SrsInMissingException,
TilerException,
WorkerException,
)
from py3dtiles.tilers.base_tiler import Tiler
from py3dtiles.tilers.base_tiler.message_type import ManagerMessage, WorkerMessageType
from py3dtiles.tilers.base_tiler.tiler_worker import TilerWorker
from py3dtiles.tilers.point.point_tiler import PointTiler
from py3dtiles.utils import mkdir_or_raise, str_to_CRS
TOTAL_MEMORY_MB = int(psutil.virtual_memory().total / (1024 * 1024))
DEFAULT_CACHE_SIZE = int(TOTAL_MEMORY_MB / 10)
CPU_COUNT = cpu_count()
# IPC protocol is not supported on Windows
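# On Windows we bind TCP on port 0 so the OS picks a free port; the actual
# endpoint is read back later via getsockopt(zmq.LAST_ENDPOINT) in _ZmqManager.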
if os.name == "nt":
URI = "tcp://127.0.0.1:0"
else:
# Generate a unique name for this socket
tmpdir = tempfile.TemporaryDirectory()
URI = f"ipc://{tmpdir.name}/py3dtiles.sock"
META_TILER_NAME = b"meta"
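# The worker entry point is kept at module level so it stays picklable by
# multiprocessing (required with the "spawn" start method, the default on Windows).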
def _worker_target(
worker_tilers: dict[bytes, TilerWorker[Any]],
verbosity: int,
uri: bytes,
) -> None:
return _WorkerDispatcher(
worker_tilers,
verbosity,
uri,
).run()
class _WorkerDispatcher:
"""
    This class waits for job commands on the ZMQ socket and dispatches them
    to the matching tiler worker.
"""
skt: zmq.Socket[bytes]
def __init__(
self,
worker_tilers: dict[bytes, TilerWorker[Any]],
verbosity: int,
uri: bytes,
) -> None:
self.worker_tilers = worker_tilers
self.verbosity = verbosity
self.uri = uri
# Socket to receive messages on
self.context = zmq.Context()
def run(self) -> None:
self.skt = self.context.socket(zmq.DEALER)
self.skt.connect(self.uri) # type: ignore [arg-type]
startup_time = time.time()
idle_time = 0.0
# notify we're ready
self.skt.send_multipart([WorkerMessageType.REGISTER.value])
while True:
try:
before = time.time() - startup_time
self.skt.poll()
after = time.time() - startup_time
idle_time += after - before
message = self.skt.recv_multipart()
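                # Frame layout from the manager (the DEALER socket has already
                # stripped the routing envelope):
                # [pickled send time, tiler name, command, *payload]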
tiler_name = message[1]
command = message[2]
content = message[3:]
delta = time.time() - pickle.loads(message[0])
if delta > 0.01 and self.verbosity >= 1:
print(
f"{os.getpid()} / {round(after, 2)} : Delta time: {round(delta, 3)}"
)
if command == ManagerMessage.SHUTDOWN.value:
break # ack
else:
self.worker_tilers[tiler_name].execute(self.skt, command, content)
# notify we're idle
self.skt.send_multipart([WorkerMessageType.IDLE.value])
            except Exception as e:
                # Print the traceback locally so the stack trace is not lost,
                # then report the error to the manager.
                traceback.print_exc()
                error_message = f"{e.__class__.__module__}.{e.__class__.__name__}: {e}"
                self.skt.send_multipart(
                    [WorkerMessageType.ERROR.value, error_message.encode()]
                )
if self.verbosity >= 1:
print(
"total: {} sec, idle: {}".format(
round(time.time() - startup_time, 1), round(idle_time, 1)
)
)
self.skt.send_multipart([WorkerMessageType.HALTED.value])
# Manager
class _ZmqManager:
"""
    Manager-side end of the ZMQ ROUTER/DEALER pair: it dispatches job messages
    to the worker processes and tracks which workers are registered and idle.
"""
def __init__(
self,
number_of_jobs: int,
worker_tilers: dict[bytes, TilerWorker[Any]],
verbosity: int,
) -> None:
"""
        :param number_of_jobs: the number of worker processes to spawn
        :param worker_tilers: the tiler workers, indexed by tiler name, handed to each spawned process
        :param verbosity: the verbosity level forwarded to the workers
"""
self.context = zmq.Context()
self.number_of_jobs = number_of_jobs
self.socket = self.context.socket(zmq.ROUTER)
self.socket.bind(URI)
# Useful only when TCP is used to get the URI with the opened port
self.uri = self.socket.getsockopt(zmq.LAST_ENDPOINT)
if not isinstance(self.uri, bytes):
raise RuntimeError(
"The uri returned by self.socket.getsockopt should be bytes."
)
self.processes = [
Process(
target=_worker_target,
args=(worker_tilers, verbosity, self.uri),
)
for _ in range(number_of_jobs)
]
for p in self.processes:
p.start()
self.activities = [p.pid for p in self.processes]
self.clients: set[bytes] = set()
self.idle_clients: set[bytes] = set()
self.killing_processes = False
self.number_processes_killed = 0
self.time_waiting_an_idle_process = 0.0
def all_clients_registered(self) -> bool:
return len(self.clients) == self.number_of_jobs
def send_to_process(self, message: list[bytes]) -> None:
if not self.idle_clients:
raise ValueError("idle_clients is empty")
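        # A ROUTER socket addresses a peer by prepending its identity frame;
        # the pickled timestamp lets the worker measure dispatch latency.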
self.socket.send_multipart(
[self.idle_clients.pop(), pickle.dumps(time.time())] + message
)
def send_to_all_processes(self, message: list[bytes]) -> None:
if len(self.clients) == 0:
raise ValueError("No registered clients")
for client in self.clients:
self.socket.send_multipart([client, pickle.dumps(time.time())] + message)
def send_to_all_idle_processes(self, message: list[bytes]) -> None:
if not self.idle_clients:
raise ValueError("idle_clients is empty")
for client in self.idle_clients:
self.socket.send_multipart([client, pickle.dumps(time.time())] + message)
self.idle_clients.clear()
def can_queue_more_jobs(self) -> bool:
return len(self.idle_clients) != 0
def register_client(self, client_id: bytes) -> None:
if client_id in self.clients:
print(f"Warning: {client_id!r} already registered")
else:
self.clients.add(client_id)
self.add_idle_client(client_id)
def add_idle_client(self, client_id: bytes) -> None:
if client_id in self.idle_clients:
raise ValueError(f"The client id {client_id!r} is already in idle_clients")
self.idle_clients.add(client_id)
def are_all_processes_idle(self) -> bool:
return len(self.idle_clients) == self.number_of_jobs
def are_all_processes_killed(self) -> bool:
return self.number_processes_killed == self.number_of_jobs
def shutdown_all_processes(self) -> None:
self.send_to_all_processes([META_TILER_NAME, ManagerMessage.SHUTDOWN.value])
self.killing_processes = True
def join_all_processes(self) -> None:
for p in self.processes:
p.join()
def convert(
files: Union[list[Union[str, Path]], str, Path],
outfolder: Union[str, Path] = "./3dtiles",
overwrite: bool = False,
jobs: int = CPU_COUNT,
cache_size: int = DEFAULT_CACHE_SIZE,
crs_out: Optional[CRS] = None,
crs_in: Optional[CRS] = None,
force_crs_in: bool = False,
pyproj_always_xy: bool = False,
benchmark: Optional[str] = None,
rgb: bool = True,
classification: bool = True,
intensity: bool = True,
color_scale: Optional[float] = None,
use_process_pool: bool = True,
    verbose: int = 0,
) -> None:
"""
    Convert the input dataset into a 3D Tiles tileset.

    :param files: Filenames to process. The files must use the .las, .laz, .xyz or .ply format.
    :param outfolder: The folder where the resulting tileset will be written.
    :param overwrite: Overwrite the output folder if it already exists.
    :param jobs: The number of parallel jobs to start. Defaults to the number of CPUs.
    :param cache_size: Cache size in MB. Defaults to one tenth of the total system memory.
    :param crs_out: CRS to convert the output to
    :param crs_in: Set a default input CRS
    :param force_crs_in: Force every input CRS to be `crs_in`, even if the input files define their own
    :param pyproj_always_xy: When converting from one CRS to another, pass the `always_xy` flag to pyproj. This is useful if your data is in a CRS whose definition specifies an axis order other than easting/northing, but your data still has the easting component in the first field (often named X or longitude). See https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6 for more information.
    :param benchmark: Print a summary at the end of the process
    :param rgb: Export rgb attributes.
    :param classification: Export the classification attribute.
    :param intensity: Export intensity attributes. This support is currently limited to unsigned 8-bit integers for ply files, and to integers for xyz files.
    :param color_scale: Scale the colors by the specified amount. Useful to lighten or darken black point clouds that only have intensity.
    :param use_process_pool: Use a process pool when writing the tileset. Disable it in environments lacking shared memory.
    :param verbose: Verbosity level.
    :raises SrsInMissingException: if py3dtiles couldn't find SRS information in the input files and crs_in is not specified
    :raises SrsInMixinException: if the input files have different CRS
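
    Example (a minimal sketch; `input.las` and the EPSG codes below are placeholders)::

        from pyproj import CRS

        from py3dtiles.convert import convert

        convert(
            "input.las",
            outfolder="./3dtiles",
            crs_in=CRS.from_epsg(4326),
            crs_out=CRS.from_epsg(4978),
        )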
"""
converter = _Convert(
files,
outfolder=outfolder,
overwrite=overwrite,
jobs=jobs,
cache_size=cache_size,
crs_out=crs_out,
crs_in=crs_in,
force_crs_in=force_crs_in,
pyproj_always_xy=pyproj_always_xy,
benchmark=benchmark,
rgb=rgb,
classification=classification,
intensity=intensity,
color_scale=color_scale,
use_process_pool=use_process_pool,
verbose=verbose,
)
return converter.convert()
class _Convert:
def __init__(
self,
files: Union[list[Union[str, Path]], str, Path],
outfolder: Union[str, Path] = "./3dtiles",
overwrite: bool = False,
jobs: int = CPU_COUNT,
cache_size: int = DEFAULT_CACHE_SIZE,
crs_out: Optional[CRS] = None,
crs_in: Optional[CRS] = None,
force_crs_in: bool = False,
pyproj_always_xy: bool = False,
benchmark: Optional[str] = None,
rgb: bool = True,
classification: bool = True,
intensity: bool = True,
color_scale: Optional[float] = None,
use_process_pool: bool = True,
        verbose: int = 0,
) -> None:
"""
        :param files: Filenames to process. The files must use the .las, .laz, .xyz or .ply format.
        :param outfolder: The folder where the resulting tileset will be written.
        :param overwrite: Overwrite the output folder if it already exists.
        :param jobs: The number of parallel jobs to start. Defaults to the number of CPUs.
        :param cache_size: Cache size in MB. Defaults to one tenth of the total system memory.
        :param crs_out: CRS to convert the output to
        :param crs_in: Set a default input CRS
        :param force_crs_in: Force every input CRS to be `crs_in`, even if the input files define their own
        :param pyproj_always_xy: When converting from one CRS to another, pass the `always_xy` flag to pyproj. This is useful if your data is in a CRS whose definition specifies an axis order other than easting/northing, but your data still has the easting component in the first field (often named X or longitude). See https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6 for more information.
        :param benchmark: Print a summary at the end of the process
        :param rgb: Export rgb attributes.
        :param classification: Export the classification attribute.
        :param intensity: Export the intensity attribute.
        :param color_scale: Scale the colors by the specified amount. Useful to lighten or darken black point clouds that only have intensity.
        :param use_process_pool: Use a process pool when writing the tileset. Disable it in environments lacking shared memory.
        :param verbose: Verbosity level.
        :raises SrsInMissingException: if py3dtiles couldn't find SRS information in the input files and crs_in is not specified
        :raises SrsInMixinException: if the input files have different CRS
"""
# create folder
self.out_folder = Path(outfolder)
mkdir_or_raise(self.out_folder, overwrite=overwrite)
self.tilers = [
PointTiler(
self.out_folder,
files,
crs_in,
force_crs_in,
pyproj_always_xy,
rgb,
classification,
intensity,
color_scale,
cache_size,
verbose,
)
]
self.jobs = jobs
self.verbose = verbose
self.benchmark = benchmark
self.use_process_pool = use_process_pool
self.working_dir = self.out_folder / "tmp"
self.working_dir.mkdir(parents=True)
worker_tilers: dict[bytes, TilerWorker[Any]] = {}
for tiler in self.tilers:
if tiler.name in worker_tilers:
raise TilerException("There are tilers with the same attribute name.")
try:
tiler.initialization(
crs_out, self.working_dir / str(tiler.name), self.jobs
)
except Py3dtilesException as e:
shutil.rmtree(self.out_folder)
raise e
worker_tilers[tiler.name] = tiler.get_worker()
if self.verbose >= 1:
for tiler in self.tilers:
tiler.print_summary()
self.zmq_manager = _ZmqManager(
self.jobs,
worker_tilers,
self.verbose,
)
def convert(self) -> None:
"""convert
Convert pointclouds (xyz, las or laz) to 3dtiles tileset containing pnts node
"""
startup: float = time.time()
try:
for tiler in self.tilers:
while True:
now = time.time() - startup
at_least_one_job_ended = False
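                    # Read worker messages: block when no worker is idle (we
                    # cannot queue anything anyway), otherwise only consume
                    # messages that are already pending (poll with timeout=0).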
if (
not self.zmq_manager.can_queue_more_jobs()
or self.zmq_manager.socket.poll(timeout=0, flags=zmq.POLLIN)
):
at_least_one_job_ended = self.process_message(tiler)
                    # Wait for all processes to register before queuing more work.
                    # Otherwise an exception can fire in a worker before every worker
                    # has registered, and the shutdown signal would not reach them all.
if not self.zmq_manager.all_clients_registered():
sleep(0.1)
continue
if self.zmq_manager.can_queue_more_jobs():
for command, data in tiler.get_tasks(startup):
self.zmq_manager.send_to_process(
[PointTiler.name, command] + data
)
if not self.zmq_manager.can_queue_more_jobs():
break
# if at this point we have no work in progress => we're done
if self.zmq_manager.are_all_processes_idle():
break
if at_least_one_job_ended:
tiler.print_debug(
now, self.jobs, len(self.zmq_manager.idle_clients)
)
tiler.memory_control()
tiler.validate_binary_data()
if self.verbose >= 1:
print("Writing 3dtiles")
tiler.write_tileset(use_process_pool=self.use_process_pool)
shutil.rmtree(self.working_dir / str(tiler.name), ignore_errors=True)
if self.verbose >= 1:
print(f"Tiler {tiler.name!r} done")
if self.benchmark:
tiler.benchmark(self.benchmark, startup)
finally:
self.zmq_manager.shutdown_all_processes()
self.zmq_manager.join_all_processes()
if self.verbose >= 1:
print(
"destroy", round(self.zmq_manager.time_waiting_an_idle_process, 2)
)
self.zmq_manager.context.destroy()
def process_message(self, tiler: Tiler[Any, Any]) -> bool:
at_least_one_job_ended = False
# Blocking read but it's fine because either all our child processes are busy
# or we know that there's something to read (zmq.POLLIN)
start = time.time()
message = self.zmq_manager.socket.recv_multipart()
client_id = message[0]
return_type = message[1]
content = message[2:]
if return_type == WorkerMessageType.REGISTER.value:
self.zmq_manager.register_client(client_id)
elif return_type == WorkerMessageType.IDLE.value:
self.zmq_manager.add_idle_client(client_id)
if not self.zmq_manager.can_queue_more_jobs():
self.zmq_manager.time_waiting_an_idle_process += time.time() - start
elif return_type == WorkerMessageType.HALTED.value:
self.zmq_manager.number_processes_killed += 1
elif return_type == WorkerMessageType.ERROR.value:
raise WorkerException(
f"An exception occurred in a worker: {content[0].decode()}"
)
else:
at_least_one_job_ended = tiler.process_message(return_type, content)
return at_least_one_job_ended
def _init_parser(
subparser: "argparse._SubParsersAction[Any]",
) -> argparse.ArgumentParser:
parser: argparse.ArgumentParser = subparser.add_parser(
"convert",
help="Convert input 3D data to a 3dtiles tileset.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
"files",
nargs="+",
help="Filenames to process. The file must use the .las, .laz (lastools must be installed), .xyz or .ply format.",
)
parser.add_argument(
"--out",
type=str,
help="The folder where the resulting tileset will be written.",
default="./3dtiles",
)
parser.add_argument(
"--overwrite",
help="Delete and recreate the ouput folder if it already exists. WARNING: be careful, there will be no confirmation!",
action="store_true",
)
parser.add_argument(
"--jobs",
help="The number of parallel jobs to start. Default to the number of cpu.",
default=cpu_count(),
type=int,
)
parser.add_argument(
"--cache_size",
help="Cache size in MB. Default to available memory / 10.",
default=int(TOTAL_MEMORY_MB / 10),
type=int,
)
parser.add_argument(
"--srs_out",
help="SRS to convert the output with (numeric part of the EPSG code)",
type=str,
)
parser.add_argument(
"--srs_in", help="Override input SRS (numeric part of the EPSG code)", type=str
)
parser.add_argument(
"--benchmark", help="Print summary at the end of the process", type=str
)
parser.add_argument(
"--no-rgb", help="Don't export rgb attributes", action="store_true"
)
parser.add_argument(
"--classification", help="Export classification attributes", action="store_true"
)
parser.add_argument(
"--intensity",
help="Export intensity attributes. This support is currently limited to unsigned 8 bits integer for ply files, and to integers for xyz files.",
action="store_true",
)
parser.add_argument("--color_scale", help="Force color scale", type=float)
parser.add_argument(
"--force-srs-in",
help="Force the input srs even if the srs in the input files are different. CAUTION, use only if you know what you are doing.",
action="store_true",
)
parser.add_argument(
"--disable-processpool",
help="Disables using a process pool when writing 3D tiles. Useful for running in environments lacking shared memory.",
action="store_true",
)
parser.add_argument(
"--pyproj-always-xy",
help="When converting from a CRS to another, pass the `always_xy` flag to pyproj. This is useful if your data is in a CRS whose definition specifies an axis order other than easting/northing, but your data still have the easting component in the first field (often named X or longitude). See https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6 for more information. ",
action="store_true",
)
return parser
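# Example CLI invocation using the flags defined above (a sketch; the file name
# and EPSG codes are placeholders):
#   py3dtiles convert input.las --out ./3dtiles --srs_in 4326 --srs_out 4978 --jobs 4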
def _main(args: argparse.Namespace) -> None:
try:
return convert(
args.files,
outfolder=args.out,
overwrite=args.overwrite,
jobs=args.jobs,
cache_size=args.cache_size,
crs_out=str_to_CRS(args.srs_out),
crs_in=str_to_CRS(args.srs_in),
force_crs_in=args.force_srs_in,
pyproj_always_xy=args.pyproj_always_xy,
benchmark=args.benchmark,
rgb=not args.no_rgb,
classification=args.classification,
intensity=args.intensity,
color_scale=args.color_scale,
use_process_pool=not args.disable_processpool,
verbose=args.verbose,
)
except SrsInMissingException:
print(
"No SRS information in input files, you should specify it with --srs_in",
file=sys.stderr,
)
sys.exit(EXIT_CODES.MISSING_SRS_IN_FILE.value)