#python #macos #multiprocessing #apple-m1 #r-tree
#питон #macos #многопроцессорная обработка #яблоко-м1 #r-дерево
Вопрос:
При использовании пакета rtree с несколькими процессами произошла какая-то странная вещь.
Кажется, всё работает правильно, за исключением экземпляра rtree Index
в дочернем процессе с именем «Process-1». Я вставил ограничивающий прямоугольник (bounding box)
в экземпляр Index в главном процессе, но в процессе Process-1 этот index
оказывается пустым объектом, тогда как с объектом pandas.DataFrame
этого не происходит.
Я понятия не имею, в чём причина этой проблемы. Может ли кто-нибудь помочь?
Информация о моей компьютерной среде выглядит следующим образом:
- macOS m1 Big Sur 11.6 (20G165)
- miniforge conda 4.11.0 с Python 3.9.6
- rtree 0.9.7
#! python3
# -*- encoding: utf-8 -*-
"""Reproduction script: hand an rtree index and a DataFrame to child processes.

The same index/DataFrame pair is passed to ``wrapper`` through a plain
``Process``, through ``Process`` from inside a class, and through a
``multiprocessing.Pool``; each side prints what it sees so the parent and
child views can be compared.

NOTE(review): the child may report an empty index (size=0) even though the
parent reports size=1. Presumably this depends on whether the arguments are
serialized on their way to the child (process start method, Pool task
queue) -- confirm against the rtree and multiprocessing documentation.
"""
import multiprocessing
from multiprocessing import Process

import pandas as pd
from rtree.index import Index as RTreeIndex


class Outclass:
    """Own an rtree index and a DataFrame and send them to worker processes."""

    def __init__(self) -> None:
        # prepare index instance
        self.rtree = RTreeIndex()
        self.rtree.insert(1, (34, 26, 49, 41))
        # prepare dataframe instance
        self.df = pd.DataFrame([[1, 2, 3], ['a', 'b', 'c']])

        # Print the instance's own objects rather than module-level globals,
        # so the class also works when this file is imported.
        print("index class: ", self.rtree)  # size=1
        ret = list(self.rtree.nearest((10, 20), 1))
        print("ret class: ", ret)  # ret is list [1]
        print("df class: ", self.df)

    def test_process(self):
        """Run ``wrapper`` in a single child process named "Process-2"."""
        p = Process(
            target=wrapper,
            name="Process-2",
            args=(self.rtree, 10, 20, self.df),
        )
        p.start()
        p.join()

    def test_process_pool(self):
        """Run ``wrapper`` in a 2-worker pool with the instance's index."""
        # The context manager releases the pool's workers and semaphores
        # once the result has been collected.
        with multiprocessing.Pool(2) as pool:
            stas = pool.starmap_async(wrapper, [(self.rtree, 10, 20, self.df)])
            stas.get()

    def test_process_pool_local(self):
        """Run ``wrapper`` in a 2-worker pool with a freshly built index."""
        # prepare index instance
        local_index = RTreeIndex()
        local_index.insert(1, (34, 26, 49, 41))
        with multiprocessing.Pool(2) as pool:
            stas = pool.starmap_async(wrapper, [(local_index, 10, 20, self.df)])
            stas.get()


def wrapper(index, x, y, df):
    """Print the received index and DataFrame; return the nearest-item ids.

    Args:
        index: rtree index to query.
        x: first coordinate of the query point.
        y: second coordinate of the query point.
        df: DataFrame that is only printed, for comparison with the index.

    Returns:
        The list produced by ``index.nearest((x, y), 1)``.
    """
    print("index inner: ", index)  # size=0 was observed in the child
    ret = list(index.nearest((x, y), 1))
    print("ret inner: ", ret)  # an empty list [] was observed in the child
    print("df inner: ", df)
    return ret


def main() -> None:
    """Build the index/DataFrame in the parent and run every scenario."""
    # prepare index instance
    index = RTreeIndex()
    index.insert(1, (34, 26, 49, 41))
    # prepare dataframe instance
    df = pd.DataFrame([[1, 2, 3], ['a', 'b', 'c']])

    print("index outer: ", index)  # size=1
    ret = list(index.nearest((10, 20), 1))
    print("ret outer: ", ret)  # ret is list [1]
    print("df outer: ", df)

    print("--"*10)
    p = Process(target=wrapper, name="Process-1", args=(index, 10, 20, df))
    p.start()
    p.join()

    print("=="*10)
    oc = Outclass()
    print(" "*10)
    oc.test_process()
    print("**"*10)
    oc.test_process_pool()
    print("@@"*10)
    oc.test_process_pool_local()


if __name__ == "__main__":
    main()
результат mac:
(geo) [dogqizi@MacDog ~ ]$ python rtreeP.py
index outer:  rtree.index.Index(bounds=[34.0, 26.0, 49.0, 41.0], size=1)
ret outer:  [1]
df outer:     0  1  2
0  1  2  3
1  a  b  c
--------------------
index inner:  rtree.index.Index(bounds=[1.7976931348623157e+308, 1.7976931348623157e+308, -1.7976931348623157e+308, -1.7976931348623157e+308], size=0)
ret inner:  []
df inner:     0  1  2
0  1  2  3
1  a  b  c
====================
index class:  rtree.index.Index(bounds=[34.0, 26.0, 49.0, 41.0], size=1)
ret class:  [1]
df class:     0  1  2
0  1  2  3
1  a  b  c

index inner:  rtree.index.Index(bounds=[1.7976931348623157e+308, 1.7976931348623157e+308, -1.7976931348623157e+308, -1.7976931348623157e+308], size=0)
ret inner:  []
df inner:     0  1  2
0  1  2  3
1  a  b  c
********************
index inner:  rtree.index.Index(bounds=[1.7976931348623157e+308, 1.7976931348623157e+308, -1.7976931348623157e+308, -1.7976931348623157e+308], size=0)
ret inner:  []
df inner:     0  1  2
0  1  2  3
1  a  b  c
@@@@@@@@@@@@@@@@@@@@
index inner:  rtree.index.Index(bounds=[1.7976931348623157e+308, 1.7976931348623157e+308, -1.7976931348623157e+308, -1.7976931348623157e+308], size=0)
ret inner:  []
df inner:     0  1  2
0  1  2  3
1  a  b  c
What’s more, I ran the same experiment on Linux, and the results are quite different. The environment is as follows:
- Linux VM-0-2-centos 3.10.0-1160.31.1.el7.x86_64 #1 SMP Thu Jun 10 13:32:12 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
- Exactly the same Python environment as on macOS above.
linux result:
(geo) lighthouse@VM-0-2-centos:Codes$/home/lighthouse/miniforge3/envs/geo/bin/python /home/lighthouse/Codes/rtreeP.py
index outer:  rtree.index.Index(bounds=[34.0, 26.0, 49.0, 41.0], size=1)
ret outer:  [1]
df outer:     0  1  2
0  1  2  3
1  a  b  c
--------------------
index inner:  rtree.index.Index(bounds=[34.0, 26.0, 49.0, 41.0], size=1)
ret inner:  [1]
df inner:     0  1  2
0  1  2  3
1  a  b  c
====================
index class:  rtree.index.Index(bounds=[34.0, 26.0, 49.0, 41.0], size=1)
ret class:  [1]
df class:     0  1  2
0  1  2  3
1  a  b  c

index inner:  rtree.index.Index(bounds=[34.0, 26.0, 49.0, 41.0], size=1)
ret inner:  [1]
df inner:     0  1  2
0  1  2  3
1  a  b  c
********************
index inner:  rtree.index.Index(bounds=[1.7976931348623157e+308, 1.7976931348623157e+308, -1.7976931348623157e+308, -1.7976931348623157e+308], size=0)
ret inner:  []
df inner:     0  1  2
0  1  2  3
1  a  b  c
@@@@@@@@@@@@@@@@@@@@
index inner:  rtree.index.Index(bounds=[1.7976931348623157e+308, 1.7976931348623157e+308, -1.7976931348623157e+308, -1.7976931348623157e+308], size=0)
ret inner:  []
df inner:     0  1  2
0  1  2  3
1  a  b  c
How can I use a Python rtree index with multiple processes?
Of course, I also tried multithreading, but an error still occurs:
multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 2 leaked semaphore objects to clean up at shutdown
  warnings.warn('resource_tracker: There appear to be %d '
Process finished with exit code 139 (interrupted by signal 11: SIGSEGV)
Я предполагаю, что это может быть связано с тем, что rtree реализован как расширение для CPython?