8000 orca on k8s, AutoXGBoostClassifier and AutoXGBoostRegressor issue · Issue #27 · intel/analytics-zoo · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content
orca on k8s, AutoXGBoostClassifier and AutoXGBoostRegressor issue #27

Closed
@glorysdj

Description

@glorysdj

AutoXGBoostClassifier.py

client command

${SPARK_HOME}/bin/spark-submit \
  --master ${RUNTIME_SPARK_MASTER} \
  --deploy-mode client \
  --conf spark.driver.host=172.16.0.200 \
  --conf spark.driver.port=54321 \
  --conf spark.kubernetes.authenticate.driver.serviceAccountName=${RUNTIME_K8S_SERVICE_ACCOUNT} \
  --name analytics-zoo-autoestimator \
  --conf spark.kubernetes.container.image=${RUNTIME_K8S_SPARK_IMAGE} \
  --conf spark.executor.instances=${RUNTIME_EXECUTOR_INSTANCES} \
  --conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
  --conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/tmp \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/tmp \
  --conf spark.kubernetes.driver.label.az=true \
  --conf spark.kubernetes.executor.label.az=true \
  --conf spark.kubernetes.node.selector.spark=true \
  --executor-cores ${RUNTIME_EXECUTOR_CORES} \
  --executor-memory ${RUNTIME_EXECUTOR_MEMORY} \
  --total-executor-cores ${RUNTIME_TOTAL_EXECUTOR_CORES} \
  --driver-cores ${RUNTIME_DRIVER_CORES} \
  --driver-memory ${RUNTIME_DRIVER_MEMORY} \
  --properties-file ${BIGDL_HOME}/conf/spark-bigdl.conf \
  --py-files local://${BIGDL_HOME}/python/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-serving-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local:///opt/bigdl-0.14.0-SNAPSHOT/examples/orca/automl/autoxgboost/AutoXGBoostClassifier.py \
  --conf spark.driver.extraJavaOptions=-Dderby.stream.error.file=/tmp \
  --conf spark.sql.catalogImplementation='in-memory' \
  --conf spark.driver.extraClassPath=local://${BIGDL_HOME}/jars/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar \
  --conf spark.executor.extraClassPath=local://${BIGDL_HOME}/jars/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar \
  local:///opt/bigdl-0.14.0-SNAPSHOT/examples/orca/automl/autoxgboost/AutoXGBoostClassifier.py \
  --path /home/airline_14col.data

cluster command

${SPARK_HOME}/bin/spark-submit \
  --master ${RUNTIME_SPARK_MASTER} \
  --deploy-mode cluster \
  --conf spark.kubernetes.authenticate.driver.serviceAccountName=${RUNTIME_K8S_SERVICE_ACCOUNT} \
  --name analytics-zoo-autoestimator \
  --conf spark.kubernetes.container.image=${RUNTIME_K8S_SPARK_IMAGE} \
  --conf spark.executor.instances=${RUNTIME_EXECUTOR_INSTANCES} \
  --conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
  --conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/tmp \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/tmp \
  --conf spark.kubernetes.driver.label.az=true \
  --conf spark.kubernetes.executor.label.az=true \
  --conf spark.kubernetes.node.selector.spark=true \
  --executor-cores ${RUNTIME_EXECUTOR_CORES} \
  --executor-memory ${RUNTIME_EXECUTOR_MEMORY} \
  --total-executor-cores ${RUNTIME_TOTAL_EXECUTOR_CORES} \
  --driver-cores ${RUNTIME_DRIVER_CORES} \
  --driver-memory ${RUNTIME_DRIVER_MEMORY} \
  --properties-file ${BIGDL_HOME}/conf/spark-bigdl.conf \
  --py-files local://${BIGDL_HOME}/python/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-serving-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local:///opt/bigdl-0.14.0-SNAPSHOT/examples/orca/automl/autoxgboost/AutoXGBoostClassifier.py \
  --conf spark.driver.extraJavaOptions=-Dderby.stream.error.file=/tmp \
  --conf spark.sql.catalogImplementation='in-memory' \
  --conf spark.driver.extraClassPath=local://${BIGDL_HOME}/jars/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar \
  --conf spark.executor.extraClassPath=local://${BIGDL_HOME}/jars/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar \
  local:///opt/bigdl-0.14.0-SNAPSHOT/examples/orca/automl/autoxgboost/AutoXGBoostClassifier.py \
  --path /home/airline_14col.data

Client Exception

Traceback (most recent call last):
  File "/opt/bigdl-0.14.0-SNAPSHOT/examples/orca/automl/autoxgboost/AutoXGBoostClassifier.py", line 24, in <module>
    from bigdl.orca.automl.xgboost import AutoXGBClassifier
  File "<frozen importlib._bootstrap>", line 971, in _find_and_load
  File "<frozen importlib._bootstrap>", line 955, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 656, in _load_unlocked
  File "<frozen importlib._bootstrap>", line 626, in _load_backward_compatible
  File "/opt/bigdl-0.14.0-SNAPSHOT/python/bigdl-friesian-spark_3.1.2-0.14.0-SNAPSHOT-python-api.zip/bigdl/orca/automl/xgboost/__init__.py", line 16, in <module>
  File "<frozen importlib._bootstrap>", line 971, in _find_and_load
  File "<frozen importlib._bootstrap>", line 955, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 656, in _load_unlocked
  File "<frozen importlib._bootstrap>", line 626, in _load_backward_compatible
  File "/opt/bigdl-0.14.0-SNAPSHOT/python/bigdl-friesian-spark_3.1.2-0.14.0-SNAPSHOT-python-api.zip/bigdl/orca/automl/xgboost/auto_xgb.py", line 18, in <module>
  File "<frozen importlib._bootstrap>", line 971, in _find_and_load
  File "<frozen importlib._bootstrap>", line 955, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 656, in _load_unlocked
  File "<frozen importlib._bootstrap>", line 626, in _load_backward_compatible
  File "/opt/bigdl-0.14.0-SNAPSHOT/python/bigdl-friesian-spark_3.1.2-0.14.0-SNAPSHOT-python-api.zip/bigdl/orca/automl/auto_estimator.py", line 16, in <module>
  File "<frozen importlib._bootstrap>", line 971, in _find_and_load
  File "<frozen importlib._bootstrap>", line 955, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 656, in _load_unlocked
  File "<frozen importlib._bootstrap>", line 626, in _load_backward_compatible
  File "/opt/bigdl-0.14.0-SNAPSHOT/python/bigdl-friesian-spark_3.1.2-0.14.0-SNAPSHOT-python-api.zip/bigdl/orca/automl/search/__init__.py", line 18, in <module>
  File "<frozen importlib._bootstrap>", line 971, in _find_and_load
  File "<frozen importlib._bootstrap>", line 955, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 656, in _load_unlocked
  File "<frozen importlib._bootstrap>", line 626, in _load_backward_compatible
  File "/opt/bigdl-0.14.0-SNAPSHOT/python/bigdl-friesian-spark_3.1.2-0.14.0-SNAPSHOT-python-api.zip/bigdl/orca/automl/search/tensorboardlogger.py", line 44, in <module>
  File "/opt/work/conda/envs/bigdl/lib/python3.6/site-packages/torch/__init__.py", line 189, in <module>
    _load_global_deps()
  File "/opt/work/conda/envs/bigdl/lib/python3.6/site-packages/torch/__init__.py", line 142, in _load_global_deps
    ctypes.CDLL(lib_path, mode=ctypes.RTLD_GLOBAL)
  File "/opt/work/conda/envs/bigdl/lib/python3.6/ctypes/__init__.py", line 348, in __init__
    self._handle = _dlopen(self._name, mode)
OSError: dlopen: cannot load any more object with static TLS
2021-10-19 07:50:17 INFO  ShutdownHookManager:57 - Shutdown hook called
2021-10-19 07:50:17 INFO  ShutdownHookManager:57 - Deleting directory /tmp/spark-16c432c8-d241-44c9-aaec-7ee29c65f8b4
2021-10-19 07:50:17 INFO  ShutdownHookManager:57 - Deleting directory /tmp/localPyFiles-da811fa6-efb5-4c1d-9cac-172cc159a9d4

Cluster Exception

Traceback (most recent call last):
  File "/opt/bigdl-0.14.0-SNAPSHOT/examples/orca/automl/autoxgboost/AutoXGBoostClassifier.py", line 24, in <module>
    from bigdl.orca.automl.xgboost import AutoXGBClassifier
  File "/opt/bigdl-0.14.0-SNAPSHOT/python/bigdl-friesian-spark_3.1.2-0.14.0-SNAPSHOT-python-api.zip/bigdl/orca/automl/xgboost/__init__.py", line 16, in <module>
  File "/opt/bigdl-0.14.0-SNAPSHOT/python/bigdl-friesian-spark_3.1.2-0.14.0-SNAPSHOT-python-api.zip/bigdl/orca/automl/xgboost/auto_xgb.py", line 17, in <module>
  File "/opt/bigdl-0.14.0-SNAPSHOT/python/bigdl-friesian-spark_3.1.2-0.14.0-SNAPSHOT-python-api.zip/bigdl/orca/automl/xgboost/XGBoost.py", line 19, in <module>
ModuleNotFoundError: No module named 'xgboost'
2021-10-19 08:01:45 INFO  ShutdownHookManager:57 - Shutdown hook called
2021-10-19 08:01:45 INFO  ShutdownHookManager:57 - Deleting directory /tmp/spark-8fb5d5a5-2706-4fcc-b331-e322498ac390

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions

      0