augerai/a2ml

DatasetValidationError on import_data_task in Azure

Closed this issue · 2 comments

DatasetValidationError on import_data_task in Azure

https://app-staging.auger.ai/admin/cluster_tasks/81400 - this happened when I tried to load a CSV file through the UI; after I retried the task, it worked.

https://app-staging.auger.ai/admin/cluster_tasks/83283 - this happened during the review process; after a retry, it also worked.

It happens from time to time. It is probably a race condition.

cc @skatedplrn

https://app-staging.auger.ai/admin/cluster_tasks/88391

{
  "azure": {
    "data": "HTTP Error 400: Bad Request",
    "result": false
  }
}

Log:

2020-08-28T10:49:44.192162163Z [2020-08-28 10:49:44,191: INFO/MainProcess] Received task: a2ml.tasks_queue.tasks_hub_api.import_data_task[af20e889-f734-49b7-a58e-749989c0cdaf]  
2020-08-28T10:49:49.991546407Z [2020-08-28 10:49:49,991: INFO/ForkPoolWorker-2] [azure]  HTTP Error 400: Bad Request
2020-08-28T10:49:50.929878129Z [2020-08-28 10:49:50,929: INFO/ForkPoolWorker-2] Task a2ml.tasks_queue.tasks_hub_api.import_data_task[af20e889-f734-49b7-a58e-749989c0cdaf] succeeded in 6.571025623998139s: None

Stack trace

2020-08-28T18:33:04.446149895Z [2020-08-28 18:33:04,445: INFO/MainProcess] Received task: a2ml.tasks_queue.tasks_hub_api.import_data_task[2ed813cc-6bf1-4b3d-ab91-b312d8effb41]  
2020-08-28T18:33:06.181432202Z [2020-08-28 18:33:06,181: WARNING/ForkPoolWorker-601] [warning] SOCKS support in urllib3 requires the installation of optional dependencies: specifically, PySocks.  For more information, see https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies
2020-08-28T18:33:06.702998511Z [2020-08-28 18:33:06,702: WARNING/ForkPoolWorker-601] [warning] FileType Enum is Deprecated in > 1.0.39. Use strings instead.
2020-08-28T18:33:07.346427084Z [2020-08-28 18:33:07,346: WARNING/ForkPoolWorker-601] [warning] Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3,and in 3.9 it will stop working
2020-08-28T18:33:07.349004917Z [2020-08-28 18:33:07,348: INFO/ForkPoolWorker-603] a2ml.tasks_queue.tasks_hub_api.distribution_chart_stats_task[bcc591e3-6023-4216-8330-03e05b65e925]: Send JSON data to Hub: {"type": "TaskResult", "provider": "azure", "hub_info": {"pipeline_id": "AutoML_3adce96a-b0ad-42e8-9e48-559ffa5b9b37_9", "project_file": {"id": 2974, "url": "https://options-auger.s3.us-east-2.amazonaws.com/training-2020-08-10T191701Z.csv", "name": "training-2020-08-10T191701Z.csv", "object": "project_file", "status": "processed", "deleted": false, "industry": null, "image_url": null, "providers": ["auger", "azure"], "created_at": "2020-08-10T20:52:25.991Z", "project_id": 851, "updated_at": "2020-08-20T16:18:06.680Z", "description": null, "content_type": null, "error_message": null, "providers_data": {"auger": null, "azure": {"url": "training-2020-08-10T191701Z.csv"}}}, "project_name": "a2mlworkspacestaging", "project_path": "s3://auger-mt-org-cinpns/workspace/projects/options-a2ml", "experiment_id": "1989e7883ebd05c4", "prediction_id": null, "cluster_task_id": 89279, "experiment_name": "training-2020-08-10T191701Z.csv-experiment", "project_file_id": 2974, "experiment_session": {"id": "1e33214c11a62eb2", "number": 4, "object": "experiment_session", "status": "completed", "deleted": false, "message": null, "runtime": 9492.835444, "providers": ["azure"], "created_at": "2020-08-11T19:14:10.174Z", "model_type": "regression", "project_id": 851, "started_at": "2020-08-11T19:14:10.174Z", "updated_at": "2020-08-11T21:52:23.010Z", "completed_at": "2020-08-11T21:52:23.010Z", "search_space": null, "error_message": null, "experiment_id": "1989e7883ebd05c4", "model_settings": {"evaluation_options": {"scoring": "r2_score", "max_n_trials": 10, "use_ensemble": true, "classification": false, "test_data_path": null, "max_eval_time_mins": 60, "max_total_time_mins": 120, "cpu_per_mt_algorithm": 4, "crossValidationFolds": 5, "max_concurrent_iterations": 4}}, "providers_data": 
{"azure": {"id": "AutoML_3adce96a-b0ad-42e8-9e48-559ffa5b9b37", "errors": {"error": null, "error_details": null}, "status": "completed", "completed_evaluations": 8}}, "project_file_id": 2974, "starter_user_id": 21, "top_score_value": 0.706982541322667, "dataset_statistics": {"count": 1972128, "stat_data": [{"use": false, "datatype": "boolean", "isTarget": false, "column_name": "predicted_gainer", "value_counts": {"f": 1798895, "t": 173233}, "orig_datatype": "string", "unique_values": 2, "value_counts_ex": [{"count": 1798895, "value": "f"}, {"count": 173233, "value": "t"}]}, {"avg": -0.076688, "use": false, "range": ["-1.00", "295.00"], "std_dev": 0.736028, "datatype": "double", "isTarget": true, "histogram": {"hist": [1972005, 84, 26, 8, 2, 0, 0, 2, 0, 1], "bin_edges": [-0.9996019900497509, 28.60035820895522, 58.2003184079602, 87.80027860696516, 117.4002388059701, 147.0001990049751, 176.6001592039801, 206.2001194029851, 235.8000796019901, 265.400039800995, 295.0]}, "column_name": "predicted_delta_price", "orig_datatype": "double", "unique_values": 160005}, {"avg": 136.1273, "use": false, "range": ["0.50", "3300.00"], "std_dev": 178.879777, "datatype": "double", "isTarget": false, "histogram": {"hist": [1865273, 87359, 4253, 4813, 5149, 1724, 800, 1502, 708, 547], "bin_edges": [0.5, 330.45, 660.4, 990.3499999999999, 1320.3, 1650.25, 1980.2, 2310.15, 2640.1, 2970.05, 3300.0]}, "column_name": "strike_price", "orig_datatype": "double", "unique_values": 818}, {"avg": 136.224451, "use": false, "range": ["0.48", "3200.00"], "std_dev": 178.809939, "datatype": "double", "isTarget": false, "histogram": {"hist": [1855425, 96455, 4840, 4275, 5765, 1356, 1205, 1208, 943, 656], "bin_edges": [0.4786, 320.43074, 640.38288, 960.33502, 1280.28716, 1600.2393, 1920.19144, 2240.14358, 2560.09572, 2880.04786, 3200.0]}, "column_name": "underlying_price", "orig_datatype": "double", "unique_values": 21436}, {"avg": 0.042523, "use": true, "range": ["-0.77", "49.83"], "std_dev": 0.356574, 
"datatype": "double", "isTarget": false, "histogram": {"hist": [1970583, 1265, 150, 41, 12, 50, 10, 8, 5, 4], "bin_edges": [-0.7728384557709399, 4.287568464746874, 9.34797538526469, 14.4083823057825, 19.46878922630032, 24.52919614681813, 29.58960306733595, 34.65000998785376, 39.71041690837158, 44.77082382888939, 49.8312307494072]}, "column_name": "avg_price_change", "orig_datatype": "double", "unique_values": 1917643}, {"use": true, "datatype": "datetime", "isTarget": false, "column_name": "data_date", "orig_datatype": "datetime", "unique_values": 123}, {"avg": 54.045773, "use": true, "range": ["0.00", "364.00"], "std_dev": 70.196704, "datatype": "integer", "isTarget": false, "histogram": {"hist": [1263651, 279075, 96707, 84744, 68537, 82937, 48919, 21789, 14818, 10951], "bin_edges": [0.0, 36.4, 72.8, 109.2, 145.6, 182.0, 218.4, 254.8, 291.2, 327.6, 364.0]}, "column_name": "days_left", "orig_datatype": "integer", "unique_values": 267}, {"avg": 3.041938, "use": true, "range": ["1.00", "5.00"], "std_dev": 1.363659, "datatype": "integer", "isTarget": false, "histogram": {"hist": [332479, 0, 422539, 0, 0, 420959, 0, 422097, 0, 374054], "bin_edges": [1.0, 1.4, 1.8, 2.2, 2.6, 3.0, 3.4, 3.8, 4.2, 4.6, 5.0]}, "column_name": "day_of_week", "orig_datatype": "integer", "unique_values": 5}, {"avg": -8.451585, "use": true, "range": ["-3209.91", "34.74"], "std_dev": 168.889916, "datatype": "double", "isTarget": false, "column_name": "debttoequity", "orig_datatype": "double", "unique_values": 457, "missing_values": 1876}, {"avg": 0.052427, "use": true, "range": ["-1.00", "407.00"], "std_dev": 1.060254, "datatype": "double", "isTarget": false, "histogram": {"hist": [1971993, 97, 22, 11, 0, 0, 2, 1, 1, 1], "bin_edges": [-0.9970414201183431, 39.80266272189349, 80.60236686390533, 121.4020710059172, 162.201775147929, 203.0014792899408, 243.8011834319527, 284.6008875739645, 325.4005917159763, 366.2002958579881, 407.0]}, "column_name": "delta_price", "orig_datatype": "double", 
"unique_values": 255665}, {"avg": -0.194821, "use": true, "range": ["-1.00", "25.29"], "std_dev": 0.899279, "datatype": "double", "isTarget": false, "histogram": {"hist": [1888156, 74100, 7953, 1421, 356, 100, 22, 14, 3, 3], "bin_edges": [-0.999910503100428, 1.629508340125145, 4.258927183350719, 6.888346026576292, 9.517764869801864, 12.14718371302744, 14.77660255625301, 17.40602139947859, 20.03544024270416, 22.66485908592973, 25.2942779291553]}, "column_name": "delta_over_avg_price", "orig_datatype": "double", "unique_values": 1852508}, {"avg": 0.072243, "use": true, "range": ["-1.00", "21.00"], "std_dev": 0.806958, "datatype": "double", "isTarget": false, "histogram": {"hist": [1815236, 145582, 8955, 1598, 471, 143, 90, 37, 13, 3], "bin_edges": [-0.99798704846377, 1.201391488315377, 3.400770025094524, 5.600148561873672, 7.799527098652819, 9.998905635431965, 12.19828417221111, 14.39766270899026, 16.59704124576941, 18.79641978254855, 20.9957983193277]}, "column_name": "delta_over_avg_underlying_price", "orig_datatype": "double", "unique_values": 1830374}, {"avg": 0.00129, "use": true, "range": ["-0.38", "0.93"], "std_dev": 0.028311, "datatype": "double", "isTarget": false, "histogram": {"hist": [102, 2973, 1489883, 476976, 2031, 58, 48, 3, 38, 16], "bin_edges": [-0.378395451674037, -0.2472589462572179, -0.1161224408403988, 0.01501406457642035, 0.1461505699932394, 0.2772870754100585, 0.4084235808268777, 0.5395600862436969, 0.6706965916605159, 0.8018330970773351, 0.9329696024941542]}, "column_name": "delta_underlying", "orig_datatype": "double", "unique_values": 61243}, {"avg": 0.434738, "use": true, "range": ["-3427.00", "93184.00"], "std_dev": 67.845288, "datatype": "double", "isTarget": false, "histogram": {"hist": [1972126, 1, 0, 0, 0, 0, 0, 0, 0, 1], "bin_edges": [-3427.0, 6234.1, 15895.2, 25556.3, 35217.4, 44878.5, 54539.600000000006, 64200.7, 73861.8, 83522.90000000001, 93184.0]}, "column_name": "delta_volume", "orig_datatype": "double", "unique_values": 618}, 
{"avg": -0.000427, "use": true, "range": ["-0.05", "0.05"], "std_dev": 0.028395, "datatype": "double", "isTarget": false, "histogram": {"hist": [189355, 198436, 201828, 203077, 202539, 208123, 201137, 197179, 189577, 180877], "bin_edges": [-0.0499982407374828, -0.03999859927211259, -0.02999895780674238, -0.01999931634137218, -0.009999674876001965, -3.34106317542715e-08, 0.00999960805473845, 0.01999924952010867, 0.02999889098547887, 0.03999853245084908, 0.0499981739162193]}, "column_name": "distance", "orig_datatype": "double", "unique_values": 101888}, {"avg": 0.026231, "use": true, "range": ["0.00", "1.01"], "std_dev": 0.02737, "datatype": "double", "isTarget": false, "column_name": "dividend_yield", "orig_datatype": "double", "unique_values": 407, "missing_values": 1766}, {"avg": 0.039429, "use": true, "range": ["-13.46", "0.71"], "std_dev": 0.101943, "datatype": "double", "isTarget": false, "column_name": "earnings_pct", "orig_datatype": "double", "unique_values": 40888, "missing_values": 20656}, {"avg": 0.143933, "use": true, "range": ["-57.87", "61.24"], "std_dev": 3.01937, "datatype": "double", "isTarget": false, "column_name": "ebitgrowth", "orig_datatype": "double", "unique_values": 470, "missing_values": 16111}, {"avg": -0.218599, "use": true, "range": ["-2896.77", "409.38"], "std_dev": 72.856965, "datatype": "double", "isTarget": false, "column_name": "fcffgrowth", "orig_datatype": "double", "unique_values": 469, "missing_values": 19368}, {"avg": 84193709973.43858, "use": true, "range": ["54401613.00", "1246018910000.00"], "std_dev": 153591595988.0705, "datatype": "double", "isTarget": false, "column_name": "market_cap", "orig_datatype": "double", "unique_values": 474, "missing_values": 1766}, {"avg": 0.192281, "use": true, "range": ["-16.21", "13.50"], "std_dev": 1.434123, "datatype": "double", "isTarget": false, "column_name": "ocfgrowth", "orig_datatype": "double", "unique_values": 471, "missing_values": 11594}, {"use": true, "datatype": "categorical", 
"isTarget": false, "column_name": "p_c", "value_counts": {"C": 1014486, "P": 957642}, "orig_datatype": "string", "unique_values": 2, "value_counts_ex": [{"count": 1014486, "value": "C"}, {"count": 957642, "value": "P"}]}, {"avg": 20.729762, "use": true, "range": ["0.00", "1403.38"], "std_dev": 101.037486, "datatype": "double", "isTarget": false, "column_name": "price_book", "orig_datatype": "double", "unique_values": 470, "missing_values": 11339}, {"avg": 29.157959, "use": true, "range": ["-251.53", "592.93"], "std_dev": 49.221768, "datatype": "double", "isTarget": false, "column_name": "price_earnings", "orig_datatype": "double", "unique_values": 475, "missing_values": 1766}, {"avg": 13.746705, "use": true, "range": ["0.00", "2159.06"], "std_dev": 142.563789, "datatype": "double", "isTarget": false, "column_name": "price_revenue", "orig_datatype": "double", "unique_values": 470, "missing_values": 1876}, {"avg": 0.049383, "use": true, "range": ["-1.36", "5.13"], "std_dev": 0.272459, "datatype": "double", "isTarget": false, "column_name": "revenuegrowth", "orig_datatype": "double", "unique_values": 470, "missing_values": 16446}, {"use": true, "datatype": "categorical", "isTarget": false, "column_name": "sector", "value_counts": {"Energy": 113488, "Materials": 67326, "Utilities": 56589, "Financials": 205371, "Health Care": 327660, "Industrials": 225934, "Real Estate": 41568, "Consumer Staples": 208331, "Consumer Discretionary": 314367, "Information Technology": 375559, "Telecommunication Services": 15279}, "orig_datatype": "string", "unique_values": 12, "missing_values": 20656, "value_counts_ex": [{"count": 375559, "value": "Information Technology"}, {"count": 327660, "value": "Health Care"}, {"count": 314367, "value": "Consumer Discretionary"}, {"count": 225934, "value": "Industrials"}, {"count": 208331, "value": "Consumer Staples"}, {"count": 205371, "value": "Financials"}, {"count": 113488, "value": "Energy"}, {"count": 67326, "value": "Materials"}, {"count": 
56589, "value": "Utilities"}, {"count": 41568, "value": "Real Estate"}, {"count": 15279, "value": "Telecommunication Services"}]}, {"avg": 0.675197, "use": true, "range": ["0.06", "1.59"], "std_dev": 0.100533, "datatype": "double", "isTarget": false, "column_name": "year_vary", "orig_datatype": "double", "unique_values": 474, "missing_values": 1766}, {"avg": 0.074001, "use": true, "range": ["-0.86", "18.62"], "std_dev": 0.487439, "datatype": "double", "isTarget": false, "column_name": "delta_stock_volume", "orig_datatype": "double", "unique_values": 43492, "missing_values": 92}, {"avg": 0.000655, "use": true, "range": ["-0.75", "0.62"], "std_dev": 0.021044, "datatype": "double", "isTarget": false, "histogram": {"hist": [120, 0, 2, 123, 18122, 1943330, 10001, 336, 40, 54], "bin_edges": [-0.747365158090515, -0.6102899326040442, -0.4732147071175734, -0.3361394816311025, -0.1990642561446317, -0.06198903065816097, 0.07508619482830992, 0.2121614203147807, 0.3492366458012515, 0.4863118712877224, 0.623387096774193]}, "column_name": "delta_afterhours", "orig_datatype": "double", "unique_values": 42559}], "columns_count": 30}, "dataset_manifest_id": "828bd95a0203005a"}, "dataset_manifest_id": "828bd95a0203005a", "experiment_session_id": "1e33214c11a62eb2"}, "status": "success", "runtime": 45.749157190322876, "result": {"2020-08-24": {"predicted_predicted_delta_price": {"avg": -0.1637150279576552, "std_dev": 0.0961785147172678, "imp": 0}, "avg_price_change": {"avg": 0.0, "std_dev": 0.0, "imp": 0.000195}, "data_date": {"dist": {"2020-08-24": 8296}, "imp": 0.051324}, "days_left": {"avg": 57.94623915139827, "std_dev": 73.44412696556313, "imp": 0.032152}, "day_of_week": {"avg": 1.0, "std_dev": 0.0, "imp": 0.000797}, "debttoequity": {"avg": 0.08486821359691453, "std_dev": 13.50361830579007, "imp": 0.000735}, "delta_price": {"avg": 0.0, "std_dev": 0.0, "imp": 0.02582}, "delta_over_avg_price": {"avg": 0.2310086582701198, "std_dev": 0.5900894809853053, "imp": 0.052062}, 
"delta_over_avg_underlying_price": {"avg": 1.1727012113941716e-17, "std_dev": 3.6435292765397057e-16, "imp": 0.018138}, "delta_underlying": {"avg": 0.0, "std_dev": 0.0, "imp": 0.095268}, "delta_volume": {"avg": 0.0, "std_dev": 0.0, "imp": 0.000857}, "distance": {"avg": -0.0026852960649947377, "std_dev": 0.02872942466500326, "imp": 0.011778}, "dividend_yield": {"avg": 0.021658444069431054, "std_dev": 0.01699883568342046, "imp": 0.000757}, "earnings_pct": {"avg": 0.03175959777473209, "std_dev": 0.08101752644374777, "imp": 0.000622}, "ebitgrowth": {"avg": 0.13099633642719385, "std_dev": 1.987340852623593, "imp": 0.000774}, "fcffgrowth": {"avg": -5.860800529893927, "std_dev": 142.7117410107019, "imp": 0.000256}, "market_cap": {"avg": 87329010712.17346, "std_dev": 171802082036.34216, "imp": 0.023501}, "ocfgrowth": {"avg": 0.3215011333172614, "std_dev": 2.197032206777087, "imp": 0.000204}, "p_c": {"dist": {"C": 4366, "P": 3930}, "imp": 0.060361}, "price_book": {"avg": 17.21766069189972, "std_dev": 59.53485840880176, "imp": 0.000667}, "price_earnings": {"avg": 31.31919758919963, "std_dev": 45.10977234688211, "imp": 0.000181}, "price_revenue": {"avg": 28.852721709257477, "std_dev": 227.11283472876408, "imp": 0.000274}, "revenuegrowth": {"avg": 0.06686582328833178, "std_dev": 0.3510797405198097, "imp": 0.003276}, "sector": {"dist": {"Health Care": 1573, "Information Technology": 1635, "Financials": 1223, "Materials": 198, "Real Estate": 239, "Consumer Staples": 711, "Industrials": 717, "Consumer Discretionary": 1127, "Utilities": 236, "Energy": 395, "": 212, "Telecommunication Services": 30}, "imp": 0.000562}, "year_vary": {"avg": 0.6707410948011083, "std_dev": 0.09910197979599422, "imp": 0.000754}, "delta_stock_volume": {"avg": 0.08438456404113456, "std_dev": 0.441846352836815, "imp": 0.006112}, "delta_afterhours": {"avg": 0.004772295636917571, "std_dev": 0.0069699472503413915, "imp": 0.002062}, "actual_predicted_delta_price": {"avg": -0.13162321438274072, "std_dev": 
0.39665628303912703, "imp": 0}}, "2020-08-25": {"predicted_predicted_delta_price": {"avg": -0.09160704913977963, "std_dev": 0.03784128864763512, "imp": 0}, "avg_price_change": {"avg": 0.0, "std_dev": 0.0, "imp": 0.000195}, "data_date": {"dist": {"2020-08-25": 9}, "imp": 0.051324}, "days_left": {"avg": 139.88888888888889, "std_dev": 70.8792008357255, "imp": 0.032152}, "day_of_week": {"avg": 2.0, "std_dev": 0.0, "imp": 0.000797}, "debttoequity": {"avg": -23.466500000000003, "std_dev": 68.57102681615976, "imp": 0.000735}, "delta_price": {"avg": 0.0, "std_dev": 0.0, "imp": 0.02582}, "delta_over_avg_price": {"avg": 0.0, "std_dev": 0.0, "imp": 0.052062}, "delta_over_avg_underlying_price": {"avg": 2.245155654911473e-16, "std_dev": 3.6279600609255133e-16, "imp": 0.018138}, "delta_underlying": {"avg": 0.0, "std_dev": 0.0, "imp": 0.095268}, "delta_volume": {"avg": 0.0, "std_dev": 0.0, "imp": 0.000857}, "distance": {"avg": -0.009043649655571352, "std_dev": 0.031158578122551003, "imp": 0.011778}, "dividend_yield": {"avg": 0.02028422222222222, "std_dev": 0.013830768270217113, "imp": 0.000757}, "earnings_pct": {"avg": 0.05735383999823371, "std_dev": 0.09165092634071227, "imp": 0.000622}, "ebitgrowth": {"avg": 0.19712322222222223, "std_dev": 0.4963098677028238, "imp": 0.000774}, "fcffgrowth": {"avg": -322.1897401111111, "std_dev": 965.4708217800447, "imp": 0.000256}, "market_cap": {"avg": 185330477231.8889, "std_dev": 391086342355.95593, "imp": 0.023501}, "ocfgrowth": {"avg": 0.3437012222222222, "std_dev": 0.8479625678135471, "imp": 0.000204}, "p_c": {"dist": {"P": 8, "C": 1}, "imp": 0.060361}, "price_book": {"avg": 15.790866666666668, "std_dev": 23.164786997725667, "imp": 0.000667}, "price_earnings": {"avg": 21.08026666666667, "std_dev": 9.611020997141772, "imp": 0.000181}, "price_revenue": {"avg": 2.634133333333333, "std_dev": 1.88259410920145, "imp": 0.000274}, "revenuegrowth": {"avg": 0.6152126666666666, "std_dev": 1.692694416523919, "imp": 
0.003276}, "sector": {"dist": {"Information Technology": 2, "Health Care": 4, "Consumer Discretionary": 1, "Materials": 1, "Industrials": 1}, "imp": 0.000562}, "year_vary": {"avg": 0.6532950060133905, "std_dev": 0.09451891762017572, "imp": 0.000754}, "delta_stock_volume": {"avg": -0.0951215505203919, "std_dev": 0.33461359310491623, "imp": 0.006112}, "delta_afterhours": {"avg": 0.0004585794645390801, "std_dev": 0.006441979711806947, "imp": 0.002062}, "actual_predicted_delta_price": {"avg": -0.1461754907371818, "std_dev": 0.7203377851805648, "imp": 0}}}, "traceback": null}
2020-08-28T18:33:07.877421892Z [2020-08-28 18:33:07,877: INFO/ForkPoolWorker-603] Task a2ml.tasks_queue.tasks_hub_api.distribution_chart_stats_task[bcc591e3-6023-4216-8330-03e05b65e925] succeeded in 46.27928587399947s: None
2020-08-28T18:33:09.202165631Z [2020-08-28 18:33:09,201: INFO/ForkPoolWorker-601] [azure]  Use presigned url: https://auger-mt-org-cinpns.s3.amazonaws.com/workspace/projects/alex-mt-with-azure/files/KQqoYESVEHqoSCnGqp5GuC-iris-9918f0.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIAY4JC2MKVBYHZHPM6%2F20200828%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20200828T183309Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Security-Token=[REDACTED]
2020-08-28T18:33:10.928005458Z [2020-08-28 18:33:10,927: INFO/ForkPoolWorker-601] Could not load the run context. Logging offline
2020-08-28T18:33:12.299690378Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] Traceback (most recent call last):
2020-08-28T18:33:12.299711869Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_error_handling.py", line 65, in _validate_has_data
2020-08-28T18:33:12.299715610Z     dataflow.verify_has_data()
2020-08-28T18:33:12.299753380Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/_loggerfactory.py", line 179, in wrapper
2020-08-28T18:33:12.299762950Z     return func(*args, **kwargs)
2020-08-28T18:33:12.299810061Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/dataflow.py", line 804, in verify_has_data
2020-08-28T18:33:12.299815792Z     if len(self.take(1)._to_pyrecords()) == 0:
2020-08-28T18:33:12.299915813Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/dataflow.py", line 724, in _to_pyrecords
2020-08-28T18:33:12.299934745Z     span_context=to_dprep_span_context(span.get_context() if span else None)
2020-08-28T18:33:12.299937575Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/_aml_helper.py", line 38, in wrapper
2020-08-28T18:33:12.299939815Z     return send_message_func(op_code, message, cancellation_token)
2020-08-28T18:33:12.299970805Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/engineapi/api.py", line 110, in execute_anonymous_activity
2020-08-28T18:33:12.299978795Z     response = self._message_channel.send_message('Engine.ExecuteActivity', message_args, cancellation_token)
2020-08-28T18:33:12.300008956Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/engineapi/engine.py", line 182, in send_message
2020-08-28T18:33:12.300016657Z     raise_engine_error(response['error'])
2020-08-28T18:33:12.300056007Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/errorhandlers.py", line 10, in raise_engine_error
2020-08-28T18:33:12.300061297Z     raise ExecutionError(error_response)
2020-08-28T18:33:12.300119999Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] azureml.dataprep.api.errorhandlers.ExecutionError: 
2020-08-28T18:33:12.300124129Z Error Code: ScriptExecution.StreamAccess.NotFound
2020-08-28T18:33:12.300126529Z Failed Step: 1977d509-351d-4f05-9491-9886e756bc42
2020-08-28T18:33:12.300129129Z Error Message: ScriptExecutionException was caused by StreamAccessException.
2020-08-28T18:33:12.300131559Z   StreamAccessException was caused by NotFoundException.
2020-08-28T18:33:12.300134560Z     Found no resources for the input provided: 'https://auger-mt-org-cinpns.s3.amazonaws.com/workspace/projects/alex-mt-with-azure/files/KQqoYESVEHqoSCnGqp5GuC-iris-9918f0.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIAY4JC2MKVBYHZHPM6%2F20200828%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20200828T183309Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Security-Token=[REDACTED]'
2020-08-28T18:33:12.300141179Z | session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1
2020-08-28T18:33:12.300154550Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] During handling of the above exception, another exception occurred:
2020-08-28T18:33:12.300229651Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] Traceback (most recent call last):
2020-08-28T18:33:12.300288773Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/utils/decorators.py", line 4, in wrapper
2020-08-28T18:33:12.300293893Z     return decorated(self, *args, **kwargs)
2020-08-28T18:33:12.300360165Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/utils/decorators.py", line 24, in wrapper
2020-08-28T18:33:12.300364945Z     return decorated(self, *args, **kwargs)
2020-08-28T18:33:12.300388886Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/azure/dataset.py", line 52, in create
2020-08-28T18:33:12.300391466Z     dataset = Dataset.Tabular.from_delimited_files(path=source)
2020-08-28T18:33:12.300457487Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/_loggerfactory.py", line 126, in wrapper
2020-08-28T18:33:12.300459867Z     return func(*args, **kwargs)
2020-08-28T18:33:12.300494738Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_factory.py", line 315, in from_delimited_files
2020-08-28T18:33:12.300497108Z     validate or infer_column_types or _is_inference_required(set_column_types))
2020-08-28T18:33:12.300597630Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_factory.py", line 771, in _transform_and_validate
2020-08-28T18:33:12.300607611Z     _validate_has_data(dataflow, 'Cannot load any data from the specified path. '
2020-08-28T18:33:12.300629111Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_error_handling.py", line 68, in _validate_has_data
2020-08-28T18:33:12.300631971Z     raise DatasetValidationError(error_message + '\n' + e.compliant_message, exception=e)
2020-08-28T18:33:12.300715883Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] azureml.data.dataset_error_handling.DatasetValidationError: DatasetValidationError:
2020-08-28T18:33:12.300720293Z 	Message: Cannot load any data from the specified path. Make sure the path is accessible and contains data.
2020-08-28T18:33:12.300722964Z ScriptExecutionException was caused by StreamAccessException.
2020-08-28T18:33:12.300725354Z   StreamAccessException was caused by NotFoundException.
2020-08-28T18:33:12.300727684Z     Found no resources for the input provided: '[REDACTED]'
2020-08-28T18:33:12.300730114Z | session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1
2020-08-28T18:33:12.300732444Z 	InnerException None
2020-08-28T18:33:12.300734753Z 	ErrorResponse 
2020-08-28T18:33:12.300737093Z {
2020-08-28T18:33:12.300739284Z     "error": {
2020-08-28T18:33:12.300741724Z         "code": "UserError",
2020-08-28T18:33:12.300744324Z         "message": "Cannot load any data from the specified path. Make sure the path is accessible and contains data.\nScriptExecutionException was caused by StreamAccessException.\n  StreamAccessException was caused by NotFoundException.\n    Found no resources for the input provided: '[REDACTED]'\n| session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1"
2020-08-28T18:33:12.300747664Z     }
2020-08-28T18:33:12.300749944Z }
2020-08-28T18:33:12.300836806Z [2020-08-28 18:33:12,300: INFO/ForkPoolWorker-601] [azure]  DatasetValidationError:
2020-08-28T18:33:12.300839617Z 	Message: Cannot load any data from the specified path. Make sure the path is accessible and contains data.
2020-08-28T18:33:12.300841617Z ScriptExecutionException was caused by StreamAccessException.
2020-08-28T18:33:12.300843377Z   StreamAccessException was caused by NotFoundException.
2020-08-28T18:33:12.300845077Z     Found no resources for the input provided: '[REDACTED]'
2020-08-28T18:33:12.300846757Z | session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1
2020-08-28T18:33:12.300848416Z 	InnerException None
2020-08-28T18:33:12.300850086Z 	ErrorResponse 
2020-08-28T18:33:12.300851706Z {
2020-08-28T18:33:12.300853286Z     "error": {
2020-08-28T18:33:12.300855077Z         "code": "UserError",
2020-08-28T18:33:12.300857047Z         "message": "Cannot load any data from the specified path. Make sure the path is accessible and contains data.\nScriptExecutionException was caused by StreamAccessException.\n  StreamAccessException was caused by NotFoundException.\n    Found no resources for the input provided: '[REDACTED]'\n| session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1"
2020-08-28T18:33:12.300859507Z     }
2020-08-28T18:33:12.300861107Z }
2020-08-28T18:33:12.301090953Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] Traceback (most recent call last):
2020-08-28T18:33:12.301154644Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_error_handling.py", line 65, in _validate_has_data
2020-08-28T18:33:12.301161874Z     dataflow.verify_has_data()
2020-08-28T18:33:12.301165334Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/_loggerfactory.py", line 179, in wrapper
2020-08-28T18:33:12.301168414Z     return func(*args, **kwargs)
2020-08-28T18:33:12.301209896Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/dataflow.py", line 804, in verify_has_data
2020-08-28T18:33:12.301215515Z     if len(self.take(1)._to_pyrecords()) == 0:
2020-08-28T18:33:12.301218035Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/dataflow.py", line 724, in _to_pyrecords
2020-08-28T18:33:12.301220905Z     span_context=to_dprep_span_context(span.get_context() if span else None)
2020-08-28T18:33:12.301292218Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/_aml_helper.py", line 38, in wrapper
2020-08-28T18:33:12.301300757Z     return send_message_func(op_code, message, cancellation_token)
2020-08-28T18:33:12.301303427Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/engineapi/api.py", line 110, in execute_anonymous_activity
2020-08-28T18:33:12.301314768Z     response = self._message_channel.send_message('Engine.ExecuteActivity', message_args, cancellation_token)
2020-08-28T18:33:12.301337128Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/engineapi/engine.py", line 182, in send_message
2020-08-28T18:33:12.301339798Z     raise_engine_error(response['error'])
2020-08-28T18:33:12.301440171Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/errorhandlers.py", line 10, in raise_engine_error
2020-08-28T18:33:12.301443511Z     raise ExecutionError(error_response)
2020-08-28T18:33:12.301445321Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] azureml.dataprep.api.errorhandlers.ExecutionError: 
2020-08-28T18:33:12.301447081Z Error Code: ScriptExecution.StreamAccess.NotFound
2020-08-28T18:33:12.301448721Z Failed Step: 1977d509-351d-4f05-9491-9886e756bc42
2020-08-28T18:33:12.301450351Z Error Message: ScriptExecutionException was caused by StreamAccessException.
2020-08-28T18:33:12.301452051Z   StreamAccessException was caused by NotFoundException.
2020-08-28T18:33:12.301461482Z     Found no resources for the input provided: 'https://auger-mt-org-cinpns.s3.amazonaws.com/workspace/projects/alex-mt-with-azure/files/KQqoYESVEHqoSCnGqp5GuC-iris-9918f0.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIAY4JC2MKVBYHZHPM6%2F20200828%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20200828T183309Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Security-Token=[REDACTED]'
2020-08-28T18:33:12.301466392Z | session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1
2020-08-28T18:33:12.301477702Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] During handling of the above exception, another exception occurred:
2020-08-28T18:33:12.301528763Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] Traceback (most recent call last):
2020-08-28T18:33:12.301581025Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/utils/provider_runner.py", line 82, in execute_provider
2020-08-28T18:33:12.301583704Z     result = getattr(provider, operation_name)(*args, **kwargs)
2020-08-28T18:33:12.301614905Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/azure/a2ml.py", line 12, in import_data
2020-08-28T18:33:12.301617285Z     return AzureDataset(self.ctx).create(source=source)
2020-08-28T18:33:12.301654836Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/utils/decorators.py", line 4, in wrapper
2020-08-28T18:33:12.301660797Z     return decorated(self, *args, **kwargs)
2020-08-28T18:33:12.301684517Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/utils/decorators.py", line 24, in wrapper
2020-08-28T18:33:12.301687067Z     return decorated(self, *args, **kwargs)
2020-08-28T18:33:12.301742078Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/azure/dataset.py", line 52, in create
2020-08-28T18:33:12.301744289Z     dataset = Dataset.Tabular.from_delimited_files(path=source)
2020-08-28T18:33:12.301746019Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/_loggerfactory.py", line 126, in wrapper
2020-08-28T18:33:12.301747909Z     return func(*args, **kwargs)
2020-08-28T18:33:12.301791139Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_factory.py", line 315, in from_delimited_files
2020-08-28T18:33:12.301794970Z     validate or infer_column_types or _is_inference_required(set_column_types))
2020-08-28T18:33:12.301837791Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_factory.py", line 771, in _transform_and_validate
2020-08-28T18:33:12.301840450Z     _validate_has_data(dataflow, 'Cannot load any data from the specified path. '
2020-08-28T18:33:12.301864802Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_error_handling.py", line 68, in _validate_has_data
2020-08-28T18:33:12.301867241Z     raise DatasetValidationError(error_message + '\n' + e.compliant_message, exception=e)
2020-08-28T18:33:12.301910872Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] azureml.data.dataset_error_handling.DatasetValidationError: DatasetValidationError:
2020-08-28T18:33:12.301916633Z 	Message: Cannot load any data from the specified path. Make sure the path is accessible and contains data.
2020-08-28T18:33:12.301918723Z ScriptExecutionException was caused by StreamAccessException.
2020-08-28T18:33:12.301920593Z   StreamAccessException was caused by NotFoundException.
2020-08-28T18:33:12.301922403Z     Found no resources for the input provided: '[REDACTED]'
2020-08-28T18:33:12.301924362Z | session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1
2020-08-28T18:33:12.301926182Z 	InnerException None
2020-08-28T18:33:12.301928003Z 	ErrorResponse 
2020-08-28T18:33:12.301929653Z {
2020-08-28T18:33:12.301931373Z     "error": {
2020-08-28T18:33:12.301933113Z         "code": "UserError",
2020-08-28T18:33:12.301935253Z         "message": "Cannot load any data from the specified path. Make sure the path is accessible and contains data.\nScriptExecutionException was caused by StreamAccessException.\n  StreamAccessException was caused by NotFoundException.\n    Found no resources for the input provided: '[REDACTED]'\n| session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1"
2020-08-28T18:33:12.301943213Z     }
2020-08-28T18:33:12.301944944Z }
2020-08-28T18:33:12.302522927Z [2020-08-28 18:33:12,302: INFO/ForkPoolWorker-601] a2ml.tasks_queue.tasks_hub_api.import_data_task[2ed813cc-6bf1-4b3d-ab91-b312d8effb41]: Send JSON data to Hub: {"type": "ProviderInfoUpdate", "hub_info": {"pipeline_id": null, "project_file": {"id": 3014, "url": "s3://auger-mt-org-cinpns/workspace/projects/alex-mt-with-azure/files/KQqoYESVEHqoSCnGqp5GuC-iris-9918f0.csv", "name": "iris-2.csv", "object": "project_file", "status": "processing", "deleted": false, "industry": null, "image_url": null, "providers": [], "created_at": "2020-08-28T18:33:03.560Z", "project_id": 860, "updated_at": "2020-08-28T18:33:03.716Z", "description": null, "content_type": null, "error_message": null, "providers_data": {}}, "project_name": "a2mlworkspacestaging", "project_path": "s3://auger-mt-org-cinpns/workspace/projects/alex-mt-with-azure", "experiment_id": null, "prediction_id": null, "cluster_task_id": 89287, "experiment_name": null, "project_file_id": 3014, "experiment_session": null, "dataset_manifest_id": null, "experiment_session_id": null}, "provider": "azure", "provider_info": {"azure": {}}}
2020-08-28T18:33:12.882683412Z [2020-08-28 18:33:12,882: INFO/ForkPoolWorker-601] a2ml.tasks_queue.tasks_hub_api.import_data_task[2ed813cc-6bf1-4b3d-ab91-b312d8effb41]: Send JSON data to Hub: {"type": "TaskResult", "provider": "azure", "hub_info": {"pipeline_id": null, "project_file": {"id": 3014, "url": "s3://auger-mt-org-cinpns/workspace/projects/alex-mt-with-azure/files/KQqoYESVEHqoSCnGqp5GuC-iris-9918f0.csv", "name": "iris-2.csv", "object": "project_file", "status": "processing", "deleted": false, "industry": null, "image_url": null, "providers": [], "created_at": "2020-08-28T18:33:03.560Z", "project_id": 860, "updated_at": "2020-08-28T18:33:03.716Z", "description": null, "content_type": null, "error_message": null, "providers_data": {}}, "project_name": "a2mlworkspacestaging", "project_path": "s3://auger-mt-org-cinpns/workspace/projects/alex-mt-with-azure", "experiment_id": null, "prediction_id": null, "cluster_task_id": 89287, "experiment_name": null, "project_file_id": 3014, "experiment_session": null, "dataset_manifest_id": null, "experiment_session_id": null}, "status": "success", "runtime": 8.435573101043701, "result": {"azure": {"result": false, "data": "DatasetValidationError:\n\tMessage: Cannot load any data from the specified path. Make sure the path is accessible and contains data.\nScriptExecutionException was caused by StreamAccessException.\n  StreamAccessException was caused by NotFoundException.\n    Found no resources for the input provided: '[REDACTED]'\n| session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1\n\tInnerException None\n\tErrorResponse \n{\n    \"error\": {\n        \"code\": \"UserError\",\n        \"message\": \"Cannot load any data from the specified path. Make sure the path is accessible and contains data.\\nScriptExecutionException was caused by StreamAccessException.\\n  StreamAccessException was caused by NotFoundException.\\n    Found no resources for the input provided: '[REDACTED]'\\n| session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1\"\n    }\n}"}}, "traceback": null}
2020-08-28T18:33:13.346311951Z [2020-08-28 18:33:13,346: INFO/ForkPoolWorker-601] Task a2ml.tasks_queue.tasks_hub_api.import_data_task[2ed813cc-6bf1-4b3d-ab91-b312d8effb41] succeeded in 8.899521411003661s: None