From 026aac0e5118919f643adeafb82018868522076a Mon Sep 17 00:00:00 2001 From: James Kent Date: Thu, 4 Mar 2021 17:39:16 -0600 Subject: [PATCH 01/24] convert examples to sphinx gallery notebooks --- examples/README.rst | 4 + examples/plot_quickstart.py | 278 +++++++++++++++++++++ examples/plot_scikit_learn_integration.py | 65 +++++ examples/plot_simple_graph.py | 59 +++++ examples/plot_speech_sentiment_analysis.py | 55 ++++ examples/plot_vision_apis.py | 61 +++++ 6 files changed, 522 insertions(+) create mode 100644 examples/README.rst create mode 100644 examples/plot_quickstart.py create mode 100644 examples/plot_scikit_learn_integration.py create mode 100644 examples/plot_simple_graph.py create mode 100644 examples/plot_speech_sentiment_analysis.py create mode 100644 examples/plot_vision_apis.py diff --git a/examples/README.rst b/examples/README.rst new file mode 100644 index 000000000..7fce150ad --- /dev/null +++ b/examples/README.rst @@ -0,0 +1,4 @@ +Pliers Gallery +================== + +Below is a gallery of examples \ No newline at end of file diff --git a/examples/plot_quickstart.py b/examples/plot_quickstart.py new file mode 100644 index 000000000..ad36c64f1 --- /dev/null +++ b/examples/plot_quickstart.py @@ -0,0 +1,278 @@ +""" +This is my example script +========================= + +This example doesn't do much, it just makes a simple plot +""" +# To add a new cell, type '# %%' +# To add a new markdown cell, type '# %% [markdown]' + +# %% +# Example-specific imports are in individual cells below; here we +# just import stuff we reuse repeatedly. +from pliers.extractors import merge_results +from pliers.tests.utils import get_test_data_path +from os.path import join +from matplotlib import pyplot as plt + +# %% [markdown] +# # Pliers Quickstart +# This notebook contains a few examples that demonstrate how to extract various kinds of features with pliers. We start with very simple examples, and gradually scale up in complexity. 
+# +# ## Face detection +# This first example uses the face_recognition package's location extraction method to detect the location of Barack Obama's face within a single image. The tools used to do this are completely local (i.e., the image isn't sent to an external API). +# +# We output the result as a pandas DataFrame; the `'face_locations'` column contains the coordinates of the bounding box in CSS format (i.e., top, right, bottom, and left edges). + +# %% +from pliers.extractors import FaceRecognitionFaceLocationsExtractor + +# A picture of Barack Obama +image = join(get_test_data_path(), 'image', 'obama.jpg') + +# Initialize Extractor +ext = FaceRecognitionFaceLocationsExtractor() + +# Apply Extractor to image +result = ext.transform(image) + +result.to_df() + +# %% [markdown] +# ## Face detection with multiple inputs +# What if we want to run the face detector on multiple images? Naively, we could of course just loop over input images and apply the Extractor to each one. But pliers makes this even easier for us, by natively accepting iterables as inputs. The following code is almost identical to the above snippet. The only notable difference is that, because the result we get back is now also a list (because the features extracted from each image are stored separately), we need to explicitly combine the results using the `merge_results` utility. + +# %% +from pliers.extractors import FaceRecognitionFaceLocationsExtractor, merge_results + +images = ['apple.jpg', 'obama.jpg', 'thai_people.jpg'] +images = [join(get_test_data_path(), 'image', img) for img in images] + +ext = FaceRecognitionFaceLocationsExtractor() +results = ext.transform(images) +df = merge_results(results) +df + +# %% [markdown] +# Note how the merged pandas DataFrame contains 5 rows, even though there were only 3 input images. The reason is that there are 5 detected faces across the inputs (0 in the first image, 1 in the second, and 4 in the third). 
You can discern the original sources from the `stim_name` and `source_file` columns. +# +# ## Face detection using a remote API +# The above examples use an entirely local package (`face_recognition`) for feature extraction. In this next example, we use the Google Cloud Vision API to extract various face-related attributes from an image of Barack Obama. The syntax is identical to the first example, save for the use of the `GoogleVisionAPIFaceExtractor` instead of the `FaceRecognitionFaceLocationsExtractor`. Note, however, that successful execution of this code requires you to have a `GOOGLE_APPLICATION_CREDENTIALS` environment variable pointing to your Google credentials JSON file. See the documentation for more details. + +# %% +from pliers.extractors import GoogleVisionAPIFaceExtractor + +ext = GoogleVisionAPIFaceExtractor() +image = join(get_test_data_path(), 'image', 'obama.jpg') +result = ext.transform(image) + +result.to_df(format='long', timing=False, object_id=False) + +# %% [markdown] +# Notice that the output in this case contains many more features. That's because the Google face recognition service gives us back a lot more information than just the location of the face within the image. Also, the example illustrates our ability to control the format of the output, by returning the data in "long" format, and suppressing output of columns that are uninformative in this context. +# %% [markdown] +# ## Sentiment analysis on text +# Here we use the VADER sentiment analyzer (Hutto & Gilbert, 2014) implemented in the `nltk` package to extract sentiment for (a) a coherent block of text, and (b) each word in the text separately. This example also introduces the `Stim` hierarchy of objects explicitly, whereas the initialization of `Stim` objects was implicit in the previous examples. 
+# +# #### Treat text as a single block + +# %% +from pliers.stimuli import TextStim, ComplexTextStim +from pliers.extractors import VADERSentimentExtractor, merge_results + +raw = """We're not claiming that VADER is a very good sentiment analysis tool. +Sentiment analysis is a really, really difficult problem. But just to make a +point, here are some clearly valenced words: disgusting, wonderful, poop, +sunshine, smile.""" + +# First example: we treat all text as part of a single token +text = TextStim(text=raw) + +ext = VADERSentimentExtractor() +results = ext.transform(text) +results.to_df() + +# %% [markdown] +# #### Analyze each word individually + +# %% +# Second example: we construct a ComplexTextStim, which will +# cause each word to be represented as a separate TextStim. +text = ComplexTextStim(text=raw) + +ext = VADERSentimentExtractor() +results = ext.transform(text) + +# Because results is a list of ExtractorResult objects +# (one per word), we need to merge the results explicitly. +df = merge_results(results, object_id=False) +df.head(10) + +# %% [markdown] +# ## Extract chromagram from an audio clip +# We have an audio clip, and we'd like to compute its chromagram (i.e., to extract the normalized energy in each of the 12 pitch classes). This is trivial thanks to pliers' support for the `librosa` package, which contains all kinds of useful functions for spectral feature extraction. + +# %% +from pliers.extractors import ChromaSTFTExtractor + +audio = join(get_test_data_path(), 'audio', 'barber.wav') +# Audio is sampled at 11KHz; let's compute power in 1 sec bins +ext = ChromaSTFTExtractor(hop_length=11025) +result = ext.transform(audio).to_df() +result.head(10) + + +# %% +# And a plot of the chromagram... 
+plt.imshow(result.iloc[:, 4:].values.T, aspect='auto') + +# %% [markdown] +# ## Sentiment analysis on speech transcribed from audio +# So far all of our examples involve the application of a feature extractor to an input of the expected modality (e.g., a text sentiment analyzer applied to text, a face recognizer applied to an image, etc.). But we often want to extract features that require us to first *convert* our input to a different modality. Let's see how pliers handles this kind of situation. +# +# Say we have an audio clip. We want to run sentiment analysis on the audio. This requires us to first transcribe any speech contained in the audio. As it turns out, we don't have to do anything special here; we can just feed an audio clip directly to an `Extractor` class that expects a text input (e.g., the `VADER` sentiment analyzer we used earlier). How? Magic! Pliers is smart enough to implicitly convert the audio clip to a `ComplexTextStim` internally. By default, it does this using IBM's Watson speech transcription API. Which means you'll need to make sure your API key is set up properly in order for the code below to work. (But if you'd rather use, say, Google's Cloud Speech API, you could easily configure pliers to make that the default for audio-to-text conversion.) + +# %% +audio = join(get_test_data_path(), 'audio', 'homer.wav') +ext = VADERSentimentExtractor() +result = ext.transform(audio) +df = merge_results(result, object_id=False) +df + +# %% [markdown] +# ## Object recognition on selectively sampled video frames +# A common scenario when analyzing video is to want to apply some kind of feature extraction tool to individual video frames (i.e., still images). Often, there's little to be gained by analyzing every single frame, so we want to sample frames with some specified frequency. The following example illustrates how easily this can be accomplished in pliers. It also demonstrates the concept of *chaining* multiple Transformer objects. 
We first convert a video to a series of images, and then apply an object-detection `Extractor` to each image. +# +# Note, as with other examples above, that the `ClarifaiAPIImageExtractor` wraps the Clarifai object recognition API, so you'll need to have an API key set up appropriately (if you don't have an API key, and don't want to set one up, you can replace `ClarifaiAPIImageExtractor` with `TensorFlowInceptionV3Extractor` to get similar, though not quite as accurate, results). + +# %% +from pliers.filters import FrameSamplingFilter +from pliers.extractors import ClarifaiAPIImageExtractor, merge_results + +video = join(get_test_data_path(), 'video', 'small.mp4') + +# Sample 2 frames per second +sampler = FrameSamplingFilter(hertz=2) +frames = sampler.transform(video) + +ext = ClarifaiAPIImageExtractor() +results = ext.transform(frames) +df = merge_results(results, ) +df + +# %% [markdown] +# The resulting data frame has 41 columns (!), most of which are individual object labels like 'lego', 'toy', etc., selected for us by the Clarifai API on the basis of the content detected in the video (we could have also forced the API to return values for specific labels). +# %% [markdown] +# ## Multiple extractors +# So far we've only used a single `Extractor` at a time to extract information from our inputs. Now we'll start to get a little more ambitious. Let's say we have a video that we want to extract *lots* of different features from--in multiple modalities. Specifically, we want to extract all of the following: +# +# * Object recognition and face detection applied to every 10th frame of the video; +# * A second-by-second estimate of spectral power in the speech frequency band; +# * A word-by-word speech transcript; +# * Estimates of several lexical properties (e.g., word length, written word frequency, etc.) for every word in the transcript; +# * Sentiment analysis applied to the entire transcript. 
+# +# We've already seen some of these features extracted individually, but now we're going to extract *all* of them at once. As it turns out, the code looks almost exactly like a concatenated version of several of our examples above. + +# %% +from pliers.tests.utils import get_test_data_path +from os.path import join +from pliers.filters import FrameSamplingFilter +from pliers.converters import GoogleSpeechAPIConverter +from pliers.extractors import (ClarifaiAPIImageExtractor, GoogleVisionAPIFaceExtractor, + ComplexTextExtractor, PredefinedDictionaryExtractor, + STFTAudioExtractor, VADERSentimentExtractor, + merge_results) + +video = join(get_test_data_path(), 'video', 'obama_speech.mp4') + +# Store all the returned features in a single list (nested lists +# are fine, the merge_results function will flatten everything) +features = [] + +# Sample video frames and apply the image-based extractors +sampler = FrameSamplingFilter(every=10) +frames = sampler.transform(video) + +obj_ext = ClarifaiAPIImageExtractor() +obj_features = obj_ext.transform(frames) +features.append(obj_features) + +face_ext = GoogleVisionAPIFaceExtractor() +face_features = face_ext.transform(frames) +features.append(face_features) + +# Power in speech frequencies +stft_ext = STFTAudioExtractor(freq_bins=[(100, 300)]) +speech_features = stft_ext.transform(video) +features.append(speech_features) + +# Explicitly transcribe the video--we could also skip this step +# and it would be done implicitly, but this way we can specify +# that we want to use the Google Cloud Speech API rather than +# the package default (IBM Watson) +text_conv = GoogleSpeechAPIConverter() +text = text_conv.transform(video) + +# Text-based features +text_ext = ComplexTextExtractor() +text_features = text_ext.transform(text) +features.append(text_features) + +dict_ext = PredefinedDictionaryExtractor( + variables=['affect/V.Mean.Sum', 'subtlexusfrequency/Lg10WF']) +norm_features = dict_ext.transform(text) 
+features.append(norm_features) + +sent_ext = VADERSentimentExtractor() +sent_features = sent_ext.transform(text) +features.append(sent_features) + +# Ask for data in 'long' format, and code extractor name as a separate +# column instead of prepending it to feature names. +df = merge_results(features, format='long', extractor_names='column') + +# Output rows in a sensible order +df.sort_values(['extractor', 'feature', 'onset', 'duration', 'order']).head(10) + +# %% [markdown] +# The resulting pandas DataFrame is quite large; even for our 9-second video, we get back over 3,000 rows! Importantly, though, the DataFrame contains all kinds of metadata that makes it easy to filter and sort the results in whatever way we might want to (e.g., we can filter on the extractor, stim class, onset or duration, etc.). +# %% [markdown] +# ## Multiple extractors with a Graph +# The above code listing is already pretty terse, and has the advantage of being explicit about every step. But if it's brevity we're after, pliers is happy to oblige us. The package includes a `Graph` abstraction that allows us to load an arbitrary number of `Transformer` objects into a graph, and execute them all in one shot. The code below is functionally identical to the last example, but only about a third of the length. It also requires fewer imports, since `Transformer` objects that we don't need to initialize with custom arguments can be passed to the `Graph` as strings. +# +# The upshot of all this is that, in just a few lines of Python code, we're able to extract a broad range of multimodal features from video, image, audio or text inputs, using state-of-the-art tools and services! 
+ +# %% +from pliers.tests.utils import get_test_data_path +from os.path import join +from pliers.graph import Graph +from pliers.filters import FrameSamplingFilter +from pliers.extractors import (PredefinedDictionaryExtractor, STFTAudioExtractor, + merge_results) + + +video = join(get_test_data_path(), 'video', 'obama_speech.mp4') + +# Define nodes +nodes = [ + (FrameSamplingFilter(every=10), + ['ClarifaiAPIImageExtractor', 'GoogleVisionAPIFaceExtractor']), + (STFTAudioExtractor(freq_bins=[(100, 300)])), + ('GoogleSpeechAPIConverter', + ['ComplexTextExtractor', + PredefinedDictionaryExtractor(['affect/V.Mean.Sum', + 'subtlexusfrequency/Lg10WF']), + 'VADERSentimentExtractor']) +] + +# Initialize and execute Graph +g = Graph(nodes) + +# Arguments to merge_results can be passed in here +df = g.transform(video, format='long', extractor_names='column') + +# Output rows in a sensible order +df.sort_values(['extractor', 'feature', 'onset', 'duration', 'order']).head(10) + + diff --git a/examples/plot_scikit_learn_integration.py b/examples/plot_scikit_learn_integration.py new file mode 100644 index 000000000..2f416c665 --- /dev/null +++ b/examples/plot_scikit_learn_integration.py @@ -0,0 +1,65 @@ +""" +This is my example script +========================= + +This example doesn't do much, it just makes a simple plot +""" +# To add a new cell, type '# %%' +# To add a new markdown cell, type '# %% [markdown]' +# %% [markdown] +# Scikit-Learn Integration +# ================== +# Example using `pliers` as a node in a typical scikit-learn pipeline. 
Example code taken from scikit-learn's [website](http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html) + +# %% +# Data setup +from glob import glob +from pliers.tests.utils import get_test_data_path +from os.path import join +import numpy as np + +X = glob(join(get_test_data_path(), 'image', '*.jpg')) +# Just use random classes since this is just an example +y = np.random.randint(0, 3, len(X)) +print('Number of images found: %d' % len(X)) + + +# %% +# Pliers setup +from pliers.graph import Graph +from pliers.utils.scikit import PliersTransformer +g = Graph({'roots':[{'transformer':'BrightnessExtractor'}, + {'transformer':'SharpnessExtractor'}, + {'transformer':'VibranceExtractor'}]}) + + +# %% +# Sklearn setup +from sklearn import svm +from sklearn.feature_selection import SelectKBest +from sklearn.feature_selection import f_regression +from sklearn.pipeline import Pipeline + +# ANOVA SVM-C Pipeline +pliers_transformer = PliersTransformer(g) +anova_filter = SelectKBest(f_regression, k=2) +clf = svm.SVC(kernel='linear') +pipeline = Pipeline([('pliers', pliers_transformer), ('anova', anova_filter), ('svc', clf)]) + + +# %% +# Fit and get training accuracy +pipeline.set_params(svc__C=.1).fit(X, y) +prediction = pipeline.predict(X) +pipeline.score(X, y) + + +# %% +# Getting the selected features chosen by anova_filter +pipeline.named_steps['anova'].get_support() + + +# %% + + + diff --git a/examples/plot_simple_graph.py b/examples/plot_simple_graph.py new file mode 100644 index 000000000..85c77cb55 --- /dev/null +++ b/examples/plot_simple_graph.py @@ -0,0 +1,59 @@ +""" +This is my example script +========================= + +This example doesn't do much, it just makes a simple plot +""" +# To add a new cell, type '# %%' +# To add a new markdown cell, type '# %% [markdown]' +# %% [markdown] +# Simple Graph +# ================== +# Example configuring and executing a simple graph. 
The graph constructed runs on video inputs, and extracts the length of visual text, the amount of vibrance in each frame, and the length of spoken words. + +# %% +from pliers.tests.utils import get_test_data_path +from os.path import join +from pliers.stimuli import VideoStim +from pliers.converters import (VideoToAudioConverter, + TesseractConverter, + WitTranscriptionConverter) +from pliers.extractors import (ExtractorResult, + VibranceExtractor, + LengthExtractor) +from pliers.graph import Graph + + +# %% +# Load the stimulus +filename = join(get_test_data_path(), 'video', 'obama_speech.mp4') +video = VideoStim(filename) + + +# %% +# Configure the graph nodes +nodes = [([(TesseractConverter(), + [(LengthExtractor())]), + (VibranceExtractor(),)]), + (VideoToAudioConverter(), + [(WitTranscriptionConverter(), + [(LengthExtractor())])])] + + +# %% +# Construct and run the graph +graph = Graph(nodes) +graph.run(video) + + +# %% +# Save a display of the graph +graph.draw('pliers_simple_graph.png') + +# %% [markdown] +# ![title](pliers_simple_graph.png) + +# %% + + + diff --git a/examples/plot_speech_sentiment_analysis.py b/examples/plot_speech_sentiment_analysis.py new file mode 100644 index 000000000..af29057b8 --- /dev/null +++ b/examples/plot_speech_sentiment_analysis.py @@ -0,0 +1,55 @@ +""" +This is my example script +========================= + +This example doesn't do much, it just makes a simple plot +""" +# To add a new cell, type '# %%' +# To add a new markdown cell, type '# %% [markdown]' +# %% [markdown] +# Speech sentiment analysis +# =================== +# In this notebook we illustrate the power of pliers converters and extractors in a single pipeline. Specifically, we first run a state-of-the-art speech recognition API to transcribe the text of an audio clip. Then, we run a sentiment analysis API to extract the emotion ratings of the spoken words. The audio clip of this example is a short clip of an Obama administration press conference. 
+# +# Note: the analysis is not using any audio features to assist emotion extraction. It uses only the text transcribed from the audio. + +# %% +from pliers.tests.utils import get_test_data_path +from os.path import join +from pliers.stimuli import AudioStim +from pliers.graph import Graph + + +# %% +# Configure our stimulus and extraction graph +stim = AudioStim(join(get_test_data_path(), 'video', 'obama_speech.mp4')) +nodes = [ + { + 'transformer':'IBMSpeechAPIConverter', + 'parameters':{'resolution':'phrases'}, + 'children':[ + { + 'transformer':'IndicoAPITextExtractor', + 'parameters':{'models':['emotion']} + } + ] + } +] +graph = Graph(nodes) + +# %% [markdown] +# **Parameters**: +# +# IBMSpeechAPIConverter - `resolution` specifies how we should chunk the text; using phrases provides better results for emotion analysis, as opposed to word-by-word analysis +# +# IndicoAPITextExtractor - `models` specifies which analysis models to run using the Indico API; 'emotion' will give back five emotion ratings (anger, joy, fear, sadness, surprise) of the text + +# %% +results = graph.run(stim) +results + + +# %% + + + diff --git a/examples/plot_vision_apis.py b/examples/plot_vision_apis.py new file mode 100644 index 000000000..8bfb1fd0a --- /dev/null +++ b/examples/plot_vision_apis.py @@ -0,0 +1,61 @@ +""" +This is my example script +========================= + +This example doesn't do much, it just makes a simple plot +""" +# To add a new cell, type '# %%' +# To add a new markdown cell, type '# %% [markdown]' +# %% [markdown] +# Comparing Vision APIs +# ============ +# This notebook features the various computer vision APIs that pliers interfaces with. These include the Google Vision, Clarifai, and Indico APIs. To compare their performance, image recognition features are extracted from an image of an apple. 
+ +# %% +from pliers.tests.utils import get_test_data_path +from os.path import join +from pliers.extractors import (ClarifaiAPIImageExtractor, GoogleVisionAPILabelExtractor) +from pliers.stimuli.image import ImageStim +from pliers.graph import Graph + + +# %% +# Load the stimulus +stim_path = join(get_test_data_path(), 'image', 'apple.jpg') +stim = ImageStim(stim_path) + + +# %% +# Configure extractions +clarifai_ext = ClarifaiAPIImageExtractor() +google_ext = GoogleVisionAPILabelExtractor() + + +# %% +# Run extractions +clarifai_res = clarifai_ext.transform(stim) +indico_res = indico_ext.transform(stim) +google_res = google_ext.transform(stim) + + +# %% +clarifai_res.to_df() + + +# %% +df = indico_res.to_df() +df.loc[:, df.sum() > 0.5] + + +# %% +google_res.to_df() + +# %% [markdown] +# Summary +# -------- +# For the apple image, it is clear that the Google and Clarifai APIs perform best, as both have "apple", "food", and "fruit" in the top features. On the other hand, the only Indico API feature with a probability over 0.5 is "pomegranate". Furthermore, the Google API seems to also be less noisy than the Clarifai API, where several object labels have probabilities over 0.9. 
+ +# %% + + + From 34e2b46a8f15828743bd544a258abb79fe32e49e Mon Sep 17 00:00:00 2001 From: James Kent Date: Thu, 4 Mar 2021 17:40:18 -0600 Subject: [PATCH 02/24] add doctest and sphinx-gallery --- docs/conf.py | 30 ++- docs/quickstart.rst | 450 ++++++-------------------------------------- docs/results.rst | 123 ++++++++---- 3 files changed, 172 insertions(+), 431 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 154ce7bb1..d90d3dd93 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -29,10 +29,37 @@ # # needs_sphinx = '1.0' +# sphinx-doctest settings +doctest_global_setup = ''' +import os +import pandas as pd +pd.set_option("display.max_columns", None) +pd.set_option('display.max_colwidth', 0) +pd.set_option('display.expand_frame_repr', False) +''' + +#sphinx-gallery settings +sphinx_gallery_conf = { + 'examples_dirs': '../examples', # path to your example scripts + 'gallery_dirs': 'auto_examples', # path to where to save gallery generated output + 'expected_failing_examples': [], +} + +if not os.environ.get('WIT_AI_API_KEY'): + sphinx_gallery_conf['expected_failing_examples'].append('../examples/plot_simple_graph.py') + sphinx_gallery_conf['expected_failing_examples'].append('../examples/plot_speech_sentiment_analysis.py') + +if not os.environ.get('CLARIFAI_API_KEY'): + sphinx_gallery_conf['expected_failing_examples'].append('../examples/plot_vision_apis.py') + +if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS'): + sphinx_gallery_conf['expected_failing_examples'].append('../examples/plot_quickstart.py') + # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = ['sphinx.ext.autodoc', + 'sphinx.ext.doctest', 'sphinx.ext.autosummary', 'sphinx.ext.autosectionlabel', 'sphinx.ext.intersphinx', @@ -40,7 +67,8 @@ 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', 'sphinx.ext.githubpages', - 'sphinx.ext.napoleon'] + 'sphinx.ext.napoleon', + 'sphinx_gallery.gen_gallery'] # Generate stubs autosummary_generate = True diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 525c1d05a..bc4674762 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -14,7 +14,7 @@ This first example uses the face_recognition package's location extraction metho We output the result as a pandas DataFrame; the 'face_locations' column contains the coordinates of the bounding box in CSS format (i.e., top, right, bottom, and left edges). -:: +.. testcode:: from pliers.extractors import FaceRecognitionFaceLocationsExtractor from os.path import join @@ -29,57 +29,13 @@ We output the result as a pandas DataFrame; the 'face_locations' column contains # Apply Extractor to image result = ext.transform(image) - result.to_df() + print(result.to_df()) -.. raw:: html +.. testoutput:: + :options: -ELLIPSIS, +NORMALIZE_WHITESPACE - -
- - - - - - - - - - - - - - - - - - - - - - -
onsetorderdurationobject_idface_locations
0NaNNaNNaN0(142, 349, 409, 82)
-
+ order duration onset object_id face_locations + 0 NaN NaN NaN 0 (142, 349, 409, 82) @@ -95,7 +51,7 @@ result we get back is now also a list (because the features extracted from each image are stored separately), we need to explicitly combine the results using the ``merge_results`` utility. -:: +.. testcode:: from pliers.extractors import FaceRecognitionFaceLocationsExtractor, merge_results @@ -105,110 +61,18 @@ the results using the ``merge_results`` utility. ext = FaceRecognitionFaceLocationsExtractor() results = ext.transform(images) df = merge_results(results) - df + df.sort_index(axis=1, inplace=True) + print(df) -.. raw:: html - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
source_fileonsetclassfilenamestim_namehistorydurationorderobject_idFaceRecognitionFaceLocationsExtractor#face_locations
0/Users/tal/Dropbox/Code/pliers/pliers/tests/da...NaNImageStim/Users/tal/Dropbox/Code/pliers/pliers/tests/da...obama.jpgNaNNaN0(142, 349, 409, 82)
1/Users/tal/Dropbox/Code/pliers/pliers/tests/da...NaNImageStim/Users/tal/Dropbox/Code/pliers/pliers/tests/da...thai_people.jpgNaNNaN0(236, 862, 325, 772)
2/Users/tal/Dropbox/Code/pliers/pliers/tests/da...NaNImageStim/Users/tal/Dropbox/Code/pliers/pliers/tests/da...thai_people.jpgNaNNaN1(104, 581, 211, 474)
3/Users/tal/Dropbox/Code/pliers/pliers/tests/da...NaNImageStim/Users/tal/Dropbox/Code/pliers/pliers/tests/da...thai_people.jpgNaNNaN2(365, 782, 454, 693)
4/Users/tal/Dropbox/Code/pliers/pliers/tests/da...NaNImageStim/Users/tal/Dropbox/Code/pliers/pliers/tests/da...thai_people.jpgNaNNaN3(265, 444, 355, 354)
-
+.. testoutput:: + :options: +ELLIPSIS, +NORMALIZE_WHITESPACE + FaceRecognitionFaceLocationsExtractor#face_locations class duration filename history object_id onset order source_file stim_name + 0 (142, 349, 409, 82) ImageStim NaN ...obama.jpg 0 NaN NaN ...obama.jpg obama.jpg + 1 (236, 862, 325, 772) ImageStim NaN ...thai_people.jpg 0 NaN NaN ...thai_people.jpg thai_people.jpg + 2 (104, 581, 211, 474) ImageStim NaN ...thai_people.jpg 1 NaN NaN ...thai_people.jpg thai_people.jpg + 3 (365, 782, 454, 693) ImageStim NaN ...thai_people.jpg 2 NaN NaN ...thai_people.jpg thai_people.jpg + 4 (265, 444, 355, 354) ImageStim NaN ...thai_people.jpg 3 NaN NaN ...thai_people.jpg thai_people.jpg Note how the merged pandas DataFrame contains 5 rows, even though there @@ -230,7 +94,8 @@ successful execution of this code requires you to have a ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable pointing to your Google credentials JSON file. See the documentation for more details. -:: +.. testcode:: + :skipif: os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") is None from pliers.extractors import GoogleVisionAPIFaceExtractor @@ -241,7 +106,10 @@ Google credentials JSON file. See the documentation for more details. result.to_df(format='long', timing=False, object_id=False) +.. testoutput:: + :skipif: os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") is None + TODO .. raw:: html @@ -600,7 +468,7 @@ previous examples. Treat text as a single block ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:: +.. testcode:: from pliers.stimuli import TextStim, ComplexTextStim from pliers.extractors import VADERSentimentExtractor, merge_results @@ -615,63 +483,30 @@ Treat text as a single block ext = VADERSentimentExtractor() results = ext.transform(text) - results.to_df() - + print(results.to_df()) + +.. 
testoutput:: + :options: +NORMALIZE_WHITESPACE + + order duration onset object_id sentiment_neg sentiment_neu sentiment_pos sentiment_compound + 0 NaN NaN NaN 0 0.19 0.51 0.3 0.6787 +Analyze each word individually +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. raw:: html +.. testsetup:: vader -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
onsetorderdurationobject_idsentiment_negsentiment_neusentiment_possentiment_compound
0NaNNaNNaN00.190.510.30.6787
-
- - + raw = """We're not claiming that VADER is a very good sentiment analysis tool. + Sentiment analysis is a really, really difficult problem. But just to make a + point, here are some clearly valenced words: disgusting, wonderful, poop, + sunshine, smile.""" -Analyze each word individually -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:: +.. testcode:: vader # Second example: we construct a ComplexTextStim, which will # cause each word to be represented as a separate TextStim. @@ -683,198 +518,23 @@ Analyze each word individually # Because results is a list of ExtractorResult objects # (one per word), we need to merge the results explicitly. df = merge_results(results, object_id=False) - df.head(10) - + df.sort_index(axis=1, inplace=True) + print(df.head(10)) -.. raw:: html - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
source_fileonsetclassfilenamestim_namehistorydurationorderVADERSentimentExtractor#sentiment_compoundVADERSentimentExtractor#sentiment_negVADERSentimentExtractor#sentiment_neuVADERSentimentExtractor#sentiment_pos
0NaN0.0TextStimNaNtext[We]ComplexTextStim->ComplexTextIterator/TextStimNaN00.00000.01.00.0
1NaN0.0TextStimNaNtext['re]ComplexTextStim->ComplexTextIterator/TextStimNaN10.00000.01.00.0
2NaN0.0TextStimNaNtext[not]ComplexTextStim->ComplexTextIterator/TextStimNaN20.00000.01.00.0
3NaN0.0TextStimNaNtext[claiming]ComplexTextStim->ComplexTextIterator/TextStimNaN30.00000.01.00.0
4NaN0.0TextStimNaNtext[that]ComplexTextStim->ComplexTextIterator/TextStimNaN40.00000.01.00.0
5NaN0.0TextStimNaNtext[VADER]ComplexTextStim->ComplexTextIterator/TextStimNaN50.00000.01.00.0
6NaN0.0TextStimNaNtext[is]ComplexTextStim->ComplexTextIterator/TextStimNaN60.00000.01.00.0
7NaN0.0TextStimNaNtext[a]ComplexTextStim->ComplexTextIterator/TextStimNaN70.00000.00.00.0
8NaN0.0TextStimNaNtext[very]ComplexTextStim->ComplexTextIterator/TextStimNaN80.00000.01.00.0
9NaN0.0TextStimNaNtext[good]ComplexTextStim->ComplexTextIterator/TextStimNaN90.44040.00.01.0
-
- + VADERSentimentExtractor#sentiment_compound VADERSentimentExtractor#sentiment_neg VADERSentimentExtractor#sentiment_neu VADERSentimentExtractor#sentiment_pos class duration filename history onset order source_file stim_name + 0 0.0000 0.0 1.0 0.0 TextStim NaN NaN ComplexTextStim->ComplexTextIterator/TextStim 0.0 0 NaN text[We] + 1 0.0000 0.0 1.0 0.0 TextStim NaN NaN ComplexTextStim->ComplexTextIterator/TextStim 0.0 1 NaN text['re] + 2 0.0000 0.0 1.0 0.0 TextStim NaN NaN ComplexTextStim->ComplexTextIterator/TextStim 0.0 2 NaN text[not] + 3 0.0000 0.0 1.0 0.0 TextStim NaN NaN ComplexTextStim->ComplexTextIterator/TextStim 0.0 3 NaN text[claiming] + 4 0.0000 0.0 1.0 0.0 TextStim NaN NaN ComplexTextStim->ComplexTextIterator/TextStim 0.0 4 NaN text[that] + 5 0.0000 0.0 1.0 0.0 TextStim NaN NaN ComplexTextStim->ComplexTextIterator/TextStim 0.0 5 NaN text[VADER] + 6 0.0000 0.0 1.0 0.0 TextStim NaN NaN ComplexTextStim->ComplexTextIterator/TextStim 0.0 6 NaN text[is] + 7 0.0000 0.0 0.0 0.0 TextStim NaN NaN ComplexTextStim->ComplexTextIterator/TextStim 0.0 7 NaN text[a] + 8 0.0000 0.0 1.0 0.0 TextStim NaN NaN ComplexTextStim->ComplexTextIterator/TextStim 0.0 8 NaN text[very] + 9 0.4404 0.0 0.0 1.0 TextStim NaN NaN ComplexTextStim->ComplexTextIterator/TextStim 0.0 9 NaN text[good] Extract chromagram from an audio clip diff --git a/docs/results.rst b/docs/results.rst index a567fe497..e62c8fa2c 100644 --- a/docs/results.rst +++ b/docs/results.rst @@ -9,18 +9,18 @@ The ExtractorResult class ------------------------- Calling ``transform()`` on an instantiated |Extractor| returns an object of class |ExtractorResult|. This is a lightweight container that contains all of the extracted feature information returned by the |Extractor|, references to the |Stim| and |Extractor| objects used to generate the result, and both "raw" and processed forms of the results returned by the |Extractor| (though note that many Extractors don't set a ``.raw`` property). For example: -:: +.. 
doctest:: + >>> from os.path import join + >>> from pliers.tests.utils import get_test_data_path + >>> jpg = join(get_test_data_path(), 'image', 'obama.jpg') >>> from pliers.extractors import FaceRecognitionFaceLocationsExtractor >>> ext = FaceRecognitionFaceLocationsExtractor() - >>> result = ext.transform(image) - + >>> result = ext.transform(jpg) >>> result.stim.name 'obama.jpg' - >>> result.extractor.name 'FaceRecognitionFaceLocationsExtractor' - >>> result.raw [(142, 349, 409, 82)] @@ -30,10 +30,22 @@ Exporting results to pandas DataFrames ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Typically, we'll want to work with the data in a more convenient form. Fortunately, every |ExtractorResult| instance provides a .to_df() method that returns a pandas DataFrame: -:: +.. testsetup:: export_results + + from os.path import join + from pliers.tests.utils import get_test_data_path + jpg = join(get_test_data_path(), 'image', 'obama.jpg') + from pliers.extractors import FaceRecognitionFaceLocationsExtractor + ext = FaceRecognitionFaceLocationsExtractor() + result = ext.transform(jpg) + + +.. doctest:: export_results + :options: +NORMALIZE_WHITESPACE + >>> result.to_df() - onset duration object_id face_locations - NaN NaN 0 (142, 349, 409, 82) + order duration onset object_id face_locations + 0 NaN NaN NaN 0 (142, 349, 409, 82) Here, the ``'face_locations'`` column is properly labeled with the name of the feature returned by the |Extractor|. Not surprisingly, you'll still need to know something about the feature extraction tool you're using in order to understand what you're getting back. In this case, consulting the documentation for the face_recognition package's `face_locations `_ function reveals that the values ``(142, 349, 409, 82)`` give us the bounding box coordinates of the detected face in CSS order (i.e., top, right, bottom, left). @@ -42,22 +54,40 @@ Timing columns You're probably wondering what the other columns are. 
The ``'onset'`` and ``'duration'`` columns provide timing information for the event in question, if applicable. In this case, because our source |Stim| was a static image, there's no meaningful timing information to be had. But ``to_df()`` still returns these columns by default. This becomes important in cases where we want to preserve some temporal context as we pass |Stim| objects through a feature extraction pipeline: -:: - +.. doctest:: + :options: +NORMALIZE_WHITESPACE + + >>> from os.path import join + >>> from pliers.tests.utils import get_test_data_path + >>> jpg = join(get_test_data_path(), 'image', 'obama.jpg') + >>> from pliers.extractors import FaceRecognitionFaceLocationsExtractor + >>> from pliers.stimuli.image import ImageStim >>> ext = FaceRecognitionFaceLocationsExtractor() - >>> image = Stim('obama.jpg', onset=14, duration=1) + >>> image = ImageStim(jpg, onset=14, duration=1) >>> result = ext.transform(image) >>> result.to_df() - onset duration object_id face_locations - 14 1 0 (142, 349, 409, 82) + order duration onset object_id face_locations + 0 NaN 1 14 0 (142, 349, 409, 82) Of course, if we really don't want the timing columns, we can easily suppress them: -:: +.. testsetup:: timing + + from os.path import join + from pliers.tests.utils import get_test_data_path + jpg = join(get_test_data_path(), 'image', 'obama.jpg') + from pliers.extractors import FaceRecognitionFaceLocationsExtractor + from pliers.stimuli.image import ImageStim + ext = FaceRecognitionFaceLocationsExtractor() + image = ImageStim(jpg, onset=14, duration=1) + result = ext.transform(image) + +.. doctest:: timing + :options: +NORMALIZE_WHITESPACE >>> result.to_df(timing=False) - object_id face_locations - 0 (142, 349, 409, 82) + object_id face_locations + 0 0 (142, 349, 409, 82) We could also pass ``timing='auto'``, which would drop the ``'onset'`` and ``'duration'`` columns if and only if all values are ``NaN``. @@ -68,17 +98,23 @@ What about the ``'object_id'`` column? 
This one's not so intuitive, but can in s The solution is to serially assign each distinct result object a different ``object_id``. Let's modify the last example to feed in an image that contains 4 separate faces: -:: +.. doctest:: + :options: +NORMALIZE_WHITESPACE + >>> from os.path import join + >>> from pliers.tests.utils import get_test_data_path + >>> jpg = join(get_test_data_path(), 'image', 'thai_people.jpg') + >>> from pliers.extractors import FaceRecognitionFaceLocationsExtractor + >>> from pliers.stimuli.image import ImageStim >>> ext = FaceRecognitionFaceLocationsExtractor() - >>> image = Stim('obama.jpg', onset=14, duration=1) - + >>> image = ImageStim(jpg, onset=14, duration=1) >>> result = ext.transform(image) - onset duration object_id face_locations - 14 1 0 (236, 862, 325, 772) - 14 1 1 (104, 581, 211, 474) - 14 1 2 (365, 782, 454, 693) - 14 1 3 (265, 444, 355, 354) + >>> result.to_df() + order duration onset object_id face_locations + 0 NaN 1 14 0 (236, 862, 325, 772) + 1 NaN 1 14 1 (104, 581, 211, 474) + 2 NaN 1 14 2 (365, 782, 454, 693) + 3 NaN 1 14 3 (265, 444, 355, 354) As with the ``timing`` columns, if we don't want to see the ``object_id`` column, we can suppress it by calling ``.to_df(object_id=False)`` or ``.to_df(object_id='auto')``. In the latter case, the ``object_id`` column will be included if and only if the values are non-constant (i.e., there is some value other than 0 somewhere in the DataFrame). @@ -87,12 +123,14 @@ Displaying metadata Although not displayed by default, it's also possible to include additional metadata about the |Stim| and |Extractor| in the DataFrame returned by ``to_df``: -:: - >>> result = ext.transform('obama.jpg') +.. 
doctest:: timing + :options: +NORMALIZE_WHITESPACE + + >>> result = ext.transform(jpg) >>> result.to_df(timing=False, object_id=False, metadata=True) - face_locations stim_name class filename history source_file - (142, 349, 409, 82) obama.jpg ImageStim obama.jpg obama.jpg + face_locations stim_name class filename history source_file + 0 (142, 349, 409, 82) obama.jpg ImageStim ...obama.jpg ...obama.jpg Here we get columns for the |Stim| name (typically just the filename, unless we explicitly specified a different name), the current filename, the |Stim| history, and the source filename. In the above example, ``stim_name``, ``filename`` and ``source_file`` are identical, but this won't always be the case. For example, if the images we're running through the |FaceRecognitionFaceLocationsExtractor| had been extracted from frames of video, the ``source_file`` would point to the original video, while the ``filename`` would point to (temporary) image files corresponding to the extracted frames. @@ -102,12 +140,16 @@ Display mode ############ By default, DataFrames are in 'wide' format. That is, each row represents a single event, and all features are contained in columns. To get a better sense of what this means, it's helpful to look at an extractor that returns more than one feature: -:: +.. 
doctest:: + :skipif: os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") is None + :options: +NORMALIZE_WHITESPACE + >>> from os.path import join + >>> from pliers.tests.utils import get_test_data_path + >>> apple = join(get_test_data_path(), 'image', 'apple.jpg') >>> from pliers.extractors import GoogleVisionAPILabelExtractor >>> ext = GoogleVisionAPILabelExtractor() - >>> result = ext.transform('apple.jpg') - + >>> result = ext.transform(apple) >>> result.to_df() onset duration object_id fruit apple produce food natural foods mcintosh diet food NaN NaN 0 0.968 0.966 0.959 0.824 0.801 0.629 0.607 @@ -116,7 +158,18 @@ Here we fed in an image of an apple, and the |GoogleVisionAPILabelExtractor| aut While there's nothing at all wrong with this format (indeed, it's the default!), sometimes we prefer to get back our data in 'long' format, where each row represents the intersection of a single event and a single feature: -:: +.. testsetup:: display + :skipif: os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") is None + + from os.path import join + from pliers.tests.utils import get_test_data_path + apple = join(get_test_data_path(), 'image', 'apple.jpg') + from pliers.extractors import GoogleVisionAPILabelExtractor + ext = GoogleVisionAPILabelExtractor() + result = ext.transform(apple) + +.. doctest:: display + :skipif: os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") is None >>> result.to_df(format='long', timing=False, object_id=False) feature value @@ -137,7 +190,8 @@ If we only ever worked with results generated by a single |Extractor| for a sing We can ensure that the name of the current |Extractor| is explicitly added to our results via the ``extractor_name`` argument. The precise behavior of ``extractor_name=True`` will depend on the ``format`` argument. When ``format='wide'``, the name will be added as the first level in a pandas MultiIndex; when ``format='long'``, a new column will be added. Examples: -:: +.. 
doctest:: display + :skipif: os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") is None >>> result.to_df(format='long', timing=False, object_id=False, extractor_name=True) feature value extractor @@ -148,7 +202,6 @@ We can ensure that the name of the current |Extractor| is explicitly added to ou natural foods 0.801 GoogleVisionAPILabelExtractor mcintosh 0.629 GoogleVisionAPILabelExtractor diet food 0.607 GoogleVisionAPILabelExtractor - >>> result.to_df(timing=False, object_id=False, extractor_name=True) GoogleVisionAPILabelExtractor fruit apple produce food natural foods mcintosh diet food @@ -194,7 +247,7 @@ In practice, many users will primarily rely on the :ref:`Graph API ` for The main thing to be aware of in this case is that the ``.transform`` call takes any of the keyword arguments supported by ``merge_results``, and simply passes them through. This means you can control the output format and inclusion of various columns exactly as documented above for ``merge_results`` (and ``to_df``). Here's a minimalistic example to illustrate: -:: +.. 
doctest:: from pliers.graph import Graph from pliers.filters import FrameSamplingFilter From f9a6219d91f0e19e4744223b0037544268d53f4a Mon Sep 17 00:00:00 2001 From: James Kent Date: Thu, 4 Mar 2021 17:40:55 -0600 Subject: [PATCH 03/24] add example gallery to sphinx --- docs/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/index.rst b/docs/index.rst index ae6eb28c6..d59454b99 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,6 +8,7 @@ Pliers documentation installation quickstart + auto_examples/index basic-concepts stimuli transformers From a9b3e8c0cdd798c78e603daf3bb2b842a081d776 Mon Sep 17 00:00:00 2001 From: James Kent Date: Thu, 4 Mar 2021 22:46:01 -0600 Subject: [PATCH 04/24] install package for doc building --- .github/workflows/pull_request.yml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 0098f21c7..8bb1d318a 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -11,9 +11,22 @@ jobs: steps: - uses: actions/checkout@v1 # Grabbing custom dependencies and building as a pdf. 
+ - name: Set up system dependencies + run: | + sudo apt-get update + sudo apt-get install libavformat-dev libavfilter-dev libavdevice-dev ffmpeg libmp3lame-dev tesseract-ocr graphviz cmake libboost-python-dev libgraphviz-dev + - uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ hashFiles('setup.py') }}-${{ hashFiles('requirements.txt') }}-${{ hashFiles('optional_requirements.txt') }} + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip wheel + python -m pip install --upgrade --ignore-installed setuptools + python -m pip install -r requirements.txt -r optional-dependencies.txt --upgrade --upgrade-strategy eager - uses: ammaraskar/sphinx-action@master with: - pre-build-command: "pip install sphinx-rtd-theme" + pre-build-command: "pip install sphinx-rtd-theme sphinx-gallery" docs-folder: "docs/" # Create an artifact of the html output. - uses: actions/upload-artifact@v1 From fe50f83bdd6895b26c5080d174202c71f44d66c3 Mon Sep 17 00:00:00 2001 From: James Kent Date: Thu, 4 Mar 2021 22:55:32 -0600 Subject: [PATCH 05/24] add miniconda --- .github/workflows/pull_request.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 8bb1d318a..70eada366 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -10,6 +10,10 @@ jobs: steps: - uses: actions/checkout@v1 + - uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: 3.6 # Grabbing custom dependencies and building as a pdf. 
- name: Set up system dependencies run: | From d025d36c104f5e747ca0ee506d537e151acd1344 Mon Sep 17 00:00:00 2001 From: James Kent Date: Thu, 4 Mar 2021 23:06:32 -0600 Subject: [PATCH 06/24] change default bash --- .github/workflows/pull_request.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 70eada366..4728900a8 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -7,7 +7,9 @@ jobs: build: runs-on: ubuntu-latest - + defaults: + run: + shell: bash -l {0} steps: - uses: actions/checkout@v1 - uses: conda-incubator/setup-miniconda@v2 From b6d768251d875bba6f4fa0699206b539713fd88f Mon Sep 17 00:00:00 2001 From: James Kent Date: Thu, 4 Mar 2021 23:28:40 -0600 Subject: [PATCH 07/24] install pliers explicitly and cache conda environment --- .github/workflows/pull_request.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 4728900a8..4350b2bab 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -12,6 +12,10 @@ jobs: shell: bash -l {0} steps: - uses: actions/checkout@v1 + - uses: actions/cache@v2 + with: + path: /usr/share/miniconda/envs/test + key: ${{ hashFiles('setup.py') }}-${{ hashFiles('requirements.txt') }}-${{ hashFiles('optional_requirements.txt') }}-conda - uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true @@ -21,15 +25,13 @@ jobs: run: | sudo apt-get update sudo apt-get install libavformat-dev libavfilter-dev libavdevice-dev ffmpeg libmp3lame-dev tesseract-ocr graphviz cmake libboost-python-dev libgraphviz-dev - - uses: actions/cache@v2 - with: - path: ~/.cache/pip - key: ${{ hashFiles('setup.py') }}-${{ hashFiles('requirements.txt') }}-${{ hashFiles('optional_requirements.txt') }} - name: Install Python dependencies run: | python -m pip install --upgrade pip 
wheel python -m pip install --upgrade --ignore-installed setuptools + python -m pip install --upgrade sphinx-rtd-theme sphinx-gallery --upgrade-strategy eager python -m pip install -r requirements.txt -r optional-dependencies.txt --upgrade --upgrade-strategy eager + python -m pip install . - uses: ammaraskar/sphinx-action@master with: pre-build-command: "pip install sphinx-rtd-theme sphinx-gallery" From 086268a50695edb7257f62322aa4bf5a7f8fbef4 Mon Sep 17 00:00:00 2001 From: James Kent Date: Thu, 4 Mar 2021 23:43:27 -0600 Subject: [PATCH 08/24] add all to install --- .github/workflows/pull_request.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 4350b2bab..248013a1e 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -31,10 +31,9 @@ jobs: python -m pip install --upgrade --ignore-installed setuptools python -m pip install --upgrade sphinx-rtd-theme sphinx-gallery --upgrade-strategy eager python -m pip install -r requirements.txt -r optional-dependencies.txt --upgrade --upgrade-strategy eager - python -m pip install . + python -m pip install .[all] - uses: ammaraskar/sphinx-action@master with: - pre-build-command: "pip install sphinx-rtd-theme sphinx-gallery" docs-folder: "docs/" # Create an artifact of the html output. 
- uses: actions/upload-artifact@v1 From 2913227cbd0072cccfa3f6a8dead25657a0da327 Mon Sep 17 00:00:00 2001 From: James Kent Date: Sun, 7 Mar 2021 01:32:28 -0600 Subject: [PATCH 09/24] attempt running doc tests through docker --- .github/workflows/test-docker.yml | 161 ++++++++++++++++++++++++++++++ docker/Dockerfile | 35 +++++-- setup.py | 62 ++++++++++-- 3 files changed, 241 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/test-docker.yml diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml new file mode 100644 index 000000000..b752bda04 --- /dev/null +++ b/.github/workflows/test-docker.yml @@ -0,0 +1,161 @@ + +name: Pull Request Docker Check + +on: [pull_request, workflow_dispatch,push] + +env: + EXEC_IMG: localhost:5000/pliers:exec + BUILDER_IMG: localhost:5000/pliers:build + DOC_IMG: localhost:5000/pliers:docs + REGISTRY_PATH: /tmp/registry + CACHE_PATH: /tmp/.buildx-cache + + +jobs: + build_image: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - + name: Cache Docker Registry + uses: actions/cache@v2 + with: + path: | + /tmp/registry + key: ${{ runner.os }}-buildx-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx- + - + name: Start Docker registry + run: | + docker run -d -p 5000:5000 -v ${REGISTRY_PATH}:/var/lib/registry --name registry registry:2 + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + with: + driver-opts: network=host + - + name: Pull Prebuilt Images + run: | + set +e + docker pull localhost:5000/python:3.7-slim + success=$? 
+ set -e + if [[ "$success" = "0" ]]; then + echo "Pulling from local registry" + docker tag localhost:5000/python:3.7-slim python:3.7-slim + else + echo "Pulling from Docker Hub" + docker pull python:3.7-slim + docker tag python:3.7-slim localhost:5000/python:3.7-slim + docker push localhost:5000/python:3.7-slim + fi + set +e + docker pull ${BUILDER_IMG} || true + docker pull ${EXEC_IMG} || true + - + name: Build and Cache Builder + uses: docker/build-push-action@v2 + with: + context: . + file: ./docker/Dockerfile + target: builder + build-args: | + BUILDKIT_INLINE_CACHE=1 + cache-from: | + ${{ env.BUILDER_IMG }} + push: true + tags: ${{ env.BUILDER_IMG }} + - + name: Build and Cache Executable + uses: docker/build-push-action@v2 + with: + context: . + file: ./docker/Dockerfile + target: executable + build-args: | + BUILDKIT_INLINE_CACHE=1 + cache-from: | + ${{ env.EXEC_IMG }} + push: true + tags: ${{ env.EXEC_IMG }} + - + name: Stop Docker registry + run: docker stop registry + - + name: Upload Docker Registry Data for Downstream Jobs + uses: actions/upload-artifact@v2 + with: + name: docker-registry-data + path: /tmp/registry + + docs: + runs-on: ubuntu-latest + needs: build_image + steps: + - + name: Checkout + uses: actions/checkout@v2 + - + name: Download Docker Registry Data from Build Job + uses: actions/download-artifact@v2 + with: + name: docker-registry-data + path: /tmp/registry + - + name: Cache Docker Layers + uses: actions/cache@v2 + with: + path: ${{ env.CACHE_PATH }} + key: ${{ runner.os }}-buildx-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx- + - + name: Start Docker Registry + run: | + docker run -d -p 5000:5000 -v ${REGISTRY_PATH}:/var/lib/registry --name registry registry:2 + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + with: + driver-opts: network=host + - + name: Pull Prebuilt Images + run: | + docker pull ${BUILDER_IMG} + docker pull ${EXEC_IMG} + - + name: Display Images + run: | + docker images + - 
+ name: Build and Cache Docs + uses: docker/build-push-action@v2 + with: + context: . + file: ./docker/Dockerfile + tags: ${{ env.DOC_IMG }} + target: docs + load: true + cache-from: | + ${{ env.BUILDER_IMG }} + ${{ env.EXEC_IMG }} + type=local,src=${{ env.CACHE_PATH }} + cache-to: type=local,dest=${{ env.CACHE_PATH }}-new + - + name: Run Doc Tests + run: docker run ${DOC_IMG} + - + name: Stop Docker registry + run: docker stop registry + - + # Temp fix + # https://github.com/docker/build-push-action/issues/252 + # https://github.com/moby/buildkit/issues/1896 + name: Move cache + run: | + rm -rf /tmp/.buildx-cache + mv /tmp/.buildx-cache-new /tmp/.buildx-cache + + diff --git a/docker/Dockerfile b/docker/Dockerfile index d685c993c..47fe20649 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -14,6 +14,7 @@ RUN chmod a+rX -R . \ libc6-dev \ libgraphviz-dev \ libmagic-dev \ + libsndfile1 \ make \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* @@ -23,29 +24,47 @@ RUN python -m venv /opt/venv RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel RUN python -m pip install --no-cache-dir --requirement requirements.txt # Install the non-GPU tensorflow package because it is smaller. -RUN sed -i 's/tensorflow/tensorflow-cpu/g' optional-dependencies.txt \ +RUN sed -i 's/tensorflow>/tensorflow-cpu>/g' optional-dependencies.txt \ && python -m pip install --no-cache-dir --requirement optional-dependencies.txt RUN python -m pip install --no-cache-dir --editable . 
RUN python -m pip install --no-cache-dir ipython notebook RUN python -m spacy download en_core_web_sm +RUN python -m pliers.support.download \ + && python -m pliers.support.setup_yamnet -FROM python:3.7-slim -RUN useradd --create-home --shell /bin/bash pliers \ - # Empty top level directories to facilitate use of the image in singularity - # on a box with kernel lacking overlay FS support - && mkdir -p /data /backup \ +FROM python:3.7-slim as base +# Empty top level directories to facilitate use of the image in singularity +# on a box with kernel lacking overlay FS support +RUN mkdir -p /data /backup \ && apt-get update -qq \ && apt-get install -yq --no-install-recommends \ ffmpeg \ graphviz \ libmagic1 \ tesseract-ocr \ + make \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* + +FROM base as executable +RUN useradd --create-home --shell /bin/bash pliers COPY --from=builder --chown=pliers /opt/venv /opt/venv COPY --from=builder --chown=pliers /opt/pliers /opt/pliers +COPY --from=builder --chown=pliers /root/nltk_data /home/pliers/nltk_data +COPY --from=builder --chown=pliers /root/pliers_data /home/pliers/pliers_data ENV PATH="/opt/venv/bin:$PATH" USER pliers -RUN python -m pliers.support.download \ - && python -m pliers.support.setup_yamnet WORKDIR /work + +FROM executable as docs +WORKDIR /opt/pliers/docs +RUN python -m pip install /opt/pliers['docs'] +CMD make doctest && make html + +FROM executable as tests +WORKDIR /opt/pliers +RUN python -m pip install /opt/pliers['tests'] + +CMD py.test pliers/tests -n auto --cov=pliers --cov-report xml -m "not requires_payment" -W ignore::UserWarning &&\ + skip_high_memory=false &&\ + py.test pliers/tests/extractors/test_model_extractors.py -n auto --forked --cov-append --cov=pliers --cov-report xml -m "not requires_payment" -W ignore::UserWarning \ No newline at end of file diff --git a/setup.py b/setup.py index 96f427807..2c94ec530 100644 --- a/setup.py +++ b/setup.py @@ -19,8 +19,20 @@ maintainer='Tal Yarkoni', 
maintainer_email='tyarkoni@gmail.com', url='http://github.com/tyarkoni/pliers', - install_requires=['numpy', 'scipy', 'moviepy', 'pandas', - 'pillow', 'python-magic', 'requests', 'nltk'], + + install_requires=[ + 'imageio>=2.3', + 'moviepy>=0.2', + 'nltk>=3.0', + 'numpy>=1.13', + 'pandas>=0.24', + 'pillow', + 'psutil', + 'python-magic', + 'requests', + 'scipy>=0.13', + 'tqdm' + ], packages=find_packages(exclude=['pliers/tests']), license='MIT', package_data={'pliers': ['datasets/*'], @@ -31,13 +43,45 @@ __version__, **extra_setuptools_args, extras_require={ - 'all': ['clarifai', 'duecredit', 'face_recognition', 'python-twitter', - 'gensim', 'google-api-python-client', 'google-compute-engine', - 'librosa>=0.6.3' 'numba<=0.48', 'matplotlib', 'opencv-python', - 'pathos', 'pygraphviz', 'pysrt', 'pytesseract', - 'python-twitter', 'scikit-learn', 'seaborn', 'soundfile', - 'spacy', 'SpeechRecognition>=3.6.0', 'tensorflow>=1.0.0', - 'torch', 'transformers', 'xlrd', 'rev_ai'] + 'extractors': [ + 'clarifai', + 'duecredit', + 'face_recognition', + 'gensim', + 'google-api-python-client', + 'google-compute-engine', + 'librosa>=0.6.3', + 'numba<=0.48', + 'matplotlib', + 'opencv-python', + 'openpyxl', + 'pathos', + 'pygraphviz', + 'pysrt', + 'pytesseract', + 'python-twitter', + 'rev_ai', + 'scikit-learn', + 'seaborn', + 'spacy', + 'SpeechRecognition>=3.6.0', + 'tensorflow>=2.0.0', + 'torch', + 'transformers', + 'tensorflow-hub', + 'tensorflow_text', + 'xlrd' + ], + 'docs': [ + 'sphinx-rtd-theme', + 'sphinx-gallery', + ], + 'tests': [ + 'coveralls', + 'pytest-cov', + 'pytest-forked', + 'pytest-xdist', + ] }, python_requires='>=3.6', ) From f32191b21b1233a14fb32db5af7738dbbaba3930 Mon Sep 17 00:00:00 2001 From: James Kent Date: Sun, 7 Mar 2021 01:33:11 -0600 Subject: [PATCH 10/24] remove path insertion --- docs/conf.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index d90d3dd93..6284be61d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,9 
+20,6 @@ import os import sys -sys.path.insert(0, os.path.abspath('..')) - - # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. From 074c8065ff4672af3fa7f0ea4861498f689c8003 Mon Sep 17 00:00:00 2001 From: James Kent Date: Sun, 7 Mar 2021 18:03:38 -0600 Subject: [PATCH 11/24] make dockerfile more cache friendly --- docker/Dockerfile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 47fe20649..8abf6aa42 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,7 +1,6 @@ FROM python:3.7-slim as builder ARG DEBIAN_FRONTEND="noninteractive" WORKDIR /opt/pliers -COPY . . # Install dependencies into a virtual environment so they can be easily copied into # the second stage. ENV PATH="/opt/venv/bin:$PATH" @@ -22,12 +21,15 @@ RUN chmod a+rX -R . \ # do impose a size penalty, because we build the final image in a separate stage. RUN python -m venv /opt/venv RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel +RUN python -m pip install --no-cache-dir ipython notebook +COPY requirements.txt . RUN python -m pip install --no-cache-dir --requirement requirements.txt # Install the non-GPU tensorflow package because it is smaller. +COPY optional-dependencies.txt . RUN sed -i 's/tensorflow>/tensorflow-cpu>/g' optional-dependencies.txt \ && python -m pip install --no-cache-dir --requirement optional-dependencies.txt +COPY . . RUN python -m pip install --no-cache-dir --editable . 
-RUN python -m pip install --no-cache-dir ipython notebook RUN python -m spacy download en_core_web_sm RUN python -m pliers.support.download \ && python -m pliers.support.setup_yamnet From 29e9875cc760a46df17f39ed25c194c109a6b36b Mon Sep 17 00:00:00 2001 From: James Kent Date: Sun, 7 Mar 2021 18:04:13 -0600 Subject: [PATCH 12/24] use registry directly --- .github/workflows/test-docker.yml | 64 +++++++++++++++++-------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml index b752bda04..60cab02e0 100644 --- a/.github/workflows/test-docker.yml +++ b/.github/workflows/test-docker.yml @@ -1,7 +1,7 @@ name: Pull Request Docker Check -on: [pull_request, workflow_dispatch,push] +on: [pull_request,workflow_dispatch,push] env: EXEC_IMG: localhost:5000/pliers:exec @@ -36,7 +36,7 @@ jobs: with: driver-opts: network=host - - name: Pull Prebuilt Images + name: Rename builder image run: | set +e docker pull localhost:5000/python:3.7-slim @@ -51,40 +51,40 @@ jobs: docker tag python:3.7-slim localhost:5000/python:3.7-slim docker push localhost:5000/python:3.7-slim fi - set +e - docker pull ${BUILDER_IMG} || true - docker pull ${EXEC_IMG} || true + # set +e + # docker pull ${BUILDER_IMG} || true + # docker pull ${EXEC_IMG} || true - - name: Build and Cache Builder + name: Build and export builder uses: docker/build-push-action@v2 with: context: . file: ./docker/Dockerfile target: builder - build-args: | - BUILDKIT_INLINE_CACHE=1 cache-from: | - ${{ env.BUILDER_IMG }} + type=registry,ref=${{ env.BUILDER_IMG }} + cache-to: | + type=registry,ref=${{ env.BUILDER_IMG }},mode=max push: true tags: ${{ env.BUILDER_IMG }} - - name: Build and Cache Executable + name: Build and export executable uses: docker/build-push-action@v2 with: context: . 
file: ./docker/Dockerfile target: executable - build-args: | - BUILDKIT_INLINE_CACHE=1 cache-from: | - ${{ env.EXEC_IMG }} + type=registry,ref=${{ env.EXEC_IMG }} + cache-to: | + type=registry,ref=${{ env.EXEC_IMG }},mode=max push: true tags: ${{ env.EXEC_IMG }} - name: Stop Docker registry run: docker stop registry - - name: Upload Docker Registry Data for Downstream Jobs + name: Upload Docker registry data for next steps uses: actions/upload-artifact@v2 with: name: docker-registry-data @@ -98,13 +98,13 @@ jobs: name: Checkout uses: actions/checkout@v2 - - name: Download Docker Registry Data from Build Job + name: Download Docker registry data from build job uses: actions/download-artifact@v2 with: name: docker-registry-data path: /tmp/registry - - name: Cache Docker Layers + name: Cache Docker layers uses: actions/cache@v2 with: path: ${{ env.CACHE_PATH }} @@ -112,7 +112,7 @@ jobs: restore-keys: | ${{ runner.os }}-buildx- - - name: Start Docker Registry + name: Start Docker registry run: | docker run -d -p 5000:5000 -v ${REGISTRY_PATH}:/var/lib/registry --name registry registry:2 - @@ -120,17 +120,21 @@ jobs: uses: docker/setup-buildx-action@v1 with: driver-opts: network=host + # - + # name: Import Docker images + # run: | + # docker pull ${BUILDER_IMG} + # docker pull ${EXEC_IMG} - - name: Pull Prebuilt Images - run: | - docker pull ${BUILDER_IMG} - docker pull ${EXEC_IMG} - - - name: Display Images + name: display images run: | docker images + # - + # name: Setup tmate session + # uses: mxschmitt/action-tmate@v3 + # timeout-minutes: 30 - - name: Build and Cache Docs + name: Build and export uses: docker/build-push-action@v2 with: context: . 
@@ -139,12 +143,16 @@ jobs: target: docs load: true cache-from: | - ${{ env.BUILDER_IMG }} - ${{ env.EXEC_IMG }} + type=registry,ref=${{ env.BUILDER_IMG }} + type=registry,ref=${{ env.EXEC_IMG }} type=local,src=${{ env.CACHE_PATH }} cache-to: type=local,dest=${{ env.CACHE_PATH }}-new + # - + # name: Setup tmate session + # uses: mxschmitt/action-tmate@v3 + # timeout-minutes: 30 - - name: Run Doc Tests + name: run doc tests run: docker run ${DOC_IMG} - name: Stop Docker registry @@ -157,5 +165,3 @@ jobs: run: | rm -rf /tmp/.buildx-cache mv /tmp/.buildx-cache-new /tmp/.buildx-cache - - From bcf35a26959e3c0287794b8f64a11aabec8333c0 Mon Sep 17 00:00:00 2001 From: James Kent Date: Sun, 7 Mar 2021 19:08:10 -0600 Subject: [PATCH 13/24] add test --- .github/workflows/test-docker.yml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml index 60cab02e0..040fb85f2 100644 --- a/.github/workflows/test-docker.yml +++ b/.github/workflows/test-docker.yml @@ -82,7 +82,11 @@ jobs: tags: ${{ env.EXEC_IMG }} - name: Stop Docker registry - run: docker stop registry + run: | + docker run --rm --link registry anoxis/registry-cli -r http://localhost:5000 --delete --num 2 + docker stop registry + docker run --rm registry:2 bin/registry garbage-collect \ + /etc/docker/registry/config.yml - name: Upload Docker registry data for next steps uses: actions/upload-artifact@v2 @@ -129,10 +133,10 @@ jobs: name: display images run: | docker images - # - - # name: Setup tmate session - # uses: mxschmitt/action-tmate@v3 - # timeout-minutes: 30 + - + name: Setup tmate session + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 30 - name: Build and export uses: docker/build-push-action@v2 From a2fbdf0235bc82c58caee205b5c9c657ebfd3ded Mon Sep 17 00:00:00 2001 From: James Kent Date: Sun, 7 Mar 2021 22:21:30 -0600 Subject: [PATCH 14/24] try to fix docker registry command --- .dockerignore | 1 - 
.github/workflows/test-docker.yml | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.dockerignore b/.dockerignore index b73980904..18a855a59 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,5 +1,4 @@ docker/Dockerfile - *.pyc *.DS_Store *~ diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml index 040fb85f2..b33252ada 100644 --- a/.github/workflows/test-docker.yml +++ b/.github/workflows/test-docker.yml @@ -83,9 +83,9 @@ jobs: - name: Stop Docker registry run: | - docker run --rm --link registry anoxis/registry-cli -r http://localhost:5000 --delete --num 2 + docker run --rm --link registry anoxis/registry-cli -r localhost:5000 --delete --network host --num 2 docker stop registry - docker run --rm registry:2 bin/registry garbage-collect \ + docker run --rm -v ${REGISTRY_PATH}:/var/lib/registry registry:2 bin/registry garbage-collect \ /etc/docker/registry/config.yml - name: Upload Docker registry data for next steps From ec8bac66200185f076110791f9940695618f92ab Mon Sep 17 00:00:00 2001 From: James Kent Date: Sun, 7 Mar 2021 23:42:05 -0600 Subject: [PATCH 15/24] change both files --- .dockerignore | 1 + .github/workflows/test-docker.yml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index 18a855a59..0f2ca29e2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,5 @@ docker/Dockerfile +.github *.pyc *.DS_Store *~ diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml index b33252ada..7e1aa5fd0 100644 --- a/.github/workflows/test-docker.yml +++ b/.github/workflows/test-docker.yml @@ -83,7 +83,7 @@ jobs: - name: Stop Docker registry run: | - docker run --rm --link registry anoxis/registry-cli -r localhost:5000 --delete --network host --num 2 + docker run --rm --link --network=host registry anoxis/registry-cli -r localhost:5000 --delete --num 2 docker stop registry docker run --rm -v ${REGISTRY_PATH}:/var/lib/registry registry:2 
bin/registry garbage-collect \ /etc/docker/registry/config.yml From 05c8be48b5de9962befc00e485e6d40f7fbe2fe6 Mon Sep 17 00:00:00 2001 From: James Kent Date: Sun, 7 Mar 2021 23:55:32 -0600 Subject: [PATCH 16/24] fix args --- .github/workflows/test-docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml index 7e1aa5fd0..6ab2795b5 100644 --- a/.github/workflows/test-docker.yml +++ b/.github/workflows/test-docker.yml @@ -83,7 +83,7 @@ jobs: - name: Stop Docker registry run: | - docker run --rm --link --network=host registry anoxis/registry-cli -r localhost:5000 --delete --num 2 + docker run --rm --network=host --link registry anoxis/registry-cli -r localhost:5000 --delete --num 2 docker stop registry docker run --rm -v ${REGISTRY_PATH}:/var/lib/registry registry:2 bin/registry garbage-collect \ /etc/docker/registry/config.yml From bfed51bb8ae0fea03553b59d2217d624b4a9f0c9 Mon Sep 17 00:00:00 2001 From: James Kent Date: Mon, 8 Mar 2021 00:21:44 -0600 Subject: [PATCH 17/24] abide by docs --- .github/workflows/test-docker.yml | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml index 6ab2795b5..7c7f29762 100644 --- a/.github/workflows/test-docker.yml +++ b/.github/workflows/test-docker.yml @@ -5,7 +5,7 @@ on: [pull_request,workflow_dispatch,push] env: EXEC_IMG: localhost:5000/pliers:exec - BUILDER_IMG: localhost:5000/pliers:build + CACHE_IMG: localhost:5000/pliers:cache DOC_IMG: localhost:5000/pliers:docs REGISTRY_PATH: /tmp/registry CACHE_PATH: /tmp/.buildx-cache @@ -36,7 +36,7 @@ jobs: with: driver-opts: network=host - - name: Rename builder image + name: Pull Base Image run: | set +e docker pull localhost:5000/python:3.7-slim @@ -54,19 +54,6 @@ jobs: # set +e # docker pull ${BUILDER_IMG} || true # docker pull ${EXEC_IMG} || true - - - name: Build and export builder - 
uses: docker/build-push-action@v2 - with: - context: . - file: ./docker/Dockerfile - target: builder - cache-from: | - type=registry,ref=${{ env.BUILDER_IMG }} - cache-to: | - type=registry,ref=${{ env.BUILDER_IMG }},mode=max - push: true - tags: ${{ env.BUILDER_IMG }} - name: Build and export executable uses: docker/build-push-action@v2 @@ -75,15 +62,15 @@ jobs: file: ./docker/Dockerfile target: executable cache-from: | - type=registry,ref=${{ env.EXEC_IMG }} + type=registry,ref=${{ env.CACHE_IMG }} cache-to: | - type=registry,ref=${{ env.EXEC_IMG }},mode=max + type=registry,ref=${{ env.CACHE_IMG }},mode=max push: true tags: ${{ env.EXEC_IMG }} - name: Stop Docker registry run: | - docker run --rm --network=host --link registry anoxis/registry-cli -r localhost:5000 --delete --num 2 + docker run --rm --link registry anoxis/registry-cli -r http://registry:5000 --delete --num 2 docker stop registry docker run --rm -v ${REGISTRY_PATH}:/var/lib/registry registry:2 bin/registry garbage-collect \ /etc/docker/registry/config.yml @@ -147,8 +134,7 @@ jobs: target: docs load: true cache-from: | - type=registry,ref=${{ env.BUILDER_IMG }} - type=registry,ref=${{ env.EXEC_IMG }} + type=registry,ref=${{ env.CACHE_IMG }} type=local,src=${{ env.CACHE_PATH }} cache-to: type=local,dest=${{ env.CACHE_PATH }}-new # - From 5e6cadd8bb30b1d94be292d62d68fcf1c657cd6a Mon Sep 17 00:00:00 2001 From: James Kent Date: Mon, 8 Mar 2021 01:19:30 -0600 Subject: [PATCH 18/24] trigger cache build From 429a4e3cbcb7a98b600dad4204badcabf6b4b69e Mon Sep 17 00:00:00 2001 From: James Kent Date: Mon, 8 Mar 2021 01:26:35 -0600 Subject: [PATCH 19/24] start fresh --- .github/workflows/test-docker.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml index 7c7f29762..1b291c8e2 100644 --- a/.github/workflows/test-docker.yml +++ b/.github/workflows/test-docker.yml @@ -29,7 +29,8 @@ jobs: - name: Start Docker registry run: | - 
docker run -d -p 5000:5000 -v ${REGISTRY_PATH}:/var/lib/registry --name registry registry:2 + rm -rf /data/registry/* + docker run -d -p 5000:5000 -e REGISTRY_STORAGE_DELETE_ENABLED=true -v ${REGISTRY_PATH}:/var/lib/registry --name registry registry:2 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v1 From 175158b67aff565b849a326628165d01fe7afb3a Mon Sep 17 00:00:00 2001 From: James Kent Date: Mon, 8 Mar 2021 01:27:35 -0600 Subject: [PATCH 20/24] start fresh --- .github/workflows/test-docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml index 1b291c8e2..5b18aa0db 100644 --- a/.github/workflows/test-docker.yml +++ b/.github/workflows/test-docker.yml @@ -29,7 +29,7 @@ jobs: - name: Start Docker registry run: | - rm -rf /data/registry/* + rm -rf /data/registry/* docker run -d -p 5000:5000 -e REGISTRY_STORAGE_DELETE_ENABLED=true -v ${REGISTRY_PATH}:/var/lib/registry --name registry registry:2 - name: Set up Docker Buildx From fd5deefc096b8dfd97318f1df592ed4bc7ee0b1a Mon Sep 17 00:00:00 2001 From: James Kent Date: Mon, 8 Mar 2021 02:05:40 -0600 Subject: [PATCH 21/24] do not remove everything --- .github/workflows/test-docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml index 5b18aa0db..f7cd47620 100644 --- a/.github/workflows/test-docker.yml +++ b/.github/workflows/test-docker.yml @@ -29,7 +29,7 @@ jobs: - name: Start Docker registry run: | - rm -rf /data/registry/* + # rm -rf /data/registry/* docker run -d -p 5000:5000 -e REGISTRY_STORAGE_DELETE_ENABLED=true -v ${REGISTRY_PATH}:/var/lib/registry --name registry registry:2 - name: Set up Docker Buildx From 679e00f88b7760a116f8a91bdecaa001479a2ba1 Mon Sep 17 00:00:00 2001 From: James Kent Date: Mon, 8 Mar 2021 02:43:52 -0600 Subject: [PATCH 22/24] add .git to dockerignore --- .dockerignore | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index 0f2ca29e2..1322e6735 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,8 +1,8 @@ docker/Dockerfile .github +.git *.pyc *.DS_Store -*~ benchmarks/ build/ dist From be13cb6e2aaf12baa8c206c828524ff9ba40fe74 Mon Sep 17 00:00:00 2001 From: James Kent Date: Mon, 8 Mar 2021 10:19:12 -0600 Subject: [PATCH 23/24] do not remove everything --- .github/workflows/test-docker.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml index f7cd47620..dea3ebbac 100644 --- a/.github/workflows/test-docker.yml +++ b/.github/workflows/test-docker.yml @@ -29,7 +29,6 @@ jobs: - name: Start Docker registry run: | - # rm -rf /data/registry/* docker run -d -p 5000:5000 -e REGISTRY_STORAGE_DELETE_ENABLED=true -v ${REGISTRY_PATH}:/var/lib/registry --name registry registry:2 - name: Set up Docker Buildx From fb94285599fa40e7c2c30117b4036292b234f70b Mon Sep 17 00:00:00 2001 From: James Kent Date: Mon, 8 Mar 2021 11:18:06 -0600 Subject: [PATCH 24/24] remove tmate session --- .github/workflows/test-docker.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml index dea3ebbac..224061338 100644 --- a/.github/workflows/test-docker.yml +++ b/.github/workflows/test-docker.yml @@ -120,10 +120,10 @@ jobs: name: display images run: | docker images - - - name: Setup tmate session - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 30 + # - + # name: Setup tmate session + # uses: mxschmitt/action-tmate@v3 + # timeout-minutes: 30 - name: Build and export uses: docker/build-push-action@v2