Skip to content

Add flexible session navigation features #55

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
996809c
Enhance navigation cmdlet with text/selector
PrzemyslawKlys Jun 9, 2025
bfc90f5
refactor(examples): enhance navigation cmdlet usage in Example-Browse…
PrzemyslawKlys Jun 9, 2025
9e90e6f
Handle global session in download and fix screenshot parameter set
PrzemyslawKlys Jun 9, 2025
e4e17a4
Merge branch '4fbkir-enhance-invoke-htmlnavigation-cmdlet-with-sessio…
PrzemyslawKlys Jun 9, 2025
5524ff4
fix(examples): update screenshot output and enhance navigation handli…
PrzemyslawKlys Jun 9, 2025
03c4116
Merge branch 'v2-speedygonzales' into 4fbkir-enhance-invoke-htmlnavig…
PrzemyslawKlys Jun 9, 2025
711f080
Add HTML content retrieval and navigation fixes
PrzemyslawKlys Jun 9, 2025
ed159e0
feat(navigation): add timeout option and interactable listing
PrzemyslawKlys Jun 10, 2025
ac40ec5
Refactor navigation logic into renderer
PrzemyslawKlys Jun 10, 2025
5fafc53
feat(examples): ✨ Update navigation comments and enhance screenshot f…
PrzemyslawKlys Jun 10, 2025
01b9d77
fix(psd1): 🔧 Reorder `CmdletsToExport` for consistency
PrzemyslawKlys Jun 10, 2025
409c9d6
Improve interactable discovery
PrzemyslawKlys Jun 10, 2025
e5eee35
add filter parameter to Get-HTMLInteractable
PrzemyslawKlys Jun 10, 2025
1b40689
Merge branch 'v2-speedygonzales' into 4fbkir-enhance-invoke-htmlnavig…
PrzemyslawKlys Jun 10, 2025
5c91cf0
fix(CmdletInvokeHtmlNavigation): 🔧 Reduce default `Timeout` to 10 sec…
PrzemyslawKlys Jun 10, 2025
ca33803
Simplify interactable selectors
PrzemyslawKlys Jun 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions Examples/Example-BrowserSessionWpAdmin.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,21 @@ $invokeHTMLRenderingSplat = @{
$session = Open-HTMLSession @invokeHTMLRenderingSplat
# Save screenshot of the page should work with session
Save-HTMLScreenshot -Session $Session -OutFile "$PSScriptRoot\Output\EvotecPageAdmin1.png" -Open
# We should add new cmdlet that will navigate to the page we tell it to navigate to
$null = Invoke-HTMLNavigation -Session $Session -Url 'https://evotec.xyz/wp-admin/edit.php'
# Save screenshot of the page should work with session
# # We should add new cmdlet that will navigate to the page we tell it to navigate to
Invoke-HTMLNavigation -Session $Session -Url 'https://evotec.xyz/wp-admin/edit.php'
# # Save screenshot of the page should work with session
Save-HTMLScreenshot -Session $Session -OutFile "$PSScriptRoot\Output\EvotecPageAdmin2.png" -Open
# We should add new cmdlet that will navigate to the page we tell it to navigate to, but also allow Save-HTMLScreenshot to work with session from the page we navigate to
Invoke-HTMLNavigation -Session $Session -Url 'https://evotec.xyz/wp-admin/edit.php' | Save-HTMLScreenshot -OutFile "$PSScriptRoot\Output\EvotecPageAdmin3.png" -Open
# Navigate to the pages list (post_type=page) and save screenshot
Invoke-HTMLNavigation -Session $Session -Url 'https://evotec.xyz/wp-admin/edit.php?post_type=page' | Save-HTMLScreenshot -OutFile "$PSScriptRoot\Output\EvotecPageAdmin4.png" -Open
# # We should add new cmdlet that will navigate to the page we tell it to navigate to, but also allow Save-HTMLScreenshot to work with session from the page we navigate to
Invoke-HTMLNavigation -Session $Session -Url 'https://evotec.xyz/wp-admin/edit.php' -PassThru | Save-HTMLScreenshot -OutFile "$PSScriptRoot\Output\EvotecPageAdmin3.png" -Open
# # Navigate to the pages list (post_type=page) and save screenshot
Invoke-HTMLNavigation -Session $Session -Url 'https://evotec.xyz/wp-admin/edit.php?post_type=page' -PassThru | Save-HTMLScreenshot -OutFile "$PSScriptRoot\Output\EvotecPageAdmin4.png" -Open
# # Navigate to team members page and save screenshot
Invoke-HTMLNavigation -Session $Session -Url 'https://evotec.xyz/wp-admin/edit.php?post_type=thegem_team_person' -PassThru | Save-HTMLScreenshot -OutFile "$PSScriptRoot\Output\EvotecPageAdmin3.png" -Open
# Get interactable elements from the session
Get-HTMLInteractable -Session $Session | Format-Table
# Navigate to profile page, this should error because of multiple elements with the same text
Invoke-HTMLNavigation -Session $Session -Text "Profile" -PassThru | Save-HTMLScreenshot -OutFile "$PSScriptRoot\Output\EvotecPageAdmin5.png" -Open
# Be exact with the text to avoid multiple elements with the same text
Invoke-HTMLNavigation -Session $Session -Text "Profile" -Exact -PassThru | Save-HTMLScreenshot -OutFile "$PSScriptRoot\Output\EvotecPageAdmin5.png" -Open
# Close the session using new cmdlet alias (Stop-HTMLSession)
Close-HTMLSession -Session $Session | Out-Null
2 changes: 1 addition & 1 deletion PSParseHTML.psd1
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
@{
AliasesToExport = @('Stop-HTMLSession', 'ConvertFrom-HTMLTag', 'ConvertFrom-HTMLClass', 'Format-JS', 'Start-HTMLSession', 'Open-HTMLSession', 'Save-HTMLAttachment')
Author = 'Przemyslaw Klys'
CmdletsToExport = @('Close-HTMLSession', 'ConvertFrom-HTML', 'ConvertFrom-HtmlAttributes', 'ConvertFrom-HtmlList', 'ConvertFrom-HtmlTable', 'Convert-HTMLToText', 'Format-CSS', 'Format-HTML', 'Format-JavaScript', 'Get-HTMLInteractable', 'Invoke-HTMLNavigation', 'Invoke-HTMLRendering', 'Optimize-CSS', 'Optimize-Email', 'Optimize-HTML', 'Optimize-JavaScript', 'Save-HTMLDownload', 'Save-HTMLScreenshot')
CmdletsToExport = @('Close-HTMLSession', 'ConvertFrom-HTML', 'ConvertFrom-HtmlAttributes', 'ConvertFrom-HtmlList', 'ConvertFrom-HtmlTable', 'Convert-HTMLToText', 'Format-CSS', 'Format-HTML', 'Format-JavaScript', 'Get-HTMLContent', 'Get-HTMLInteractable', 'Invoke-HTMLNavigation', 'Invoke-HTMLRendering', 'Optimize-CSS', 'Optimize-Email', 'Optimize-HTML', 'Optimize-JavaScript', 'Save-HTMLDownload', 'Save-HTMLScreenshot')
CompanyName = 'Evotec'
CompatiblePSEditions = @('Desktop', 'Core')
Copyright = '(c) Przemyslaw Klys. All rights reserved.'
Expand Down
4 changes: 4 additions & 0 deletions Sources/PSParseHTML.PowerShell/CmdletCloseHtmlSession.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ public sealed class CmdletCloseHtmlSession : AsyncPSCmdlet {
/// <inheritdoc />
protected override async Task ProcessRecordAsync() {
    await HtmlBrowserRenderer.CloseSessionAsync(Session).ConfigureAwait(false);

    // Remove the default-session variable only when it refers to the very
    // same session instance that was just closed.
    if (GetVariableValue("PSParseHTML_DefaultSession") is not BrowserSession current) {
        return;
    }

    if (ReferenceEquals(current, Session)) {
        SessionState.PSVariable.Remove("PSParseHTML_DefaultSession");
    }
}
}

54 changes: 54 additions & 0 deletions Sources/PSParseHTML.PowerShell/CmdletGetHtmlContent.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
using System.Management.Automation;
using System.Threading.Tasks;

namespace PSParseHTML.PowerShell;

/// <summary>
/// Cmdlet that retrieves HTML or text content from an existing session.
/// </summary>
/// <remarks>
/// When <see cref="Session"/> is not supplied, the session stored in the
/// <c>PSParseHTML_DefaultSession</c> variable is used instead.
/// </remarks>
[Cmdlet(VerbsCommon.Get, "HTMLContent")]
[OutputType(typeof(string))]
public sealed class CmdletGetHtmlContent : AsyncPSCmdlet {
    /// <summary>Name of the variable that holds the default browser session.</summary>
    private const string DefaultSessionVariable = "PSParseHTML_DefaultSession";

    /// <summary>Browser session in use.</summary>
    [Parameter(Position = 0, ValueFromPipeline = true)]
    public BrowserSession? Session { get; set; }

    /// <summary>CSS selector for the target element.</summary>
    [Parameter]
    public string? Selector { get; set; }

    /// <summary>Return inner HTML instead of outer HTML.</summary>
    [Parameter]
    public SwitchParameter InnerHtml { get; set; }

    /// <summary>Return outer HTML. This is the default.</summary>
    /// <remarks>
    /// Only participates in the mutual-exclusion check; it is not forwarded to the renderer.
    /// </remarks>
    [Parameter]
    public SwitchParameter OuterHtml { get; set; }

    /// <summary>Return text content instead of HTML.</summary>
    [Parameter]
    public SwitchParameter AsText { get; set; }

    /// <summary>
    /// Resolves the session, validates that at most one output-mode switch was given,
    /// and writes the content returned by the renderer to the pipeline.
    /// </summary>
    /// <exception cref="PSInvalidOperationException">
    /// Thrown when no session was provided and no usable default session is available.
    /// </exception>
    protected override async Task ProcessRecordAsync() {
        BrowserSession session = Session ?? GetDefaultSession()
            ?? throw new PSInvalidOperationException("No session provided and no default session found.");

        int flags = (InnerHtml.IsPresent ? 1 : 0) + (OuterHtml.IsPresent ? 1 : 0) + (AsText.IsPresent ? 1 : 0);
        if (flags > 1) {
            ThrowTerminatingError(new ErrorRecord(
                new PSInvalidOperationException("Specify only one of -InnerHtml, -OuterHtml, or -AsText."),
                "InvalidParameter", ErrorCategory.InvalidArgument, Selector));
            return;
        }

        string result = await HtmlBrowserRenderer.GetContentAsync(
            session.Page,
            Selector,
            InnerHtml.IsPresent,
            AsText.IsPresent).ConfigureAwait(false);

        WriteObject(result);
    }

    /// <summary>
    /// Reads the default session variable without risking an <c>InvalidCastException</c>.
    /// </summary>
    /// <returns>
    /// The stored <see cref="BrowserSession"/>, or <c>null</c> when the variable is missing
    /// or holds a value of another type.
    /// </returns>
    private BrowserSession? GetDefaultSession() {
        object? value = GetVariableValue(DefaultSessionVariable);

        // A value assigned from script may arrive wrapped in a PSObject; unwrap it first.
        if (value is PSObject wrapper) {
            value = wrapper.BaseObject;
        }

        // A safe cast lets an unrelated value fall through to the "no default session" error.
        return value as BrowserSession;
    }
}

124 changes: 14 additions & 110 deletions Sources/PSParseHTML.PowerShell/CmdletGetHtmlInteractable.cs
Original file line number Diff line number Diff line change
@@ -1,128 +1,32 @@
using System.Collections.Generic;
using System.Management.Automation;
using System.Threading.Tasks;
using PSParseHTML;

namespace PSParseHTML.PowerShell;

/// <summary>
/// Cmdlet that lists clickable or interactable elements from a browser session.
/// Returns interactive elements from an active browser session.
/// </summary>
[Cmdlet(VerbsCommon.Get, "HTMLInteractable", DefaultParameterSetName = ParameterSetSession)]
[OutputType(typeof(InteractableElement))]
[Cmdlet(VerbsCommon.Get, "HTMLInteractable")]
[OutputType(typeof(HtmlInteractableInfo))]
public sealed class CmdletGetHtmlInteractable : AsyncPSCmdlet {
private const string ParameterSetSession = "Session";
private const string ParameterSetUrl = "Url";
private const string ParameterSetFile = "File";
/// <summary>Browser session containing the page.</summary>
[Parameter(Position = 0, ValueFromPipeline = true)]
public BrowserSession? Session { get; set; }

/// <summary>Existing browser session.</summary>
[Parameter(Mandatory = true, Position = 0, ParameterSetName = ParameterSetSession, ValueFromPipeline = true)]
public BrowserSession Session { get; set; } = null!;

/// <summary>URL of the page to inspect.</summary>
[Parameter(Mandatory = true, Position = 0, ParameterSetName = ParameterSetUrl)]
public string Url { get; set; } = string.Empty;

/// <summary>Path to a local HTML file.</summary>
[Parameter(Mandatory = true, Position = 0, ParameterSetName = ParameterSetFile)]
[Alias("Path")]
public string File { get; set; } = string.Empty;

/// <summary>Browser engine to use when loading <see cref="Url"/> or <see cref="File"/>.</summary>
[Parameter(ParameterSetName = ParameterSetUrl)]
[Parameter(ParameterSetName = ParameterSetFile)]
public BrowserEngine Browser { get; set; } = BrowserEngine.Chromium;

/// <summary>Reinstall browser runtimes when using <see cref="Url"/> or <see cref="File"/>.</summary>
[Parameter(ParameterSetName = ParameterSetUrl)]
[Parameter(ParameterSetName = ParameterSetFile)]
public SwitchParameter Clean { get; set; }

/// <summary>Include elements hidden from view.</summary>
[Parameter]
public SwitchParameter IncludeHidden { get; set; }

/// <summary>Maximum number of elements to return.</summary>
/// <summary>Optional case-insensitive filter applied to the element text.</summary>
[Parameter]
[ValidateRange(1, int.MaxValue)]
public int Limit { get; set; } = 100;

/// <summary>Credentials for pages requiring authentication.</summary>
[Parameter(ParameterSetName = ParameterSetUrl)]
public PSCredential? Credential { get; set; }

/// <summary>Basic authentication username.</summary>
[Parameter(ParameterSetName = ParameterSetUrl)]
public string? Username { get; set; }

/// <summary>Basic authentication password.</summary>
[Parameter(ParameterSetName = ParameterSetUrl)]
public string? Password { get; set; }

/// <summary>URL of a login form.</summary>
[Parameter(ParameterSetName = ParameterSetUrl)]
public string? LoginUrl { get; set; }

/// <summary>CSS selector for the username field.</summary>
[Parameter(ParameterSetName = ParameterSetUrl)]
public string? UsernameSelector { get; set; }

/// <summary>CSS selector for the password field.</summary>
[Parameter(ParameterSetName = ParameterSetUrl)]
public string? PasswordSelector { get; set; }

/// <summary>CSS selector for the submit element.</summary>
[Parameter(ParameterSetName = ParameterSetUrl)]
public string? SubmitSelector { get; set; }
public string? Filter { get; set; }

/// <inheritdoc />
protected override async Task ProcessRecordAsync() {
List<InteractableElement> list;
switch (ParameterSetName) {
case ParameterSetUrl:
string? user = Credential?.UserName ?? Username;
string? pass = Credential?.GetNetworkCredential().Password ?? Password;
FormLoginOptions? form = null;
if (!string.IsNullOrEmpty(LoginUrl) &&
!string.IsNullOrEmpty(UsernameSelector) &&
!string.IsNullOrEmpty(PasswordSelector) &&
!string.IsNullOrEmpty(SubmitSelector)) {
form = new FormLoginOptions {
LoginUrl = LoginUrl!,
UsernameSelector = UsernameSelector!,
PasswordSelector = PasswordSelector!,
SubmitSelector = SubmitSelector!
};
}

list = await HtmlBrowserRenderer.GetInteractableElementsAsync(
Url,
Browser,
Clean.IsPresent,
IncludeHidden.IsPresent,
Limit,
user,
pass,
form).ConfigureAwait(false);
break;
case ParameterSetFile:
list = await HtmlBrowserRenderer.GetInteractableElementsFromFileAsync(
File,
Browser,
Clean.IsPresent,
IncludeHidden.IsPresent,
Limit).ConfigureAwait(false);
break;
default:
list = await HtmlBrowserRenderer.GetInteractableElementsAsync(
Session.Page,
IncludeHidden.IsPresent,
Limit).ConfigureAwait(false);
break;
}
BrowserSession session = Session ?? (BrowserSession?)GetVariableValue("PSParseHTML_DefaultSession")
?? throw new PSInvalidOperationException("No session provided and no default session found.");

foreach (InteractableElement element in list) {
WriteObject(element);
List<HtmlInteractableInfo> list = await HtmlBrowserRenderer.GetInteractablesAsync(session.Page).ConfigureAwait(false);
if (!string.IsNullOrEmpty(Filter)) {
list = list.FindAll(x => x.Text.IndexOf(Filter, System.StringComparison.OrdinalIgnoreCase) >= 0);
}
WriteObject(list, true);
}
}
76 changes: 68 additions & 8 deletions Sources/PSParseHTML.PowerShell/CmdletInvokeHtmlNavigation.cs
Original file line number Diff line number Diff line change
@@ -1,28 +1,88 @@
using System.Management.Automation;
using System.Threading.Tasks;
using System.Text.RegularExpressions;
using Microsoft.Playwright;

namespace PSParseHTML.PowerShell;

/// <summary>
/// Cmdlet that navigates an existing browser session to a new URL.
/// </summary>
[Cmdlet(VerbsLifecycle.Invoke, "HTMLNavigation")]
[Cmdlet(VerbsLifecycle.Invoke, "HTMLNavigation", DefaultParameterSetName = ParameterSetUrl)]
[OutputType(typeof(BrowserSession))]
public sealed class CmdletInvokeHtmlNavigation : AsyncPSCmdlet {
private const string ParameterSetUrl = "ByUrl";
private const string ParameterSetText = "ByText";
private const string ParameterSetSelector = "BySelector";

/// <summary>Existing browser session.</summary>
[Parameter(Mandatory = true, Position = 0, ValueFromPipeline = true)]
public BrowserSession Session { get; set; } = null!;
[Parameter(Position = 0, ValueFromPipeline = true)]
public BrowserSession? Session { get; set; }

/// <summary>Destination URL.</summary>
[Parameter(Mandatory = true, Position = 1)]
public string Url { get; set; } = string.Empty;
[Parameter(Mandatory = true, Position = 1, ParameterSetName = ParameterSetUrl)]
public string? Url { get; set; }

/// <summary>Text of the element to click.</summary>
[Parameter(Mandatory = true, Position = 1, ParameterSetName = ParameterSetText)]
public string? Text { get; set; }

/// <summary>CSS selector of the element to click.</summary>
[Parameter(Mandatory = true, Position = 1, ParameterSetName = ParameterSetSelector)]
public string? Selector { get; set; }

/// <summary>Use exact text match.</summary>
[Parameter(ParameterSetName = ParameterSetText)]
public SwitchParameter Exact { get; set; }

/// <summary>Regular expression for text match.</summary>
[Parameter(ParameterSetName = ParameterSetText)]
public string? Regex { get; set; }

/// <summary>Wait for navigation event after clicking.</summary>
[Parameter(ParameterSetName = ParameterSetText)]
[Parameter(ParameterSetName = ParameterSetSelector)]
public SwitchParameter WaitForNavigation { get; set; }

/// <summary>Return the session object.</summary>
[Parameter]
public SwitchParameter PassThru { get; set; }
/// <summary>Timeout in milliseconds for navigation and clicks.</summary>
[Parameter]
[ValidateRange(0,int.MaxValue)]
public int Timeout { get; set; } = 10000;

/// <inheritdoc />
protected override async Task ProcessRecordAsync() {
await Session.Page.GotoAsync(Url).ConfigureAwait(false);
await Session.Page.WaitForLoadStateAsync(LoadState.NetworkIdle).ConfigureAwait(false);
WriteObject(Session);
BrowserSession session = Session ?? (BrowserSession?)GetVariableValue("PSParseHTML_DefaultSession")
?? throw new PSInvalidOperationException("No session provided and no default session found.");

try {
switch (ParameterSetName) {
case ParameterSetUrl:
await HtmlBrowserRenderer.NavigateAsync(session, Url!, Timeout).ConfigureAwait(false);
break;
case ParameterSetSelector:
await HtmlBrowserRenderer.ClickSelectorAsync(session, Selector!, WaitForNavigation.IsPresent, Timeout).ConfigureAwait(false);
break;
case ParameterSetText:
await HtmlBrowserRenderer.ClickTextAsync(session, Text!, Exact.IsPresent, Regex, WaitForNavigation.IsPresent, Timeout).ConfigureAwait(false);
break;
}
} catch (PlaywrightException ex) when (ex.Message.Contains("strict mode violation")) {
string query = ParameterSetName switch {
ParameterSetSelector => Selector!,
_ => Text!
};
string message = HtmlBrowserRenderer.FormatStrictModeMessage(query, ex);
WriteError(new ErrorRecord(new InvalidOperationException(message), "StrictModeViolation", ErrorCategory.InvalidOperation, query));
return;
}

if (PassThru.IsPresent) {
WriteObject(session);
}
}

}

7 changes: 7 additions & 0 deletions Sources/PSParseHTML.PowerShell/CmdletInvokeHtmlRendering.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ public sealed class CmdletInvokeHtmlRendering : AsyncPSCmdlet {
[Parameter]
public SwitchParameter Session { get; set; }

/// <summary>Do not set the opened session as the default session.</summary>
[Parameter]
public SwitchParameter NoDefault { get; set; }

/// <inheritdoc />
protected override async Task ProcessRecordAsync() {
string? user = Credential?.UserName ?? Username;
Expand All @@ -86,6 +90,9 @@ protected override async Task ProcessRecordAsync() {
user,
pass,
form).ConfigureAwait(false);
if (!NoDefault.IsPresent) {
SessionState.PSVariable.Set("PSParseHTML_DefaultSession", sess);
}
WriteObject(sess);
} else if (!string.IsNullOrEmpty(OutFile)) {
await HtmlBrowserRenderer.SavePageContentAsync(Url, OutFile, Browser, Clean.IsPresent, user, pass, form).ConfigureAwait(false);
Expand Down
8 changes: 5 additions & 3 deletions Sources/PSParseHTML.PowerShell/CmdletSaveHtmlDownload.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ public sealed class CmdletSaveHtmlDownload : AsyncPSCmdlet {
public string Url { get; set; } = string.Empty;

/// <summary>Existing browser session.</summary>
[Parameter(Mandatory = true, Position = 0, ParameterSetName = ParameterSetSession, ValueFromPipeline = true)]
public BrowserSession Session { get; set; } = null!;
[Parameter(Position = 0, ParameterSetName = ParameterSetSession, ValueFromPipeline = true)]
public BrowserSession? Session { get; set; }

/// <summary>Directory where downloads will be saved.</summary>
[Parameter(Mandatory = true)]
Expand All @@ -44,9 +44,11 @@ public sealed class CmdletSaveHtmlDownload : AsyncPSCmdlet {

/// <inheritdoc />
protected override async Task ProcessRecordAsync() {
BrowserSession? session = Session ?? (BrowserSession?)GetVariableValue("PSParseHTML_DefaultSession");

List<string> files = ParameterSetName switch {
ParameterSetSession => await HtmlBrowserRenderer.SavePageDownloadsAsync(
Session.Page,
(session ?? throw new PSInvalidOperationException("No session provided and no default session found.")).Page,
Path,
Filter).ConfigureAwait(false),
_ => await HtmlBrowserRenderer.SavePageDownloadsAsync(
Expand Down
Loading
Loading