Add Amazon PA API and RapidAPI metadata support
Build and Push Docker Image / build-and-push (push) Successful in 3m50s

Introduce Amazon PA API and RapidAPI clients for fetching product metadata, with fallback strategies for Amazon URLs.
Add `AmazonMetadataOptions` for configuration and update `RegistryMetadataService` to integrate these clients.
Enhance metadata scraping with ASIN extraction, country code handling, and modern browser headers.
Include deployment files and register new services in `Program.cs`.
Update `appsettings.json` with placeholders for API credentials.
This commit is contained in:
Arne Moerman
2026-05-26 20:13:31 +02:00
parent 3b219da4eb
commit 50238b57c8
8 changed files with 552 additions and 5 deletions
+5
View File
@@ -1,4 +1,9 @@
<Solution>
<Folder Name="/deploy/">
<File Path="deploy/portainer-stack.env.example" />
<File Path="deploy/portainer-stack.yml" />
<File Path="deploy/README.md" />
</Folder>
<Folder Name="/gitea build/">
<File Path=".gitea/workflows/build-and-push.yml" />
</Folder>
+5
View File
@@ -23,6 +23,11 @@ services:
- Smtp__FromAddress=${SMTP_FROM_ADDRESS}
- Smtp__FromName=${SMTP_FROM_NAME}
- PublicUrl=${PUBLIC_URL}
# Amazon metadata lookups. Leaving the credential variables empty disables the matching API.
- AmazonMetadata__AccessKey=${AMAZON_PA_ACCESS_KEY}
- AmazonMetadata__SecretKey=${AMAZON_PA_SECRET_KEY}
- AmazonMetadata__AssociateTag=${AMAZON_ASSOCIATE_TAG}
# Default to the US endpoint: an unset variable would otherwise interpolate to an empty
# string and override the application's built-in default host.
- AmazonMetadata__PaApiHost=${AMAZON_PA_API_HOST:-webservices.amazon.com}
- AmazonMetadata__RapidApiKey=${AMAZON_RAPID_API_KEY}
depends_on:
- mssql
networks:
@@ -0,0 +1,41 @@
namespace BirthList.Web.Configuration;
internal sealed class AmazonMetadataOptions
{
    /// <summary>Host used when <see cref="PaApiHost"/> is left unset or is set to a blank value.</summary>
    private const string DefaultPaApiHost = "webservices.amazon.com";

    private string _paApiHost = DefaultPaApiHost;

    /// <summary>
    /// Amazon PA API access key. When set together with <see cref="SecretKey"/> and <see cref="AssociateTag"/>,
    /// the PA API is used as the primary metadata source for Amazon URLs.
    /// </summary>
    public string? AccessKey { get; set; }

    /// <summary>
    /// Amazon PA API secret key.
    /// </summary>
    public string? SecretKey { get; set; }

    /// <summary>
    /// Amazon Associates tag (e.g. "yourstore-21").
    /// </summary>
    public string? AssociateTag { get; set; }

    /// <summary>
    /// Amazon PA API marketplace host, e.g. "webservices.amazon.com" or "webservices.amazon.com.be".
    /// Defaults to "webservices.amazon.com".
    /// </summary>
    /// <remarks>
    /// Blank values (for example an environment variable that is defined but empty) are replaced by the
    /// default host, and surrounding whitespace is trimmed, so the value is always usable in a request URL.
    /// </remarks>
    public string PaApiHost
    {
        get => _paApiHost;
        set => _paApiHost = string.IsNullOrWhiteSpace(value) ? DefaultPaApiHost : value.Trim();
    }

    /// <summary>
    /// RapidAPI key for the "Real-Time Amazon Data" API.
    /// When set, this is used as a fallback when the direct scrape returns no metadata.
    /// </summary>
    public string? RapidApiKey { get; set; }

    /// <summary>Returns true when PA API credentials are fully configured.</summary>
    public bool IsPaApiConfigured =>
        !string.IsNullOrWhiteSpace(AccessKey) &&
        !string.IsNullOrWhiteSpace(SecretKey) &&
        !string.IsNullOrWhiteSpace(AssociateTag);

    /// <summary>Returns true when the RapidAPI fallback is configured.</summary>
    public bool IsRapidApiConfigured => !string.IsNullOrWhiteSpace(RapidApiKey);
}
@@ -0,0 +1,241 @@
using System.Globalization;
using System.Net.Http.Json;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using BirthList.Web.Configuration;
using Microsoft.Extensions.Options;
namespace BirthList.Web.Features.Registries;
/// <summary>
/// Calls the Amazon Product Advertising API 5.0 to retrieve product metadata.
/// Implements AWS Signature Version 4 signing without external SDK dependencies.
/// </summary>
/// <remarks>
/// The signing region is derived from the configured marketplace host. PA API documents a fixed
/// region per marketplace (for example "eu-west-1" for webservices.amazon.com.be), and a request
/// signed for a different region is rejected.
/// </remarks>
internal sealed class AmazonPaApiClient(IHttpClientFactory httpClientFactory, IOptions<AmazonMetadataOptions> options)
{
    private const string Service = "ProductAdvertisingAPI";
    private const string Operation = "GetItems";
    private const string DefaultHost = "webservices.amazon.com";
    private const string DefaultRegion = "us-east-1";
    private const string HostPrefix = "webservices.amazon.";
    private const string RequestPath = "/paapi5/getitems";
    private const string ContentType = "application/json; charset=utf-8";
    private const string ContentEncoding = "amz-1.0";

    private readonly AmazonMetadataOptions _options = options.Value;

    /// <summary>
    /// Fetches product metadata for the given ASIN. Returns null when the item is not found or credentials are not configured.
    /// </summary>
    /// <param name="asin">The Amazon Standard Identification Number to look up.</param>
    /// <param name="normalizedUrl">The product URL copied verbatim into the result.</param>
    /// <param name="cancellationToken">Token used to cancel the HTTP call.</param>
    /// <returns>
    /// The parsed metadata, or null when credentials are missing, the API answers with a
    /// non-success status code, or the response contains no item.
    /// </returns>
    public async Task<UrlMetadataResult?> GetItemAsync(string asin, string normalizedUrl, CancellationToken cancellationToken)
    {
        if (!_options.IsPaApiConfigured)
        {
            return null;
        }

        var host = ResolveHost(_options.PaApiHost);
        var region = ResolveRegion(host);

        var payloadBytes = Encoding.UTF8.GetBytes(BuildPayload(asin, host));
        var payloadHash = ComputeSha256Hex(payloadBytes);

        var now = DateTimeOffset.UtcNow;
        var dateStamp = now.ToString("yyyyMMdd", CultureInfo.InvariantCulture);
        var amzDate = now.ToString("yyyyMMddTHHmmssZ", CultureInfo.InvariantCulture);

        // SigV4 requires the signed headers in ordinal order of their lower-case names,
        // which the sorted dictionary provides for free.
        var headers = new SortedDictionary<string, string>(StringComparer.Ordinal)
        {
            ["content-encoding"] = ContentEncoding,
            ["content-type"] = ContentType,
            ["host"] = host,
            ["x-amz-date"] = amzDate,
            ["x-amz-target"] = $"com.amazon.paapi5.v1.ProductAdvertisingAPIv1.{Operation}"
        };

        var signedHeaderNames = string.Join(";", headers.Keys);
        var canonicalHeaders = string.Concat(headers.Select(h => $"{h.Key}:{h.Value}\n"));

        // The empty element is the (absent) canonical query string.
        var canonicalRequest = string.Join("\n",
            "POST",
            RequestPath,
            string.Empty,
            canonicalHeaders,
            signedHeaderNames,
            payloadHash);

        var credentialScope = $"{dateStamp}/{region}/{Service}/aws4_request";
        var stringToSign = string.Join("\n",
            "AWS4-HMAC-SHA256",
            amzDate,
            credentialScope,
            ComputeSha256Hex(Encoding.UTF8.GetBytes(canonicalRequest)));

        // IsPaApiConfigured guarantees the secret key is non-blank at this point.
        var signingKey = GetSigningKey(_options.SecretKey!, dateStamp, region);
        var signature = ComputeHmacHex(signingKey, stringToSign);

        var authorization =
            $"AWS4-HMAC-SHA256 Credential={_options.AccessKey}/{credentialScope}, " +
            $"SignedHeaders={signedHeaderNames}, Signature={signature}";

        var client = httpClientFactory.CreateClient("AmazonPaApi");
        using var request = new HttpRequestMessage(HttpMethod.Post, $"https://{host}{RequestPath}");
        request.Content = new ByteArrayContent(payloadBytes);

        foreach (var (key, value) in headers)
        {
            // "host" is derived from the request URI; the content-* headers belong on the
            // content object and are attached below.
            if (key is "host" or "content-type" or "content-encoding")
            {
                continue;
            }

            request.Headers.TryAddWithoutValidation(key, value);
        }

        request.Content.Headers.TryAddWithoutValidation("Content-Type", ContentType);
        request.Content.Headers.TryAddWithoutValidation("Content-Encoding", ContentEncoding);
        request.Headers.TryAddWithoutValidation("Authorization", authorization);

        using var response = await client.SendAsync(request, cancellationToken).ConfigureAwait(false);
        if (!response.IsSuccessStatusCode)
        {
            return null;
        }

        var paResponse = await response.Content.ReadFromJsonAsync<PaApiResponse>(cancellationToken: cancellationToken).ConfigureAwait(false);
        var item = paResponse?.ItemsResult?.Items?.FirstOrDefault();
        if (item is null)
        {
            return null;
        }

        var listingPrice = item.Offers?.Listings?.FirstOrDefault()?.Price;
        var currency = listingPrice?.Currency;

        return new UrlMetadataResult
        {
            NormalizedUrl = normalizedUrl,
            Title = item.ItemInfo?.Title?.DisplayValue,
            Description = null,
            ImageUrl = item.Images?.Primary?.Large?.Url ?? item.Images?.Primary?.Medium?.Url,
            PriceAmount = listingPrice?.Amount,
            CurrencyCode = string.IsNullOrWhiteSpace(currency) ? null : currency.ToUpperInvariant()
        };
    }

    /// <summary>
    /// Normalizes the configured host: a blank value falls back to the US endpoint, and the result is
    /// trimmed and lower-cased so the signed "host" header matches what is sent on the wire.
    /// </summary>
    private static string ResolveHost(string? configuredHost) =>
        string.IsNullOrWhiteSpace(configuredHost)
            ? DefaultHost
            : configuredHost.Trim().ToLowerInvariant();

    /// <summary>
    /// Maps a PA API host (already lower-cased) to the AWS region used in the SigV4 credential scope.
    /// Unknown hosts keep the previous behaviour and sign for "us-east-1".
    /// </summary>
    private static string ResolveRegion(string host)
    {
        var marketplace = host.StartsWith(HostPrefix, StringComparison.Ordinal)
            ? host[HostPrefix.Length..]
            : host;

        return marketplace switch
        {
            "co.uk" or "de" or "fr" or "it" or "es" or "nl" or "com.be" or "se" or "pl"
                or "com.tr" or "ae" or "sa" or "eg" or "in" => "eu-west-1",
            "co.jp" or "com.au" or "sg" => "us-west-2",
            // com, ca, com.mx, com.br and anything unrecognised.
            _ => DefaultRegion
        };
    }

    /// <summary>Builds the JSON body of the GetItems request for a single ASIN.</summary>
    private string BuildPayload(string asin, string host)
    {
        var doc = new
        {
            ItemIds = new[] { asin },
            Resources = new[]
            {
                "ItemInfo.Title",
                "Images.Primary.Large",
                "Images.Primary.Medium",
                "Offers.Listings.Price"
            },
            PartnerTag = _options.AssociateTag,
            PartnerType = "Associates",
            // webservices.amazon.de -> www.amazon.de
            Marketplace = $"www.{host.Replace("webservices.", string.Empty, StringComparison.OrdinalIgnoreCase)}"
        };

        return JsonSerializer.Serialize(doc);
    }

    /// <summary>Derives the SigV4 signing key: secret -> date -> region -> service -> "aws4_request".</summary>
    private static byte[] GetSigningKey(string secretKey, string dateStamp, string region)
    {
        var kDate = ComputeHmac(Encoding.UTF8.GetBytes($"AWS4{secretKey}"), dateStamp);
        var kRegion = ComputeHmac(kDate, region);
        var kService = ComputeHmac(kRegion, Service);
        return ComputeHmac(kService, "aws4_request");
    }

    private static byte[] ComputeHmac(byte[] key, string data) =>
        HMACSHA256.HashData(key, Encoding.UTF8.GetBytes(data));

    private static string ComputeHmacHex(byte[] key, string data) =>
        Convert.ToHexString(ComputeHmac(key, data)).ToLowerInvariant();

    private static string ComputeSha256Hex(byte[] data) =>
        Convert.ToHexString(SHA256.HashData(data)).ToLowerInvariant();

    // Minimal deserialization models for PA API response
    private sealed class PaApiResponse
    {
        [JsonPropertyName("ItemsResult")]
        public PaItemsResult? ItemsResult { get; init; }
    }

    private sealed class PaItemsResult
    {
        [JsonPropertyName("Items")]
        public List<PaItem>? Items { get; init; }
    }

    private sealed class PaItem
    {
        [JsonPropertyName("ItemInfo")]
        public PaItemInfo? ItemInfo { get; init; }

        [JsonPropertyName("Images")]
        public PaImages? Images { get; init; }

        [JsonPropertyName("Offers")]
        public PaOffers? Offers { get; init; }
    }

    private sealed class PaItemInfo
    {
        [JsonPropertyName("Title")]
        public PaDisplayValue? Title { get; init; }
    }

    private sealed class PaDisplayValue
    {
        [JsonPropertyName("DisplayValue")]
        public string? DisplayValue { get; init; }
    }

    private sealed class PaImages
    {
        [JsonPropertyName("Primary")]
        public PaImageSet? Primary { get; init; }
    }

    private sealed class PaImageSet
    {
        [JsonPropertyName("Large")]
        public PaImageVariant? Large { get; init; }

        [JsonPropertyName("Medium")]
        public PaImageVariant? Medium { get; init; }
    }

    private sealed class PaImageVariant
    {
        [JsonPropertyName("URL")]
        public string? Url { get; init; }
    }

    private sealed class PaOffers
    {
        [JsonPropertyName("Listings")]
        public List<PaListing>? Listings { get; init; }
    }

    private sealed class PaListing
    {
        [JsonPropertyName("Price")]
        public PaPrice? Price { get; init; }
    }

    private sealed class PaPrice
    {
        [JsonPropertyName("Amount")]
        public decimal? Amount { get; init; }

        [JsonPropertyName("Currency")]
        public string? Currency { get; init; }
    }
}
@@ -0,0 +1,126 @@
using System.Net.Http.Json;
using System.Text.Json.Serialization;
using BirthList.Web.Configuration;
using Microsoft.Extensions.Options;
namespace BirthList.Web.Features.Registries;
/// <summary>
/// Calls the "Real-Time Amazon Data" API on RapidAPI as a metadata fallback.
/// https://rapidapi.com/letscrape-6bRBa3QguO5/api/real-time-amazon-data
/// </summary>
internal sealed class RapidApiMetadataClient(IHttpClientFactory httpClientFactory, IOptions<AmazonMetadataOptions> options)
{
    private const string Host = "real-time-amazon-data.p.rapidapi.com";

    // Integer digits split into groups of three by '.' or ',' (e.g. "1,299" or "1.234.567").
    private const string GroupedIntegerPattern = @"^[1-9]\d{0,2}(?:[.,]\d{3})+$";

    // Digits only; also matches the empty string so inputs like ".99" are accepted.
    private const string PlainIntegerPattern = @"^\d*$";

    private readonly AmazonMetadataOptions _options = options.Value;

    /// <summary>
    /// Fetches product metadata for the given ASIN and Amazon country code (e.g. "BE", "US", "DE").
    /// Returns null when the API key is not configured or the item is not found.
    /// </summary>
    /// <param name="asin">The Amazon Standard Identification Number to look up.</param>
    /// <param name="countryCode">Country code sent as the "country" query parameter.</param>
    /// <param name="normalizedUrl">The product URL copied verbatim into the result.</param>
    /// <param name="cancellationToken">Token used to cancel the HTTP call.</param>
    /// <returns>
    /// The parsed metadata, or null when the key is missing, the API answers with a non-success
    /// status code, or the response carries no "data" object.
    /// </returns>
    public async Task<UrlMetadataResult?> GetItemAsync(string asin, string countryCode, string normalizedUrl, CancellationToken cancellationToken)
    {
        if (!_options.IsRapidApiConfigured)
        {
            return null;
        }

        var client = httpClientFactory.CreateClient("RapidApi");
        using var request = new HttpRequestMessage(
            HttpMethod.Get,
            $"https://{Host}/product-details?asin={Uri.EscapeDataString(asin)}&country={Uri.EscapeDataString(countryCode)}");
        request.Headers.TryAddWithoutValidation("x-rapidapi-host", Host);
        request.Headers.TryAddWithoutValidation("x-rapidapi-key", _options.RapidApiKey);

        using var response = await client.SendAsync(request, cancellationToken).ConfigureAwait(false);
        if (!response.IsSuccessStatusCode)
        {
            return null;
        }

        var result = await response.Content.ReadFromJsonAsync<RapidApiProductResponse>(cancellationToken: cancellationToken).ConfigureAwait(false);
        if (result?.Data is null)
        {
            return null;
        }

        var data = result.Data;
        var imageUrl = data.ProductMainImageUrl
            ?? data.ProductPhotos?.FirstOrDefault();

        decimal? price = null;
        string? currency = null;

        // Prefer the current price; fall back to the original (list) price.
        var priceEntry = data.ProductPrice ?? data.ProductOriginalPrice;
        if (!string.IsNullOrWhiteSpace(priceEntry))
        {
            (price, currency) = ParseRapidApiPrice(priceEntry);
        }

        return new UrlMetadataResult
        {
            NormalizedUrl = normalizedUrl,
            Title = data.ProductTitle,
            Description = data.ProductDescription,
            ImageUrl = imageUrl,
            PriceAmount = price,
            CurrencyCode = currency
        };
    }

    /// <summary>
    /// Splits a display price such as "€29,99", "$1,299.99" or "29.99 EUR" into an amount and a currency code.
    /// </summary>
    /// <returns>
    /// The parsed amount (null when the text holds no single well-formed number) and the detected
    /// currency code (null when neither a known symbol nor a three-letter code is present).
    /// </returns>
    private static (decimal? amount, string? currency) ParseRapidApiPrice(string raw)
    {
        // RapidAPI returns prices like "€29,99" or "$19.99" or "29.99 EUR"
        string? currencyCode = null;
        if (raw.Contains('€')) currencyCode = "EUR";
        else if (raw.Contains('$')) currencyCode = "USD";
        else if (raw.Contains('£')) currencyCode = "GBP";
        else
        {
            var match = System.Text.RegularExpressions.Regex.Match(raw, @"\b([A-Z]{3})\b");
            if (match.Success) currencyCode = match.Value;
        }

        // Keep digits and separators only; trailing punctuation ("29,99.") is not part of the number.
        var digits = System.Text.RegularExpressions.Regex
            .Replace(raw, "[^0-9.,]", string.Empty)
            .TrimEnd('.', ',');

        return (ParseAmount(digits), currencyCode);
    }

    /// <summary>
    /// Parses a string made of digits, '.' and ',' where either character may act as the decimal
    /// or the grouping separator. The last separator is the decimal point unless the whole string
    /// is a well-formed run of three-digit groups ("1,299" is 1299, "1.299,99" is 1299.99).
    /// Returns null when the separators do not describe a single number.
    /// </summary>
    private static decimal? ParseAmount(string digits)
    {
        var integerPart = digits;
        var fractionPart = string.Empty;

        var lastSeparator = digits.LastIndexOfAny(new[] { '.', ',' });
        if (lastSeparator >= 0)
        {
            var looksFullyGrouped =
                digits.Length - lastSeparator - 1 == 3 &&
                System.Text.RegularExpressions.Regex.IsMatch(digits, GroupedIntegerPattern);

            if (!looksFullyGrouped)
            {
                integerPart = digits[..lastSeparator];
                fractionPart = digits[(lastSeparator + 1)..];
            }
        }

        // Reject inputs such as two prices glued together ("19.9929.99") instead of guessing.
        var integerIsValid =
            System.Text.RegularExpressions.Regex.IsMatch(integerPart, PlainIntegerPattern) ||
            System.Text.RegularExpressions.Regex.IsMatch(integerPart, GroupedIntegerPattern);
        if (!integerIsValid)
        {
            return null;
        }

        var normalized = integerPart.Replace(".", string.Empty).Replace(",", string.Empty);
        if (fractionPart.Length > 0)
        {
            normalized = $"{normalized}.{fractionPart}";
        }

        return decimal.TryParse(
            normalized,
            System.Globalization.NumberStyles.AllowDecimalPoint,
            System.Globalization.CultureInfo.InvariantCulture,
            out var amount)
            ? amount
            : null;
    }

    // Minimal deserialization models
    private sealed class RapidApiProductResponse
    {
        [JsonPropertyName("data")]
        public RapidApiProductData? Data { get; init; }
    }

    private sealed class RapidApiProductData
    {
        [JsonPropertyName("product_title")]
        public string? ProductTitle { get; init; }

        [JsonPropertyName("product_description")]
        public string? ProductDescription { get; init; }

        [JsonPropertyName("product_main_image_url")]
        public string? ProductMainImageUrl { get; init; }

        [JsonPropertyName("product_photos")]
        public List<string>? ProductPhotos { get; init; }

        [JsonPropertyName("product_price")]
        public string? ProductPrice { get; init; }

        [JsonPropertyName("product_original_price")]
        public string? ProductOriginalPrice { get; init; }
    }
}
@@ -3,10 +3,14 @@ using System.Text.RegularExpressions;
namespace BirthList.Web.Features.Registries;
internal sealed class RegistryMetadataService(IHttpClientFactory httpClientFactory)
internal sealed class RegistryMetadataService(
IHttpClientFactory httpClientFactory,
AmazonPaApiClient paApiClient,
RapidApiMetadataClient rapidApiClient)
{
// Matches one complete <meta ...> tag, case-insensitively; matching is capped at one second.
private static readonly Regex MetaTagRegex = new("<meta\\b[^>]*>", RegexOptions.IgnoreCase | RegexOptions.Compiled, TimeSpan.FromSeconds(1));
// Captures a name/property/content attribute (group 1) and its quoted value (group 2).
// The value group stops at the first quote of either kind, so a value containing the other quote character is truncated.
private static readonly Regex AttributeRegex = new("(name|property|content)\\s*=\\s*['\"]([^'\"]*)['\"]", RegexOptions.IgnoreCase | RegexOptions.Compiled, TimeSpan.FromSeconds(1));
// Captures the 10-character ASIN that follows "/dp/" and is terminated by "/" or the end of the input.
// Only the "/dp/" path form is recognised; other path shapes (e.g. "/gp/product/<ASIN>") do not match.
private static readonly Regex AsinRegex = new(@"/dp/(?<asin>[A-Z0-9]{10})(?:/|$)", RegexOptions.IgnoreCase | RegexOptions.Compiled, TimeSpan.FromSeconds(1));
public async Task<UrlMetadataResult?> FetchAsync(string url, CancellationToken cancellationToken)
{
@@ -16,18 +20,82 @@ internal sealed class RegistryMetadataService(IHttpClientFactory httpClientFacto
}
var normalizedUri = NormalizeProductUri(sourceUri);
var isAmazon = normalizedUri.Host.Contains("amazon", StringComparison.OrdinalIgnoreCase);
// Strategy 1: PA API (Amazon only, when credentials are configured)
if (isAmazon)
{
var asin = ExtractAsin(normalizedUri);
if (!string.IsNullOrWhiteSpace(asin))
{
var paResult = await paApiClient.GetItemAsync(asin, normalizedUri.AbsoluteUri, cancellationToken).ConfigureAwait(false);
if (paResult is not null && HasUsefulMetadata(paResult))
{
return paResult;
}
}
}
// Strategy 2: Direct HTML scrape
var scrapeResult = await ScrapeAsync(normalizedUri, cancellationToken).ConfigureAwait(false);
if (scrapeResult is not null && HasUsefulMetadata(scrapeResult))
{
return scrapeResult;
}
// Strategy 3: RapidAPI fallback (Amazon only)
if (isAmazon)
{
var asin = ExtractAsin(normalizedUri);
if (!string.IsNullOrWhiteSpace(asin))
{
var countryCode = ExtractAmazonCountryCode(normalizedUri);
var rapidResult = await rapidApiClient.GetItemAsync(asin, countryCode, normalizedUri.AbsoluteUri, cancellationToken).ConfigureAwait(false);
if (rapidResult is not null)
{
return rapidResult;
}
}
}
// Return whatever the scrape got (may be partial or null) rather than hard-failing
return scrapeResult;
}
private async Task<UrlMetadataResult?> ScrapeAsync(Uri normalizedUri, CancellationToken cancellationToken)
{
var client = httpClientFactory.CreateClient("RegistryMetadata");
using var request = new HttpRequestMessage(HttpMethod.Get, normalizedUri);
request.Headers.UserAgent.ParseAdd("Mozilla/5.0 (compatible; BirthListBot/1.0)");
request.Headers.UserAgent.ParseAdd("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36");
request.Headers.TryAddWithoutValidation("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8");
// HttpClient only decodes compressed bodies when its primary handler enables AutomaticDecompression.
// The "RegistryMetadata" client registration configures a timeout only, so advertising gzip/deflate/br
// would hand compressed bytes to ReadAsStringAsync and the HTML parsing below would find nothing.
request.Headers.TryAddWithoutValidation("Accept-Encoding", "identity");
request.Headers.AcceptLanguage.ParseAdd("en-US,en;q=0.9");
request.Headers.TryAddWithoutValidation("Upgrade-Insecure-Requests", "1");
request.Headers.TryAddWithoutValidation("Sec-Fetch-Dest", "document");
request.Headers.TryAddWithoutValidation("Sec-Fetch-Mode", "navigate");
request.Headers.TryAddWithoutValidation("Sec-Fetch-Site", "none");
request.Headers.TryAddWithoutValidation("Sec-Fetch-User", "?1");
request.Headers.TryAddWithoutValidation("Cache-Control", "max-age=0");
using var response = await client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
response.EnsureSuccessStatusCode();
var html = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
string? html = null;
try
{
html = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
}
catch (HttpRequestException)
{
// Body unreadable; fall through with null html.
}
if (string.IsNullOrWhiteSpace(html))
{
if (!response.IsSuccessStatusCode)
{
throw new HttpRequestException($"Request failed with status {(int)response.StatusCode} {response.ReasonPhrase} and no response body.");
}
throw new InvalidOperationException("Metadata response content was empty.");
}
@@ -111,6 +179,15 @@ internal sealed class RegistryMetadataService(IHttpClientFactory httpClientFacto
}
}
var hasAnyMetadata = !string.IsNullOrWhiteSpace(title)
|| !string.IsNullOrWhiteSpace(image)
|| price.HasValue;
if (!hasAnyMetadata && !response.IsSuccessStatusCode)
{
throw new HttpRequestException($"Request failed with status {(int)response.StatusCode} {response.ReasonPhrase} and no metadata could be parsed from the response.");
}
return new UrlMetadataResult
{
NormalizedUrl = normalizedUri.AbsoluteUri,
@@ -122,6 +199,34 @@ internal sealed class RegistryMetadataService(IHttpClientFactory httpClientFacto
};
}
/// <summary>
/// Reports whether a result carries at least one of: a non-blank title, a non-blank image URL, or a price.
/// </summary>
private static bool HasUsefulMetadata(UrlMetadataResult result)
{
    var titleMissing = string.IsNullOrWhiteSpace(result.Title);
    var imageMissing = string.IsNullOrWhiteSpace(result.ImageUrl);
    var priceMissing = !result.PriceAmount.HasValue;

    return !(titleMissing && imageMissing && priceMissing);
}
/// <summary>
/// Returns the upper-cased ASIN found in the URI's path by <c>AsinRegex</c>, or null when the path has none.
/// </summary>
private static string? ExtractAsin(Uri uri)
{
    var asinMatch = AsinRegex.Match(uri.AbsolutePath);
    if (!asinMatch.Success)
    {
        return null;
    }

    var rawAsin = asinMatch.Groups["asin"].Value;
    return rawAsin.ToUpperInvariant();
}
// Host suffix -> country code for the Amazon storefronts. Multi-label suffixes are listed first
// so they are tested before any single-label suffix.
private static readonly (string Suffix, string Code)[] AmazonMarketplaceSuffixes =
{
    (".com.be", "BE"),
    (".com.au", "AU"),
    (".com.br", "BR"),
    (".com.mx", "MX"),
    (".com.tr", "TR"),
    (".co.uk", "GB"),
    (".co.jp", "JP"),
    (".de", "DE"),
    (".fr", "FR"),
    (".nl", "NL"),
    (".es", "ES"),
    (".it", "IT"),
    (".ca", "CA"),
    (".in", "IN"),
    (".sg", "SG"),
    (".ae", "AE"),
    (".sa", "SA"),
    (".pl", "PL"),
    (".se", "SE"),
    (".eg", "EG"),
    (".cn", "CN")
};

/// <summary>
/// Maps an Amazon storefront host to a two-letter country code
/// (amazon.com.be -> BE, amazon.de -> DE, amazon.co.uk -> GB, amazon.com -> US).
/// </summary>
/// <param name="uri">The product URI whose host identifies the storefront.</param>
/// <returns>The matching country code, or "US" when the host ends in no known marketplace suffix.</returns>
private static string ExtractAmazonCountryCode(Uri uri)
{
    var host = uri.Host;
    foreach (var (suffix, code) in AmazonMarketplaceSuffixes)
    {
        if (host.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
        {
            return code;
        }
    }

    return "US";
}
private static bool IsGenericAmazonImage(string image)
{
return image.Contains("amazon.png", StringComparison.OrdinalIgnoreCase)
+11
View File
@@ -28,7 +28,16 @@ builder.Services.AddHttpClient("RegistryMetadata", client =>
{
client.Timeout = TimeSpan.FromSeconds(10);
});
// Named HTTP clients for the Amazon PA API and the RapidAPI fallback; both give up after ten seconds.
foreach (var metadataClientName in new[] { "AmazonPaApi", "RapidApi" })
{
    builder.Services.AddHttpClient(metadataClientName, client => client.Timeout = TimeSpan.FromSeconds(10));
}
builder.Services.Configure<SmtpOptions>(builder.Configuration.GetSection("Smtp"));
// Bind the "AmazonMetadata" configuration section (PA API credentials and host, RapidAPI key) to AmazonMetadataOptions.
builder.Services.Configure<AmazonMetadataOptions>(builder.Configuration.GetSection("AmazonMetadata"));
builder.Services.Configure<ForwardedHeadersOptions>(options =>
{
@@ -45,6 +54,8 @@ builder.Services.AddScoped<RegistryAuthorizationService>();
builder.Services.AddScoped<OwnerBootstrapService>();
builder.Services.AddScoped<RegistryService>();
builder.Services.AddScoped<RegistryMetadataService>();
// Amazon metadata clients; RegistryMetadataService takes both as constructor dependencies.
builder.Services.AddScoped<AmazonPaApiClient>();
builder.Services.AddScoped<RapidApiMetadataClient>();
builder.Services.AddScoped<RegistryThemeService>();
builder.Services.AddScoped<RegistryUserContext>();
builder.Services.AddScoped<SmtpConfigurationStatusService>();
+14 -1
View File
@@ -39,5 +39,18 @@
}
},
"AllowedHosts": "*",
"PublicUrl": ""
"PublicUrl": "",
"AmazonMetadata": {
// Amazon Product Advertising API 5.0 credentials.
// Get these from your Amazon Associates account at https://affiliate-program.amazon.com
// When AccessKey, SecretKey and AssociateTag are all set, the PA API is used as primary metadata source.
"AccessKey": "",
"SecretKey": "",
"AssociateTag": "",
// PA API marketplace host, e.g. "webservices.amazon.com" (US/default) or "webservices.amazon.com.be" (BE)
"PaApiHost": "webservices.amazon.com",
// RapidAPI key for the "Real-Time Amazon Data" API (https://rapidapi.com/letscrape-6bRBa3QguO5/api/real-time-amazon-data)
// Used as fallback when direct scraping returns no metadata.
"RapidApiKey": ""
}
}